diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d71c82447..06f22fe98 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -22,7 +22,7 @@ jobs: steps: - uses: actions/setup-go@v3 with: - go-version: "1.17" + go-version: "1.18" - uses: actions/checkout@v3 - uses: technote-space/get-diff-action@v6 with: @@ -43,7 +43,7 @@ jobs: steps: - uses: actions/setup-go@v3 with: - go-version: "1.17" + go-version: "1.18" - uses: actions/checkout@v3 - uses: technote-space/get-diff-action@v6 with: @@ -65,7 +65,7 @@ jobs: steps: - uses: actions/setup-go@v3 with: - go-version: "1.17" + go-version: "1.18" - uses: actions/checkout@v3 - uses: technote-space/get-diff-action@v6 with: diff --git a/.github/workflows/check-generated.yml b/.github/workflows/check-generated.yml index 685b164e5..5e4f0b047 100644 --- a/.github/workflows/check-generated.yml +++ b/.github/workflows/check-generated.yml @@ -7,7 +7,7 @@ name: Check generated code on: pull_request: branches: - - v0.34.x + - main permissions: contents: read @@ -18,7 +18,7 @@ jobs: steps: - uses: actions/setup-go@v3 with: - go-version: '1.17' + go-version: '1.18' - uses: actions/checkout@v3 @@ -26,8 +26,6 @@ jobs: run: | set -euo pipefail - readonly MOCKERY=2.12.3 # N.B. no leading "v" - curl -sL "https://github.com/vektra/mockery/releases/download/v${MOCKERY}/mockery_${MOCKERY}_Linux_x86_64.tar.gz" | tar -C /usr/local/bin -xzf - make mockery 2>/dev/null if ! git diff --stat --exit-code ; then @@ -44,7 +42,7 @@ jobs: steps: - uses: actions/setup-go@v3 with: - go-version: '1.17' + go-version: '1.18' - uses: actions/checkout@v3 with: diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 44681245e..129f4c83d 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -49,7 +49,7 @@ jobs: password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Publish to Docker Hub - uses: docker/build-push-action@v3.1.0 + uses: docker/build-push-action@v3.1.1 with: context: . file: ./DOCKER/Dockerfile diff --git a/.github/workflows/docs-toc.yml b/.github/workflows/docs-toc.yml index 99570487a..589be02c6 100644 --- a/.github/workflows/docs-toc.yml +++ b/.github/workflows/docs-toc.yml @@ -1,21 +1,20 @@ -# TODO(thane): Re-enable once we've pulled in the ADRs and RFCs from master. # Verify that important design docs have ToC entries. -#name: Check documentation ToC -#on: -# pull_request: -# push: -# branches: -# - main -# -#jobs: -# check: -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v3 -# - uses: technote-space/get-diff-action@v6 -# with: -# PATTERNS: | -# docs/architecture/** -# docs/rfc/** -# - run: ./docs/presubmit.sh -# if: env.GIT_DIFF +name: Check documentation ToC +on: + pull_request: + push: + branches: + - main + +jobs: + check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: technote-space/get-diff-action@v6 + with: + PATTERNS: | + docs/architecture/** + docs/rfc/** + - run: make check-docs-toc + if: env.GIT_DIFF diff --git a/.github/workflows/e2e-manual.yml b/.github/workflows/e2e-manual.yml index e678cd812..c0eade4bd 100644 --- a/.github/workflows/e2e-manual.yml +++ b/.github/workflows/e2e-manual.yml @@ -17,7 +17,7 @@ jobs: steps: - uses: actions/setup-go@v3 with: - go-version: '1.17' + go-version: '1.18' - uses: actions/checkout@v3 diff --git a/.github/workflows/e2e-nightly-34x.yml b/.github/workflows/e2e-nightly-34x.yml index 386982c37..124a401f3 100644 --- a/.github/workflows/e2e-nightly-34x.yml +++ b/.github/workflows/e2e-nightly-34x.yml @@ -15,18 +15,24 @@ jobs: strategy: fail-fast: false matrix: - group: ['00', '01', '02', '03', "04"] + group: ['00', '01'] runs-on: ubuntu-latest timeout-minutes: 60 steps: - uses: actions/setup-go@v3 with: - go-version: '1.17' + go-version: '1.18' - uses: actions/checkout@v3 with: ref: 'v0.34.x' + - name: Capture git repo info + id: git-info + run: | + echo "::set-output name=branch::`git branch --show-current`" + echo "::set-output name=commit::`git rev-parse HEAD`" + - name: Build working-directory: test/e2e # Run make jobs in parallel, since we can't run steps in parallel. @@ -41,18 +47,58 @@ jobs: working-directory: test/e2e run: ./run-multiple.sh networks/nightly/*-group${{ matrix.group }}-*.toml + outputs: + git-branch: ${{ steps.git-info.outputs.branch }} + git-commit: ${{ steps.git-info.outputs.commit }} + e2e-nightly-fail: needs: e2e-nightly-test if: ${{ failure() }} runs-on: ubuntu-latest steps: - name: Notify Slack on failure - uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7 + uses: slackapi/slack-github-action@v1.21.0 env: - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - SLACK_CHANNEL: tendermint-internal - SLACK_USERNAME: Nightly E2E Tests - SLACK_ICON_EMOJI: ':skull:' - SLACK_COLOR: danger - SLACK_MESSAGE: Nightly E2E tests failed on v0.34.x - SLACK_FOOTER: '' + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK + BRANCH: ${{ needs.e2e-nightly-test.outputs.git-branch }} + RUN_URL: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + COMMIT_URL: "${{ github.server_url }}/${{ github.repository }}/commit/${{ needs.e2e-nightly-test.outputs.git-commit }}" + with: + payload: | + { + "blocks": [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure." + } + } + ] + } + + e2e-nightly-success: # may turn this off once they seem to pass consistently + needs: e2e-nightly-test + if: ${{ success() }} + runs-on: ubuntu-latest + steps: + - name: Notify Slack on success + uses: slackapi/slack-github-action@v1.21.0 + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK + BRANCH: ${{ needs.e2e-nightly-test.outputs.git-branch }} + with: + payload: | + { + "blocks": [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": ":white_check_mark: Nightly E2E tests for `${{ env.BRANCH }}` passed." + } + } + ] + } diff --git a/.github/workflows/e2e-nightly-main.yml b/.github/workflows/e2e-nightly-main.yml deleted file mode 100644 index 467909981..000000000 --- a/.github/workflows/e2e-nightly-main.yml +++ /dev/null @@ -1,73 +0,0 @@ -# Runs randomly generated E2E testnets nightly on main - -# !! Relevant changes to this file should be propagated to the e2e-nightly-x -# files for the supported backport branches, when appropriate, modulo version -# markers. - -name: e2e-nightly-main -on: - schedule: - - cron: '0 2 * * *' - -jobs: - e2e-nightly-test: - # Run parallel jobs for the listed testnet groups (must match the - # ./build/generator -g flag) - strategy: - fail-fast: false - matrix: - group: ['00', '01', '02', '03', "04"] - runs-on: ubuntu-latest - timeout-minutes: 60 - steps: - - uses: actions/setup-go@v3 - with: - go-version: '1.17' - - - uses: actions/checkout@v3 - - - name: Build - working-directory: test/e2e - # Run make jobs in parallel, since we can't run steps in parallel. - run: make -j2 docker generator runner tests - - - name: Generate testnets - working-directory: test/e2e - # When changing -g, also change the matrix groups above - run: ./build/generator -g 5 -d networks/nightly/ - - - name: Run ${{ matrix.p2p }} p2p testnets - working-directory: test/e2e - run: ./run-multiple.sh networks/nightly/*-group${{ matrix.group }}-*.toml - - e2e-nightly-fail: - needs: e2e-nightly-test - if: ${{ failure() }} - runs-on: ubuntu-latest - steps: - - name: Notify Slack on failure - uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7 - env: - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - SLACK_CHANNEL: tendermint-internal - SLACK_USERNAME: Nightly E2E Tests - SLACK_ICON_EMOJI: ':skull:' - SLACK_COLOR: danger - SLACK_MESSAGE: Nightly E2E tests failed on main - SLACK_FOOTER: '' - - e2e-nightly-success: # may turn this off once they seem to pass consistently - needs: e2e-nightly-test - if: ${{ success() }} - runs-on: ubuntu-latest - steps: - - name: Notify Slack on success - uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7 - env: - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - SLACK_CHANNEL: tendermint-internal - SLACK_USERNAME: Nightly E2E Tests - SLACK_ICON_EMOJI: ':white_check_mark:' - SLACK_COLOR: good - SLACK_MESSAGE: Nightly E2E tests passed on main - SLACK_FOOTER: '' diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index a84c701d6..7db220b43 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -16,7 +16,7 @@ jobs: steps: - uses: actions/setup-go@v3 with: - go-version: '1.17' + go-version: '1.18' - uses: actions/checkout@v3 - uses: technote-space/get-diff-action@v6 with: diff --git a/.github/workflows/fuzz-nightly.yml b/.github/workflows/fuzz-nightly.yml index 49df2dd41..1673e99db 100644 --- a/.github/workflows/fuzz-nightly.yml +++ b/.github/workflows/fuzz-nightly.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/setup-go@v3 with: - go-version: '1.17' + go-version: '1.18' - uses: actions/checkout@v3 @@ -75,13 +75,24 @@ jobs: if: ${{ needs.fuzz-nightly-test.outputs.crashers-count != 0 }} runs-on: ubuntu-latest steps: - - name: Notify Slack if any crashers - uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7 + - name: Notify Slack on failure + uses: slackapi/slack-github-action@v1.21.0 env: - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - SLACK_CHANNEL: tendermint-internal - SLACK_USERNAME: Nightly Fuzz Tests - SLACK_ICON_EMOJI: ':firecracker:' - SLACK_COLOR: danger - SLACK_MESSAGE: Crashers found in Nightly Fuzz tests - SLACK_FOOTER: '' + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK + BRANCH: ${{ github.ref_name }} + CRASHERS: ${{ needs.fuzz-nightly-test.outputs.crashers-count }} + RUN_URL: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + with: + payload: | + { + "blocks": [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": ":skull: Nightly fuzz tests for `${{ env.BRANCH }}` failed with ${{ env.CRASHERS }} crasher(s). See the <${{ env.RUN_URL }}|run details>." + } + } + ] + } diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index be1dde8b8..bc038daf9 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-go@v3 with: - go-version: '1.17' + go-version: '1.18' - uses: technote-space/get-diff-action@v6 with: PATTERNS: | @@ -34,7 +34,7 @@ jobs: # Required: the version of golangci-lint is required and # must be specified without patch version: we always use the # latest patch version. - version: v1.45 + version: v1.47.3 args: --timeout 10m github-token: ${{ secrets.github_token }} if: env.GIT_DIFF diff --git a/.github/workflows/proto-lint.yml b/.github/workflows/proto-lint.yml index 6e078c6f7..1ef085606 100644 --- a/.github/workflows/proto-lint.yml +++ b/.github/workflows/proto-lint.yml @@ -15,7 +15,7 @@ jobs: timeout-minutes: 5 steps: - uses: actions/checkout@v3 - - uses: bufbuild/buf-setup-action@v1.6.0 + - uses: bufbuild/buf-setup-action@v1.7.0 - uses: bufbuild/buf-lint-action@v1 with: input: 'proto' diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4197d0243..414a92546 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,7 +16,7 @@ jobs: - uses: actions/setup-go@v3 with: - go-version: '1.17' + go-version: '1.18' - name: Build uses: goreleaser/goreleaser-action@v3 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1904c37ed..3ab6dd87a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -18,7 +18,7 @@ jobs: steps: - uses: actions/setup-go@v3 with: - go-version: "1.17" + go-version: "1.18" - uses: actions/checkout@v3 - uses: technote-space/get-diff-action@v6 with: diff --git a/.gitignore b/.gitignore index 29dec09b5..7a1c528b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.bak *.iml +*.log *.swo *.swp */.glide @@ -50,7 +51,6 @@ proto/spec/**/*.pb.go *.aux *.bbl *.blg -*.log *.pdf *.gz *.dvi diff --git a/.golangci.yml b/.golangci.yml index 46aa61d6c..7fa9f28aa 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -21,7 +21,7 @@ linters: - nolintlint - prealloc - staticcheck - - structcheck + # - structcheck // to be fixed by golangci-lint - stylecheck - typecheck - unconvert diff --git a/.markdownlint.yml b/.markdownlint.yml index 3d950fbb8..4cd68eaf8 100644 --- a/.markdownlint.yml +++ b/.markdownlint.yml @@ -8,8 +8,10 @@ default: true MD001: false MD007: {indent: 4} MD013: false -MD024: { siblings_only: true } +MD024: {siblings_only: true} MD025: false -MD033: { no-inline-html: false } -no-hard-tabs: false -whitespace: false +MD033: false +MD036: false +MD010: false +MD012: false +MD028: false diff --git a/CHANGELOG.md b/CHANGELOG.md index e37ab67cc..883576bb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1283,7 +1283,7 @@ This release contains a minor enhancement to the ABCI and some breaking changes - [p2p] [\#3338](https://github.com/tendermint/tendermint/issues/3338) Prevent "sent next PEX request too soon" errors by not calling ensurePeers outside of ensurePeersRoutine -- [behaviour] [\3772](https://github.com/tendermint/tendermint/pull/3772) Return correct reason in MessageOutOfOrder (@jim380) +- [behavior] [\3772](https://github.com/tendermint/tendermint/pull/3772) Return correct reason in MessageOutOfOrder (@jim380) - [config] [\#3723](https://github.com/tendermint/tendermint/issues/3723) Add consensus_params to testnet config generation; document time_iota_ms (@ashleyvega) @@ -1603,7 +1603,7 @@ It brings back `NetAddress()` to `NodeInfo` and uses it instead of `SocketAddr` Additionally, it improves response time on the `/validators` or `/status` RPC endpoints. As a side-effect it makes these RPC endpoint more difficult to DoS and fixes a performance degradation in `ExecCommitBlock`. Also, it contains an [ADR](https://github.com/tendermint/tendermint/pull/3539) that proposes decoupling the -responsibility for peer behaviour from the `p2p.Switch` (by @brapse). +responsibility for peer behavior from the `p2p.Switch` (by @brapse). Special thanks to external contributors on this release: @brapse, @guagualvcha, @mydring @@ -2271,8 +2271,8 @@ Special thanks to external contributors on this release: - [blockchain] [\#2731](https://github.com/tendermint/tendermint/issues/2731) Retry both blocks if either is bad to avoid getting stuck during fast sync (@goolAdapter) - [consensus] [\#2893](https://github.com/tendermint/tendermint/issues/2893) Use genDoc.Validators instead of state.NextValidators on replay when appHeight==0 (@james-ray) - [log] [\#2868](https://github.com/tendermint/tendermint/issues/2868) Fix `module=main` setting overriding all others - - NOTE: this changes the default logging behaviour to be much less verbose. - Set `log_level="info"` to restore the previous behaviour. + - NOTE: this changes the default logging behavior to be much less verbose. + Set `log_level="info"` to restore the previous behavior. - [rpc] [\#2808](https://github.com/tendermint/tendermint/issues/2808) Fix `accum` field in `/validators` by calling `IncrementAccum` if necessary - [rpc] [\#2811](https://github.com/tendermint/tendermint/issues/2811) Allow integer IDs in JSON-RPC requests (@tomtau) - [txindex/kv] [\#2759](https://github.com/tendermint/tendermint/issues/2759) Fix tx.height range queries @@ -2403,7 +2403,7 @@ increasing attention to backwards compatibility. Thanks for bearing with us! * [state] [\#2644](https://github.com/tendermint/tendermint/issues/2644) Add Version field to State, breaking the format of State as encoded on disk. * [rpc] [\#2298](https://github.com/tendermint/tendermint/issues/2298) `/abci_query` takes `prove` argument instead of `trusted` and switches the default - behaviour to `prove=false` + behavior to `prove=false` * [rpc] [\#2654](https://github.com/tendermint/tendermint/issues/2654) Remove all `node_info.other.*_version` fields in `/status` and `/net_info` * [rpc] [\#2636](https://github.com/tendermint/tendermint/issues/2636) Remove @@ -2548,7 +2548,7 @@ FEATURES: - [libs] [\#2286](https://github.com/tendermint/tendermint/issues/2286) Panic if `autofile` or `db/fsdb` permissions change from 0600. IMPROVEMENTS: -- [libs/db] [\#2371](https://github.com/tendermint/tendermint/issues/2371) Output error instead of panic when the given `db_backend` is not initialised (@bradyjoestar) +- [libs/db] [\#2371](https://github.com/tendermint/tendermint/issues/2371) Output error instead of panic when the given `db_backend` is not initialized (@bradyjoestar) - [mempool] [\#2399](https://github.com/tendermint/tendermint/issues/2399) Make mempool cache a proper LRU (@bradyjoestar) - [p2p] [\#2126](https://github.com/tendermint/tendermint/issues/2126) Introduce PeerTransport interface to improve isolation of concerns - [libs/common] [\#2326](https://github.com/tendermint/tendermint/issues/2326) Service returns ErrNotStarted diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 288ad9d28..6db63b985 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -1,6 +1,6 @@ # Unreleased Changes -## v0.34.21 +## v0.37.0 Special thanks to external contributors on this release: @@ -12,18 +12,25 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi - Apps + - [abci/counter] \#6684 Delete counter example app + - [txResults] \#9175 Remove `gas_used` & `gas_wanted` from being merkelized in the lastresulthash in the header + - [abci] \#5783 Make length delimiter encoding consistent (`uint64`) between ABCI and P2P wire-level protocols + - P2P Protocol - Go API + - [all] \#9144 Change spelling from British English to American (@cmwaters) + - Rename "Subscription.Cancelled()" to "Subscription.Canceled()" in libs/pubsub + - Blockchain Protocol ### FEATURES -- [#9083] backport cli command to reindex missed events (@cmwaters) - ### IMPROVEMENTS - [abci] \#5706 Added `AbciVersion` to `RequestInfo` allowing applications to check ABCI version when connecting to Tendermint. (@marbar3778) ### BUG FIXES + +[docker] \#9073 enable cross platform build using docker buildx diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 8c5a99203..21cf736ba 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,59 +1,109 @@ # The Tendermint Code of Conduct -This code of conduct applies to all projects run by the Tendermint/COSMOS team and hence to tendermint. - +This code of conduct applies to all projects run by the Tendermint/COSMOS team +and hence to Tendermint. ---- - # Conduct ## Contact: conduct@tendermint.com -* We are committed to providing a friendly, safe and welcoming environment for all, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, nationality, or other similar characteristic. +* We are committed to providing a friendly, safe and welcoming environment for + all, regardless of level of experience, gender, gender identity and + expression, sexual orientation, disability, personal appearance, body size, + race, ethnicity, age, religion, nationality, or other similar characteristic. -* On Slack, please avoid using overtly sexual nicknames or other nicknames that might detract from a friendly, safe and welcoming environment for all. +* On Slack, please avoid using overtly sexual nicknames or other nicknames that + might detract from a friendly, safe and welcoming environment for all. * Please be kind and courteous. There’s no need to be mean or rude. -* Respect that people have differences of opinion and that every design or implementation choice carries a trade-off and numerous costs. There is seldom a right answer. +* Respect that people have differences of opinion and that every design or + implementation choice carries a trade-off and numerous costs. There is seldom + a right answer. -* Please keep unstructured critique to a minimum. If you have solid ideas you want to experiment with, make a fork and see how it works. +* Please keep unstructured critique to a minimum. If you have solid ideas you + want to experiment with, make a fork and see how it works. -* We will exclude you from interaction if you insult, demean or harass anyone. That is not welcome behaviour. We interpret the term “harassment” as including the definition in the [Citizen Code of Conduct](http://citizencodeofconduct.org/); if you have any lack of clarity about what might be included in that concept, please read their definition. In particular, we don’t tolerate behavior that excludes people in socially marginalized groups. +* We will exclude you from interaction if you insult, demean or harass anyone. + That is not welcome behavior. We interpret the term “harassment” as including + the definition in the [Citizen Code of Conduct][ccoc]; if you have any lack of + clarity about what might be included in that concept, please read their + definition. In particular, we don’t tolerate behavior that excludes people in + socially marginalized groups. -* Private harassment is also unacceptable. No matter who you are, if you feel you have been or are being harassed or made uncomfortable by a community member, please contact one of the channel admins or the person mentioned above immediately. Whether you’re a regular contributor or a newcomer, we care about making this community a safe place for you and we’ve got your back. - -* Likewise any spamming, trolling, flaming, baiting or other attention-stealing behaviour is not welcome. +* Private harassment is also unacceptable. No matter who you are, if you feel + you have been or are being harassed or made uncomfortable by a community + member, please contact one of the channel admins or the person mentioned above + immediately. Whether you’re a regular contributor or a newcomer, we care about + making this community a safe place for you and we’ve got your back. +* Likewise any spamming, trolling, flaming, baiting or other attention-stealing + behavior is not welcome. ---- - # Moderation -These are the policies for upholding our community’s standards of conduct. If you feel that a thread needs moderation, please contact the above mentioned person. +These are the policies for upholding our community’s standards of conduct. If +you feel that a thread needs moderation, please contact the above mentioned +person. -1. Remarks that violate the Tendermint/COSMOS standards of conduct, including hateful, hurtful, oppressive, or exclusionary remarks, are not allowed. (Cursing is allowed, but never targeting another user, and never in a hateful manner.) +1. Remarks that violate the Tendermint/COSMOS standards of conduct, including + hateful, hurtful, oppressive, or exclusionary remarks, are not allowed. + (Cursing is allowed, but never targeting another user, and never in a hateful + manner.) -2. Remarks that moderators find inappropriate, whether listed in the code of conduct or not, are also not allowed. +2. Remarks that moderators find inappropriate, whether listed in the code of + conduct or not, are also not allowed. 3. Moderators will first respond to such remarks with a warning. -4. If the warning is unheeded, the user will be “kicked,” i.e., kicked out of the communication channel to cool off. +4. If the warning is unheeded, the user will be “kicked,” i.e., kicked out of + the communication channel to cool off. -5. If the user comes back and continues to make trouble, they will be banned, i.e., indefinitely excluded. +5. If the user comes back and continues to make trouble, they will be banned, + i.e., indefinitely excluded. -6. Moderators may choose at their discretion to un-ban the user if it was a first offense and they offer the offended party a genuine apology. +6. Moderators may choose at their discretion to un-ban the user if it was a + first offense and they offer the offended party a genuine apology. -7. If a moderator bans someone and you think it was unjustified, please take it up with that moderator, or with a different moderator, in private. Complaints about bans in-channel are not allowed. +7. If a moderator bans someone and you think it was unjustified, please take it + up with that moderator, or with a different moderator, in private. Complaints + about bans in-channel are not allowed. -8. Moderators are held to a higher standard than other community members. If a moderator creates an inappropriate situation, they should expect less leeway than others. +8. Moderators are held to a higher standard than other community members. If a + moderator creates an inappropriate situation, they should expect less leeway + than others. -In the Tendermint/COSMOS community we strive to go the extra step to look out for each other. Don’t just aim to be technically unimpeachable, try to be your best self. In particular, avoid flirting with offensive or sensitive issues, particularly if they’re off-topic; this all too often leads to unnecessary fights, hurt feelings, and damaged trust; worse, it can drive people away from the community entirely. +In the Tendermint/COSMOS community we strive to go the extra step to look out +for each other. Don’t just aim to be technically unimpeachable, try to be your +best self. In particular, avoid flirting with offensive or sensitive issues, +particularly if they’re off-topic; this all too often leads to unnecessary +fights, hurt feelings, and damaged trust; worse, it can drive people away +from the community entirely. -And if someone takes issue with something you said or did, resist the urge to be defensive. Just stop doing what it was they complained about and apologize. Even if you feel you were misinterpreted or unfairly accused, chances are good there was something you could’ve communicated better — remember that it’s your responsibility to make your fellow Cosmonauts comfortable. Everyone wants to get along and we are all here first and foremost because we want to talk about cool technology. You will find that people will be eager to assume good intent and forgive as long as you earn their trust. +And if someone takes issue with something you said or did, resist the urge to be +defensive. Just stop doing what it was they complained about and apologize. Even +if you feel you were misinterpreted or unfairly accused, chances are good there +was something you could’ve communicated better — remember that it’s your +responsibility to make your fellow Cosmonauts comfortable. Everyone wants to +get along and we are all here first and foremost because we want to talk +about cool technology. You will find that people will be eager to assume +good intent and forgive as long as you earn their trust. -The enforcement policies listed above apply to all official Tendermint/COSMOS venues.For other projects adopting the Tendermint/COSMOS Code of Conduct, please contact the maintainers of those projects for enforcement. If you wish to use this code of conduct for your own project, consider explicitly mentioning your moderation policy or making a copy with your own moderation policy so as to avoid confusion. +The enforcement policies listed above apply to all official Tendermint/COSMOS +venues. For other projects adopting the Tendermint/COSMOS Code of Conduct, +please contact the maintainers of those projects for enforcement. If you wish to +use this code of conduct for your own project, consider explicitly mentioning +your moderation policy or making a copy with your own moderation policy so as to +avoid confusion. -*Adapted from the [Node.js Policy on Trolling](http://blog.izs.me/post/30036893703/policy-on-trolling), the [Contributor Covenant v1.3.0](http://contributor-covenant.org/version/1/3/0/) and the [Rust Code of Conduct](https://www.rust-lang.org/en-US/conduct.html). +\*Adapted from the [Node.js Policy on Trolling][node-trolling-policy], the +[Contributor Covenant v1.3.0][ccov] and the [Rust Code of Conduct][rust-coc]. + +[ccoc]: https://github.com/stumpsyn/policies/blob/master/citizen_code_of_conduct.md +[node-trolling-policy]: http://blog.izs.me/post/30036893703/policy-on-trolling +[ccov]: http://contributor-covenant.org/version/1/3/0/ +[rust-coc]: https://www.rust-lang.org/en-US/conduct.html diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 715019e90..bfa56bea6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -26,7 +26,8 @@ will indicate their support with a heartfelt emoji. If the issue would benefit from thorough discussion, maintainers may request that you create a [Request For -Comment](https://github.com/tendermint/spec/tree/master/rfc). Discussion +Comment](https://github.com/tendermint/spec/tree/master/rfc) +in the Tendermint spec repo. Discussion at the RFC stage will build collective understanding of the dimensions of the problems and help structure conversations around trade-offs. @@ -104,24 +105,47 @@ specify exactly the dependency you want to update, eg. ## Protobuf -We use [Protocol Buffers](https://developers.google.com/protocol-buffers) along with [gogoproto](https://github.com/gogo/protobuf) to generate code for use across Tendermint Core. +We use [Protocol Buffers](https://developers.google.com/protocol-buffers) along +with [`gogoproto`](https://github.com/gogo/protobuf) to generate code for use +across Tendermint Core. -For linting and checking breaking changes, we use [buf](https://buf.build/). If you would like to run linting and check if the changes you have made are breaking then you will need to have docker running locally. Then the linting cmd will be `make proto-lint` and the breaking changes check will be `make proto-check-breaking`. +To generate proto stubs, lint, and check protos for breaking changes, you will +need to install [buf](https://buf.build/) and `gogoproto`. Then, from the root +of the repository, run: -We use [Docker](https://www.docker.com/) to generate the protobuf stubs. To generate the stubs yourself, make sure docker is running then run `make proto-gen`. +```bash +# Lint all of the .proto files in proto/tendermint +make proto-lint -## Vagrant +# Check if any of your local changes (prior to committing to the Git repository) +# are breaking +make proto-check-breaking -If you are a [Vagrant](https://www.vagrantup.com/) user, you can get started -hacking Tendermint with the commands below. +# Generate Go code from the .proto files in proto/tendermint +make proto-gen +``` -NOTE: In case you installed Vagrant in 2017, you might need to run -`vagrant box update` to upgrade to the latest `ubuntu/xenial64`. +To automatically format `.proto` files, you will need +[`clang-format`](https://clang.llvm.org/docs/ClangFormat.html) installed. Once +installed, you can run: -```sh -vagrant up -vagrant ssh -make test +```bash +make proto-format +``` + +### Visual Studio Code + +If you are a VS Code user, you may want to add the following to your `.vscode/settings.json`: + +```json +{ + "protoc": { + "options": [ + "--proto_path=${workspaceRoot}/proto", + "--proto_path=${workspaceRoot}/third_party/proto" + ] + } +} ``` ## Changelog @@ -225,89 +249,6 @@ Fixes #nnnn Each PR should have one commit once it lands on `master`; this can be accomplished by using the "squash and merge" button on Github. Be sure to edit your commit message, though! -### Release Procedure - -#### Major Release - -1. Start on `master` -2. Run integration tests (see `test_integrations` in Makefile) -3. Prepare release in a pull request against `master` (to be squash merged): - - Copy `CHANGELOG_PENDING.md` to top of `CHANGELOG.md`; if this release - had release candidates, squash all the RC updates into one - - Run `python ./scripts/linkify_changelog.py CHANGELOG.md` to add links for - all issues - - run `bash ./scripts/authors.sh` to get a list of authors since the latest - release, and add the github aliases of external contributors to the top of - the changelog. To lookup an alias from an email, try `bash ./scripts/authors.sh ` - - Reset the `CHANGELOG_PENDING.md` - - Bump TMVersionDefault version in `version.go` - - Bump P2P and block protocol versions in `version.go`, if necessary - - Bump ABCI protocol version in `version.go`, if necessary - - Make sure all significant breaking changes are covered in `UPGRADING.md` - - Add any release notes you would like to be added to the body of the release to `release_notes.md`. -4. Push a tag with prepared release details (this will trigger the release `vX.X.0`) - - `git tag -a vX.X.x -m 'Release vX.X.x'` - - `git push origin vX.X.x` -5. Update the changelog.md file on master with the releases changelog. -6. Delete any RC branches and tags for this release (if applicable) - -#### Minor Release - -Minor releases are done differently from major releases: They are built off of long-lived release candidate branches, rather than from master. - -1. Checkout the long-lived release candidate branch: `git checkout rcX/vX.X.X` -2. Run integration tests: `make test_integrations` -3. Prepare the release: - - copy `CHANGELOG_PENDING.md` to top of `CHANGELOG.md` - - run `python ./scripts/linkify_changelog.py CHANGELOG.md` to add links for all issues - - run `bash ./scripts/authors.sh` to get a list of authors since the latest release, and add the GitHub aliases of external contributors to the top of the CHANGELOG. To lookup an alias from an email, try `bash ./scripts/authors.sh ` - - reset the `CHANGELOG_PENDING.md` - - bump P2P and block protocol versions in `version.go`, if necessary - - bump ABCI protocol version in `version.go`, if necessary - - make sure all significant breaking changes are covered in `UPGRADING.md` - - Add any release notes you would like to be added to the body of the release to `release_notes.md`. -4. Create a release branch `release/vX.X.x` off the release candidate branch: - - `git checkout -b release/vX.X.x` - - `git push -u origin release/vX.X.x` - - Note that all branches prefixed with `release` are protected once pushed. You will need admin help to make any changes to the branch. -5. Once the release branch has been approved, make sure to pull it locally, then push a tag. - - `git tag -a vX.X.x -m 'Release vX.X.x'` - - `git push origin vX.X.x` -6. Create a pull request back to master with the CHANGELOG & version changes from the latest release. - - Remove all `R:minor` labels from the pull requests that were included in the release. - - Do not merge the release branch into master. -7. Delete the former long lived release candidate branch once the release has been made. -8. Create a new release candidate branch to be used for the next release. - -#### Backport Release - -1. start from the existing release branch you want to backport changes to (e.g. v0.30) - Branch to a release/vX.X.X branch locally (e.g. release/v0.30.7) -2. Cherry pick the commit(s) that contain the changes you want to backport (usually these commits are from squash-merged PRs which were already reviewed) -3. Follow steps 2 and 3 from [Major Release](#major-release) -4. Push changes to release/vX.X.X branch -5. Open a PR against the existing vX.X branch - -#### Release Candidates - -Before creating an official release, especially a major release, we may want to create a -release candidate (RC) for our friends and partners to test out. We use git tags to -create RCs, and we build them off of RC branches. RC branches typically have names formatted -like `RCX/vX.X.X` (or, concretely, `RC0/v0.34.0`), while the tags themselves follow -the "standard" release naming conventions, with `-rcX` at the end (`vX.X.X-rcX`). - -(Note that branches and tags _cannot_ have the same names, so it's important that these branches -have distinct names from the tags/release names.) - -1. Start from the RC branch (e.g. `RC0/v0.34.0`). -2. Create the new tag, specifying a name and a tag "message": - `git tag -a v0.34.0-rc0 -m "Release Candidate v0.34.0-rc0` -3. Push the tag back up to origin: - `git push origin v0.34.0-rc4` - Now the tag should be available on the repo's releases page. -4. Create a new release candidate branch for any possible updates to the RC: - `git checkout -b RC1/v0.34.0; git push origin RC1/v0.34.0` - ## Testing ### Unit tests @@ -340,15 +281,6 @@ cd test/e2e && \ ./build/runner -f networks/ci.toml ``` -### Maverick - -**If you're changing the code in `consensus` package, please make sure to -replicate all the changes in `./test/maverick/consensus`**. Maverick is a -byzantine node used to assert that the validator gets punished for malicious -behavior. - -See [README](./test/maverick/README.md) for details. - ### Model-based tests (ADVANCED) *NOTE: if you're just submitting your first PR, you won't need to touch these @@ -393,8 +325,10 @@ information. ### RPC Testing -If you contribute to the RPC endpoints it's important to document your changes in the [Openapi file](./rpc/openapi/openapi.yaml) -To test your changes you should install `nodejs` and run: +**If you contribute to the RPC endpoints it's important to document your +changes in the [Openapi file](./rpc/openapi/openapi.yaml)**. + +To test your changes you must install `nodejs` and run: ```bash npm i -g dredd diff --git a/DOCKER/Dockerfile b/DOCKER/Dockerfile index 1d8878bff..77d2ad991 100644 --- a/DOCKER/Dockerfile +++ b/DOCKER/Dockerfile @@ -1,11 +1,11 @@ # stage 1 Generate Tendermint Binary -FROM golang:1.15-alpine as builder +FROM --platform=$BUILDPLATFORM golang:1.18-alpine as builder RUN apk update && \ apk upgrade && \ apk --no-cache add make COPY / /tendermint WORKDIR /tendermint -RUN make build-linux +RUN TARGETPLATFORM=$TARGETPLATFORM make build-linux # stage 2 FROM golang:1.15-alpine diff --git a/Makefile b/Makefile index a44aa6b3c..ab81a6e66 100644 --- a/Makefile +++ b/Makefile @@ -53,6 +53,67 @@ endif # allow users to pass additional flags via the conventional LDFLAGS variable LD_FLAGS += $(LDFLAGS) +# Process Docker environment varible TARGETPLATFORM +# in order to build binary with correspondent ARCH +# by default will always build for linux/amd64 +TARGETPLATFORM ?= +GOOS ?= linux +GOARCH ?= amd64 +GOARM ?= + +ifeq (linux/arm,$(findstring linux/arm,$(TARGETPLATFORM))) + GOOS=linux + GOARCH=arm + GOARM=7 +endif + +ifeq (linux/arm/v6,$(findstring linux/arm/v6,$(TARGETPLATFORM))) + GOOS=linux + GOARCH=arm + GOARM=6 +endif + +ifeq (linux/arm64,$(findstring linux/arm64,$(TARGETPLATFORM))) + GOOS=linux + GOARCH=arm64 + GOARM=7 +endif + +ifeq (linux/386,$(findstring linux/386,$(TARGETPLATFORM))) + GOOS=linux + GOARCH=386 +endif + +ifeq (linux/amd64,$(findstring linux/amd64,$(TARGETPLATFORM))) + GOOS=linux + GOARCH=amd64 +endif + +ifeq (linux/mips,$(findstring linux/mips,$(TARGETPLATFORM))) + GOOS=linux + GOARCH=mips +endif + +ifeq (linux/mipsle,$(findstring linux/mipsle,$(TARGETPLATFORM))) + GOOS=linux + GOARCH=mipsle +endif + +ifeq (linux/mips64,$(findstring linux/mips64,$(TARGETPLATFORM))) + GOOS=linux + GOARCH=mips64 +endif + +ifeq (linux/mips64le,$(findstring linux/mips64le,$(TARGETPLATFORM))) + GOOS=linux + GOARCH=mips64le +endif + +ifeq (linux/riscv64,$(findstring linux/riscv64,$(TARGETPLATFORM))) + GOOS=linux + GOARCH=riscv64 +endif + all: check build test install .PHONY: all @@ -70,14 +131,28 @@ install: CGO_ENABLED=$(CGO_ENABLED) go install $(BUILD_FLAGS) -tags $(BUILD_TAGS) ./cmd/tendermint .PHONY: install +############################################################################### +### Metrics ### +############################################################################### + +metrics: testdata-metrics + go generate -run="scripts/metricsgen" ./... +.PHONY: metrics + +# By convention, the go tool ignores subdirectories of directories named +# 'testdata'. This command invokes the generate command on the folder directly +# to avoid this. +testdata-metrics: + ls ./scripts/metricsgen/testdata | xargs -I{} go generate -v -run="scripts/metricsgen" ./scripts/metricsgen/testdata/{} +.PHONY: testdata-metrics ############################################################################### ### Mocks ### ############################################################################### -mockery: +mockery: go generate -run="./scripts/mockery_generate.sh" ./... -.PHONY: mockery +.PHONY: mockery ############################################################################### ### Protobuf ### @@ -228,6 +303,11 @@ sync-docs: aws cloudfront create-invalidation --distribution-id ${CF_DISTRIBUTION_ID} --profile terraform --path "/*" ; .PHONY: sync-docs +# Verify that important design docs have ToC entries. +check-docs-toc: + @./docs/presubmit.sh +.PHONY: check-docs-toc + ############################################################################### ### Docker image ### ############################################################################### @@ -244,7 +324,7 @@ build-docker: build-linux # Build linux binary on other platforms build-linux: - GOOS=linux GOARCH=amd64 $(MAKE) build + GOOS=$(GOOS) GOARCH=$(GOARCH) GOARM=$(GOARM) $(MAKE) build .PHONY: build-linux build-docker-localnode: diff --git a/README.md b/README.md index ac7241c3b..7e1861a43 100644 --- a/README.md +++ b/README.md @@ -2,160 +2,178 @@ ![banner](docs/tendermint-core-image.jpg) -[Byzantine-Fault Tolerant](https://en.wikipedia.org/wiki/Byzantine_fault_tolerance) -[State Machines](https://en.wikipedia.org/wiki/State_machine_replication). -Or [Blockchain](), for short. +[Byzantine-Fault Tolerant][bft] [State Machine Replication][smr]. Or +[Blockchain], for short. -[![version](https://img.shields.io/github/tag/tendermint/tendermint.svg)](https://github.com/tendermint/tendermint/releases/latest) -[![API Reference](https://camo.githubusercontent.com/915b7be44ada53c290eb157634330494ebe3e30a/68747470733a2f2f676f646f632e6f72672f6769746875622e636f6d2f676f6c616e672f6764646f3f7374617475732e737667)](https://pkg.go.dev/github.com/tendermint/tendermint) -[![Go version](https://img.shields.io/badge/go-1.15-blue.svg)](https://github.com/moovweb/gvm) -[![Discord chat](https://img.shields.io/discord/669268347736686612.svg)](https://discord.gg/AzefAFd) -[![license](https://img.shields.io/github/license/tendermint/tendermint.svg)](https://github.com/tendermint/tendermint/blob/master/LICENSE) -[![tendermint/tendermint](https://tokei.rs/b1/github/tendermint/tendermint?category=lines)](https://github.com/tendermint/tendermint) -[![Sourcegraph](https://sourcegraph.com/github.com/tendermint/tendermint/-/badge.svg)](https://sourcegraph.com/github.com/tendermint/tendermint?badge) +[![Version][version-badge]][version-url] +[![API Reference][api-badge]][api-url] +[![Go version][go-badge]][go-url] +[![Discord chat][discord-badge]][discord-url] +[![License][license-badge]][license-url] +[![tendermint/tendermint][loc-badge]][loc-url] +[![Sourcegraph][sg-badge]][sg-url] -| Branch | Tests | Coverage | Linting | -| ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------- | -| master | [![CircleCI](https://circleci.com/gh/tendermint/tendermint/tree/master.svg?style=shield)](https://circleci.com/gh/tendermint/tendermint/tree/master)
![Tests](https://github.com/tendermint/tendermint/workflows/Tests/badge.svg?branch=master) | [![codecov](https://codecov.io/gh/tendermint/tendermint/branch/master/graph/badge.svg)](https://codecov.io/gh/tendermint/tendermint) | ![Lint](https://github.com/tendermint/tendermint/workflows/Lint/badge.svg) | +| Branch | Tests | Coverage | Linting | +|--------|-----------------------|------------------------------------------|---------------------| +| main | ![Tests][tests-badge] | [![codecov][codecov-badge]][codecov-url] | ![Lint][lint-badge] | -Tendermint Core is Byzantine Fault Tolerant (BFT) middleware that takes a state transition machine - written in any programming language - -and securely replicates it on many machines. +Tendermint Core is a Byzantine Fault Tolerant (BFT) middleware that takes a +state transition machine - written in any programming language - and securely +replicates it on many machines. -For protocol details, see [the specification](https://github.com/tendermint/spec). +For protocol details, refer to the [Tendermint Specification](./spec/README.md). -For detailed analysis of the consensus protocol, including safety and liveness proofs, -see our recent paper, "[The latest gossip on BFT consensus](https://arxiv.org/abs/1807.04938)". +For detailed analysis of the consensus protocol, including safety and liveness +proofs, read our paper, "[The latest gossip on BFT +consensus](https://arxiv.org/abs/1807.04938)". + +## Documentation + +Complete documentation can be found on the +[website](https://docs.tendermint.com/). ## Releases -Please do not depend on master as your production branch. Use [releases](https://github.com/tendermint/tendermint/releases) instead. +Please do not depend on `main` as your production branch. Use +[releases](https://github.com/tendermint/tendermint/releases) instead. -Tendermint is being used in production in both private and public environments, -most notably the blockchains of the [Cosmos Network](https://cosmos.network/). -However, we are still making breaking changes to the protocol and the APIs and have not yet released v1.0. -See below for more details about [versioning](#versioning). +Tendermint has been in the production of private and public environments, most +notably the blockchains of the Cosmos Network. we haven't released v1.0 yet +since we are making breaking changes to the protocol and the APIs. See below for +more details about [versioning](#versioning). -In any case, if you intend to run Tendermint in production, we're happy to help. You can -contact us [over email](mailto:hello@interchain.berlin) or [join the chat](https://discord.gg/AzefAFd). +In any case, if you intend to run Tendermint in production, we're happy to help. +You can contact us [over email](mailto:hello@interchain.io) or [join the +chat](https://discord.gg/cosmosnetwork). + +More on how releases are conducted can be found [here](./RELEASES.md). ## Security To report a security vulnerability, see our [bug bounty -program](https://hackerone.com/tendermint). -For examples of the kinds of bugs we're looking for, see [our security policy](SECURITY.md) +program](https://hackerone.com/cosmos). For examples of the kinds of bugs we're +looking for, see [our security policy](SECURITY.md). -We also maintain a dedicated mailing list for security updates. We will only ever use this mailing list -to notify you of vulnerabilities and fixes in Tendermint Core. You can subscribe [here](http://eepurl.com/gZ5hQD). +We also maintain a dedicated mailing list for security updates. We will only +ever use this mailing list to notify you of vulnerabilities and fixes in +Tendermint Core. You can subscribe [here](http://eepurl.com/gZ5hQD). ## Minimum requirements -| Requirement | Notes | -| ----------- | ---------------- | -| Go version | Go1.15 or higher | - -## Documentation - -Complete documentation can be found on the [website](https://docs.tendermint.com/master/). +| Requirement | Notes | +|-------------|-------------------| +| Go version | Go 1.18 or higher | ### Install -See the [install instructions](/docs/introduction/install.md). +See the [install instructions](./docs/introduction/install.md). ### Quick Start -- [Single node](/docs/introduction/quick-start.md) -- [Local cluster using docker-compose](/docs/networks/docker-compose.md) -- [Remote cluster using Terraform and Ansible](/docs/networks/terraform-and-ansible.md) -- [Join the Cosmos testnet](https://cosmos.network/testnet) +- [Single node](./docs/introduction/quick-start.md) +- [Local cluster using docker-compose](./docs/tools/docker-compose.md) +- [Remote cluster using Terraform and Ansible](./docs/tools/terraform-and-ansible.md) ## Contributing Please abide by the [Code of Conduct](CODE_OF_CONDUCT.md) in all interactions. -Before contributing to the project, please take a look at the [contributing guidelines](CONTRIBUTING.md) -and the [style guide](STYLE_GUIDE.md). You may also find it helpful to read the -[specifications](https://github.com/tendermint/spec), watch the [Developer Sessions](/docs/DEV_SESSIONS.md), -and familiarize yourself with our -[Architectural Decision Records](https://github.com/tendermint/tendermint/tree/master/docs/architecture). +Before contributing to the project, please take a look at the [contributing +guidelines](CONTRIBUTING.md) and the [style guide](STYLE_GUIDE.md). You may also +find it helpful to read the [specifications](./spec/README.md), and familiarize +yourself with our [Architectural Decision Records +(ADRs)](./docs/architecture/README.md) and +[Request For Comments (RFCs)](./docs/rfc/README.md). ## Versioning ### Semantic Versioning -Tendermint uses [Semantic Versioning](http://semver.org/) to determine when and how the version changes. -According to SemVer, anything in the public API can change at any time before version 1.0.0 +Tendermint uses [Semantic Versioning](http://semver.org/) to determine when and +how the version changes. According to SemVer, anything in the public API can +change at any time before version 1.0.0 -To provide some stability to Tendermint users in these 0.X.X days, the MINOR version is used -to signal breaking changes across a subset of the total public API. This subset includes all -interfaces exposed to other processes (cli, rpc, p2p, etc.), but does not -include the Go APIs. +To provide some stability to users of 0.X.X versions of Tendermint, the MINOR +version is used to signal breaking changes across Tendermint's API. This API +includes all publicly exposed types, functions, and methods in non-internal Go +packages as well as the types and methods accessible via the Tendermint RPC +interface. -That said, breaking changes in the following packages will be documented in the -CHANGELOG even if they don't lead to MINOR version bumps: - -- crypto -- config -- libs - - bech32 - - bits - - bytes - - json - - log - - math - - net - - os - - protoio - - rand - - sync - - strings - - service -- node -- rpc/client -- types +Breaking changes to these public APIs will be documented in the CHANGELOG. ### Upgrades -In an effort to avoid accumulating technical debt prior to 1.0.0, -we do not guarantee that breaking changes (ie. bumps in the MINOR version) -will work with existing Tendermint blockchains. In these cases you will -have to start a new blockchain, or write something custom to get the old -data into the new chain. However, any bump in the PATCH version should be -compatible with existing blockchain histories. - +In an effort to avoid accumulating technical debt prior to 1.0.0, we do not +guarantee that breaking changes (ie. bumps in the MINOR version) will work with +existing Tendermint blockchains. In these cases you will have to start a new +blockchain, or write something custom to get the old data into the new chain. +However, any bump in the PATCH version should be compatible with existing +blockchain histories. For more information on upgrading, see [UPGRADING.md](./UPGRADING.md). ### Supported Versions -Because we are a small core team, we only ship patch updates, including security updates, -to the most recent minor release and the second-most recent minor release. Consequently, -we strongly recommend keeping Tendermint up-to-date. Upgrading instructions can be found -in [UPGRADING.md](./UPGRADING.md). +Because we are a small core team, we only ship patch updates, including security +updates, to the most recent minor release and the second-most recent minor +release. Consequently, we strongly recommend keeping Tendermint up-to-date. +Upgrading instructions can be found in [UPGRADING.md](./UPGRADING.md). ## Resources -### Tendermint Core +### Libraries -For details about the blockchain data structures and the p2p protocols, see the -[Tendermint specification](https://docs.tendermint.com/master/spec/). - -For details on using the software, see the [documentation](/docs/) which is also -hosted at: - -### Tools - -Benchmarking is provided by [`tm-load-test`](https://github.com/informalsystems/tm-load-test). -Additional tooling can be found in [/docs/tools](/docs/tools). +- [Cosmos SDK](http://github.com/cosmos/cosmos-sdk); A framework for building + applications in Golang +- [Tendermint in Rust](https://github.com/informalsystems/tendermint-rs) +- [ABCI Tower](https://github.com/penumbra-zone/tower-abci) ### Applications -- [Cosmos SDK](http://github.com/cosmos/cosmos-sdk); a cryptocurrency application framework -- [Ethermint](http://github.com/cosmos/ethermint); Ethereum on Tendermint -- [Many more](https://tendermint.com/ecosystem) +- [Cosmos Hub](https://hub.cosmos.network/) +- [Terra](https://www.terra.money/) +- [Celestia](https://celestia.org/) +- [Anoma](https://anoma.network/) +- [Vocdoni](https://docs.vocdoni.io/) ### Research - [The latest gossip on BFT consensus](https://arxiv.org/abs/1807.04938) - [Master's Thesis on Tendermint](https://atrium.lib.uoguelph.ca/xmlui/handle/10214/9769) - [Original Whitepaper: "Tendermint: Consensus Without Mining"](https://tendermint.com/static/docs/tendermint.pdf) -- [Blog](https://blog.cosmos.network/tendermint/home) +- [Tendermint Core Blog](https://medium.com/tendermint/tagged/tendermint-core) +- [Cosmos Blog](https://blog.cosmos.network/tendermint/home) + +## Join us! + +Tendermint Core is maintained by [Interchain GmbH](https://interchain.berlin). +If you'd like to work full-time on Tendermint Core, +[we're hiring](https://interchain-gmbh.breezy.hr/)! + +Funding for Tendermint Core development comes primarily from the +[Interchain Foundation](https://interchain.io), a Swiss non-profit. The +Tendermint trademark is owned by [Tendermint Inc.](https://tendermint.com), the +for-profit entity that also maintains [tendermint.com](https://tendermint.com). + + +[bft]: https://en.wikipedia.org/wiki/Byzantine_fault_tolerance +[smr]: https://en.wikipedia.org/wiki/State_machine_replication +[Blockchain]: https://en.wikipedia.org/wiki/Blockchain +[version-badge]: https://img.shields.io/github/tag/tendermint/tendermint.svg +[version-url]: https://github.com/tendermint/tendermint/releases/latest +[api-badge]: https://camo.githubusercontent.com/915b7be44ada53c290eb157634330494ebe3e30a/68747470733a2f2f676f646f632e6f72672f6769746875622e636f6d2f676f6c616e672f6764646f3f7374617475732e737667 +[api-url]: https://pkg.go.dev/github.com/tendermint/tendermint +[go-badge]: https://img.shields.io/badge/go-1.17-blue.svg +[go-url]: https://github.com/moovweb/gvm +[discord-badge]: https://img.shields.io/discord/669268347736686612.svg +[discord-url]: https://discord.gg/cosmosnetwork +[license-badge]: https://img.shields.io/github/license/tendermint/tendermint.svg +[license-url]: https://github.com/tendermint/tendermint/blob/main/LICENSE +[loc-badge]: https://tokei.rs/b1/github/tendermint/tendermint?category=lines +[loc-url]: https://github.com/tendermint/tendermint +[sg-badge]: https://sourcegraph.com/github.com/tendermint/tendermint/-/badge.svg +[sg-url]: https://sourcegraph.com/github.com/tendermint/tendermint?badge +[tests-badge]: https://github.com/tendermint/tendermint/workflows/Tests/badge.svg?branch=main +[codecov-badge]: https://codecov.io/gh/tendermint/tendermint/branch/main/graph/badge.svg +[codecov-url]: https://codecov.io/gh/tendermint/tendermint +[lint-badge]: https://github.com/tendermint/tendermint/workflows/Lint/badge.svg diff --git a/RELEASES.md b/RELEASES.md new file mode 100644 index 000000000..dc869fbae --- /dev/null +++ b/RELEASES.md @@ -0,0 +1,345 @@ +# Releases + +Tendermint uses modified [semantic versioning](https://semver.org/) with each +release following a `vX.Y.Z` format. Tendermint is currently on major version 0 +and uses the minor version to signal breaking changes. The `main` branch is +used for active development and thus it is not advisable to build against it. + +The latest changes are always initially merged into `main`. Releases are +specified using tags and are built from long-lived "backport" branches that are +cut from `main` when the release process begins. Each release "line" (e.g. +0.34 or 0.33) has its own long-lived backport branch, and the backport branches +have names like `v0.34.x` or `v0.33.x` (literally, `x`; it is not a placeholder +in this case). Tendermint only maintains the last two releases at a time (the +oldest release is predominantly just security patches). + +## Backporting + +As non-breaking changes land on `main`, they should also be backported to +these backport branches. + +We use Mergify's [backport feature](https://mergify.io/features/backports) to +automatically backport to the needed branch. There should be a label for any +backport branch that you'll be targeting. To notify the bot to backport a pull +request, mark the pull request with the label corresponding to the correct +backport branch. For example, to backport to v0.35.x, add the label +`S:backport-to-v0.35.x`. Once the original pull request is merged, the bot will +try to cherry-pick the pull request to the backport branch. If the bot fails to +backport, it will open a pull request. The author of the original pull request +is responsible for solving the conflicts and merging the pull request. + +### Creating a backport branch + +If this is the first release candidate for a minor version release, e.g. +v0.25.0, you get to have the honor of creating the backport branch! + +Note that, after creating the backport branch, you'll also need to update the +tags on `main` so that `go mod` is able to order the branches correctly. You +should tag `main` with a "dev" tag that is "greater than" the backport +branches tags. See [#6072](https://github.com/tendermint/tendermint/pull/6072) +for more context. + +In the following example, we'll assume that we're making a backport branch for +the 0.35.x line. + +1. Start on `main` + +2. Create and push the backport branch: + ```sh + git checkout -b v0.35.x + git push origin v0.35.x + ``` + +3. Create a PR to update the documentation directory for the backport branch. + + We only maintain RFC and ADR documents on main, to avoid confusion. In + addition, we rewrite Markdown URLs pointing to main to point to the + backport branch, so that generated documentation will link to the correct + versions of files elsewhere in the repository. For context on the latter, see + https://github.com/tendermint/tendermint/issues/7675. + + To prepare the PR: + ```sh + # Remove the RFC and ADR documents from the backport. + # We only maintain these on main to avoid confusion. + git rm -r docs/rfc docs/architecture + + # Update absolute links to point to the backport. + go run ./scripts/linkpatch -recur -target v0.35.x -skip-path docs/DOCS_README.md,docs/README.md docs + + # Create and push the PR. + git checkout -b update-docs-v035x + git commit -m "Update docs for v0.35.x backport branch." docs + git push -u origin update-docs-v035x + ``` + + Be sure to merge this PR before making other changes on the newly-created + backport branch. + +After doing these steps, go back to `main` and do the following: + +1. Tag `main` as the dev branch for the _next_ minor version release and push + it up to GitHub. + For example: + ```sh + git tag -a v0.36.0-dev -m "Development base for Tendermint v0.36." + git push origin v0.36.0-dev + ``` + +2. Create a new workflow to run e2e nightlies for the new backport branch. (See + [e2e-nightly-main.yml][e2e] for an example.) + +3. Add a new section to the Mergify config (`.github/mergify.yml`) to enable the + backport bot to work on this branch, and add a corresponding `S:backport-to-v0.35.x` + [label](https://github.com/tendermint/tendermint/labels) so the bot can be triggered. + +4. Add a new section to the Dependabot config (`.github/dependabot.yml`) to + enable automatic update of Go dependencies on this branch. Copy and edit one + of the existing branch configurations to set the correct `target-branch`. + +[e2e]: https://github.com/tendermint/tendermint/blob/main/.github/workflows/e2e-nightly-main.yml + +## Release candidates + +Before creating an official release, especially a minor release, we may want to +create a release candidate (RC) for our friends and partners to test out. We use +git tags to create RCs, and we build them off of backport branches. + +Tags for RCs should follow the "standard" release naming conventions, with +`-rcX` at the end (for example, `v0.35.0-rc0`). + +(Note that branches and tags _cannot_ have the same names, so it's important +that these branches have distinct names from the tags/release names.) + +If this is the first RC for a minor release, you'll have to make a new backport +branch (see above). Otherwise: + +1. Start from the backport branch (e.g. `v0.35.x`). +2. Run the integration tests and the e2e nightlies + (which can be triggered from the Github UI; + e.g., https://github.com/tendermint/tendermint/actions/workflows/e2e-nightly-34x.yml). +3. Prepare the changelog: + - Move the changes included in `CHANGELOG_PENDING.md` into `CHANGELOG.md`. Each RC should have + it's own changelog section. These will be squashed when the final candidate is released. + - Run `python ./scripts/linkify_changelog.py CHANGELOG.md` to add links for + all PRs + - Ensure that `UPGRADING.md` is up-to-date and includes notes on any breaking changes + or other upgrading flows. + - Bump TMVersionDefault version in `version.go` + - Bump P2P and block protocol versions in `version.go`, if necessary. + Check the changelog for breaking changes in these components. + - Bump ABCI protocol version in `version.go`, if necessary +4. Open a PR with these changes against the backport branch. +5. Once these changes have landed on the backport branch, be sure to pull them back down locally. +6. Once you have the changes locally, create the new tag, specifying a name and a tag "message": + `git tag -a v0.35.0-rc0 -m "Release Candidate v0.35.0-rc0` +7. Push the tag back up to origin: + `git push origin v0.35.0-rc0` + Now the tag should be available on the repo's releases page. +8. Future RCs will continue to be built off of this branch. + +Note that this process should only be used for "true" RCs -- release candidates +that, if successful, will be the next release. For more experimental "RCs," +create a new, short-lived branch and tag that instead. + +## Minor release + +This minor release process assumes that this release was preceded by release +candidates. If there were no release candidates, begin by creating a backport +branch, as described above. + +Before performing these steps, be sure the +[Minor Release Checklist](#minor-release-checklist) has been completed. + +1. Start on the backport branch (e.g. `v0.35.x`) +2. Run integration tests (`make test_integrations`) and the e2e nightlies. +3. Prepare the release: + - "Squash" changes from the changelog entries for the RCs into a single entry, + and add all changes included in `CHANGELOG_PENDING.md`. + (Squashing includes both combining all entries, as well as removing or simplifying + any intra-RC changes. It may also help to alphabetize the entries by package name.) + - Run `python ./scripts/linkify_changelog.py CHANGELOG.md` to add links for + all PRs + - Ensure that `UPGRADING.md` is up-to-date and includes notes on any breaking changes + or other upgrading flows. + - Bump TMVersionDefault version in `version.go` + - Bump P2P and block protocol versions in `version.go`, if necessary + - Bump ABCI protocol version in `version.go`, if necessary +4. Open a PR with these changes against the backport branch. +5. Once these changes are on the backport branch, push a tag with prepared release details. + This will trigger the actual release `v0.35.0`. + - `git tag -a v0.35.0 -m 'Release v0.35.0'` + - `git push origin v0.35.0` +6. Make sure that `main` is updated with the latest `CHANGELOG.md`, `CHANGELOG_PENDING.md`, and `UPGRADING.md`. +7. Add the release to the documentation site generator config (see + [DOCS_README.md](./docs/DOCS_README.md) for more details). In summary: + - Start on branch `main`. + - Add a new line at the bottom of [`docs/versions`](./docs/versions) to + ensure the newest release is the default for the landing page. + - Add a new entry to `themeConfig.versions` in + [`docs/.vuepress/config.js`](./docs/.vuepress/config.js) to include the + release in the dropdown versions menu. + - Commit these changes to `main` and backport them into the backport + branch for this release. + +## Patch release + +Patch releases are done differently from minor releases: They are built off of +long-lived backport branches, rather than from main. As non-breaking changes +land on `main`, they should also be backported into these backport branches. + +Patch releases don't have release candidates by default, although any tricky +changes may merit a release candidate. + +To create a patch release: + +1. Checkout the long-lived backport branch: `git checkout v0.35.x` +2. Run integration tests (`make test_integrations`) and the nightlies. +3. Check out a new branch and prepare the release: + - Copy `CHANGELOG_PENDING.md` to top of `CHANGELOG.md` + - Run `python ./scripts/linkify_changelog.py CHANGELOG.md` to add links for all issues + - Run `bash ./scripts/authors.sh` to get a list of authors since the latest release, and add the GitHub aliases of external contributors to the top of the CHANGELOG. To lookup an alias from an email, try `bash ./scripts/authors.sh ` + - Reset the `CHANGELOG_PENDING.md` + - Bump the TMDefaultVersion in `version.go` + - Bump the ABCI version number, if necessary. + (Note that ABCI follows semver, and that ABCI versions are the only versions + which can change during patch releases, and only field additions are valid patch changes.) +4. Open a PR with these changes that will land them back on `v0.35.x` +5. Once this change has landed on the backport branch, make sure to pull it locally, then push a tag. + - `git tag -a v0.35.1 -m 'Release v0.35.1'` + - `git push origin v0.35.1` +6. Create a pull request back to main with the CHANGELOG & version changes from the latest release. + - Remove all `R:patch` labels from the pull requests that were included in the release. + - Do not merge the backport branch into main. + +## Minor Release Checklist + +The following set of steps are performed on all releases that increment the +_minor_ version, e.g. v0.25 to v0.26. These steps ensure that Tendermint is well +tested, stable, and suitable for adoption by the various diverse projects that +rely on Tendermint. + +### Feature Freeze + +Ahead of any minor version release of Tendermint, the software enters 'Feature +Freeze' for at least two weeks. A feature freeze means that _no_ new features +are added to the code being prepared for release. No code changes should be made +to the code being released that do not directly improve pressing issues of code +quality. The following must not be merged during a feature freeze: + +* Refactors that are not related to specific bug fixes. +* Dependency upgrades. +* New test code that does not test a discovered regression. +* New features of any kind. +* Documentation or spec improvements that are not related to the newly developed + code. + +This period directly follows the creation of the [backport +branch](#creating-a-backport-branch). The Tendermint team instead directs all +attention to ensuring that the existing code is stable and reliable. Broken +tests are fixed, flakey-tests are remedied, end-to-end test failures are +thoroughly diagnosed and all efforts of the team are aimed at improving the +quality of the code. During this period, the upgrade harness tests are run +repeatedly and a variety of in-house testnets are run to ensure Tendermint +functions at the scale it will be used by application developers and node +operators. + +### Nightly End-To-End Tests + +The Tendermint team maintains [a set of end-to-end +tests](https://github.com/tendermint/tendermint/blob/main/test/e2e/README.md#L1) +that run each night on the latest commit of the project and on the code in the +tip of each supported backport branch. These tests start a network of +containerized Tendermint processes and run automated checks that the network +functions as expected in both stable and unstable conditions. During the feature +freeze, these tests are run nightly and must pass consistently for a release of +Tendermint to be considered stable. + +### Upgrade Harness + +> TODO(williambanfield): Change to past tense and clarify this section once +> upgrade harness is complete. + +The Tendermint team is creating an upgrade test harness to exercise the workflow +of stopping an instance of Tendermint running one version of the software and +starting up the same application running the next version. To support upgrade +testing, we will add the ability to terminate the Tendermint process at specific +pre-defined points in its execution so that we can verify upgrades work in a +representative sample of stop conditions. + +### Large Scale Testnets + +The Tendermint end-to-end tests run a small network (~10s of nodes) to exercise +basic consensus interactions. Real world deployments of Tendermint often have +over a hundred nodes just in the validator set, with many others acting as full +nodes and sentry nodes. To gain more assurance before a release, we will also +run larger-scale test networks to shake out emergent behaviors at scale. + +Large-scale test networks are run on a set of virtual machines (VMs). Each VM is +equipped with 4 Gigabytes of RAM and 2 CPU cores. The network runs a very simple +key-value store application. The application adds artificial delays to different +ABCI calls to simulate a slow application. Each testnet is briefly run with no +load being generated to collect a baseline performance. Once baseline is +captured, a consistent load is applied across the network. This load takes the +form of 10% of the running nodes all receiving a consistent stream of two +hundred transactions per minute each. + +During each test net, the following metrics are monitored and collected on each +node: + +* Consensus rounds per height +* Maximum connected peers, Minimum connected peers, Rate of change of peer connections +* Memory resident set size +* CPU utilization +* Blocks produced per minute +* Seconds for each step of consensus (Propose, Prevote, Precommit, Commit) +* Latency to receive block proposals + +For these tests we intentionally target low-powered host machines (with low core +counts and limited memory) to ensure we observe similar kinds of resource contention +and limitation that real-world deployments of Tendermint experience in production. + +#### 200 Node Testnet + +To test the stability and performance of Tendermint in a real world scenario, +a 200 node test network is run. The network comprises 5 seed nodes, 100 +validators and 95 non-validating full nodes. All nodes begin by dialing +a subset of the seed nodes to discover peers. The network is run for several +days, with metrics being collected continuously. In cases of changes to performance +critical systems, testnets of larger sizes should be considered. + +#### Rotating Node Testnet + +Real-world deployments of Tendermint frequently see new nodes arrive and old +nodes exit the network. The rotating node testnet ensures that Tendermint is +able to handle this reliably. In this test, a network with 10 validators and +3 seed nodes is started. A rolling set of 25 full nodes are started and each +connects to the network by dialing one of the seed nodes. Once the node is able +to blocksync to the head of the chain and begins producing blocks using +Tendermint consensus it is stopped. Once stopped, a new node is started and +takes its place. This network is run for several days. + +#### Network Partition Testnet + +Tendermint is expected to recover from network partitions. A partition where no +subset of the nodes is left with the super-majority of the stake is expected to +stop making blocks. Upon alleviation of the partition, the network is expected +to once again become fully connected and capable of producing blocks. The +network partition testnet ensures that Tendermint is able to handle this +reliably at scale. In this test, a network with 100 validators and 95 full +nodes is started. All validators have equal stake. Once the network is +producing blocks, a set of firewall rules is deployed to create a partitioned +network with 50% of the stake on one side and 50% on the other. Once the +network stops producing blocks, the firewall rules are removed and the nodes +are monitored to ensure they reconnect and that the network again begins +producing blocks. + +#### Absent Stake Testnet + +Tendermint networks often run with _some_ portion of the voting power offline. +The absent stake testnet ensures that large networks are able to handle this +reliably. A set of 150 validator nodes and three seed nodes is started. The set +of 150 validators is configured to only possess a cumulative stake of 67% of +the total stake. The remaining 33% of the stake is configured to belong to +a validator that is never actually run in the test network. The network is run +for multiple days, ensuring that it is able to produce blocks without issue. diff --git a/SECURITY.md b/SECURITY.md index 351f5606c..61f02e10a 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,98 +2,146 @@ ## Reporting a Bug -As part of our [Coordinated Vulnerability Disclosure -Policy](https://tendermint.com/security), we operate a [bug -bounty](https://hackerone.com/tendermint). -See the policy for more details on submissions and rewards, and see "Example Vulnerabilities" (below) for examples of the kinds of bugs we're most interested in. +As part of our [Coordinated Vulnerability Disclosure Policy](https://tendermint.com/security), +we operate a [bug bounty][hackerone]. See the policy for more +details on submissions and rewards, and see "Example Vulnerabilities" (below) +for examples of the kinds of bugs we're most interested in. -### Guidelines +### Guidelines We require that all researchers: -* Use the bug bounty to disclose all vulnerabilities, and avoid posting vulnerability information in public places, including Github Issues, Discord channels, and Telegram groups -* Make every effort to avoid privacy violations, degradation of user experience, disruption to production systems (including but not limited to the Cosmos Hub), and destruction of data -* Keep any information about vulnerabilities that you’ve discovered confidential between yourself and the Tendermint Core engineering team until the issue has been resolved and disclosed +* Use the bug bounty to disclose all vulnerabilities, and avoid posting + vulnerability information in public places, including Github Issues, Discord + channels, and Telegram groups +* Make every effort to avoid privacy violations, degradation of user experience, + disruption to production systems (including but not limited to the Cosmos + Hub), and destruction of data +* Keep any information about vulnerabilities that you’ve discovered confidential + between yourself and the Tendermint Core engineering team until the issue has + been resolved and disclosed * Avoid posting personally identifiable information, privately or publicly If you follow these guidelines when reporting an issue to us, we commit to: -* Not pursue or support any legal action related to your research on this vulnerability -* Work with you to understand, resolve and ultimately disclose the issue in a timely fashion +* Not pursue or support any legal action related to your research on this + vulnerability +* Work with you to understand, resolve and ultimately disclose the issue in a + timely fashion -## Disclosure Process +## Disclosure Process Tendermint Core uses the following disclosure process: -1. Once a security report is received, the Tendermint Core team works to verify the issue and confirm its severity level using CVSS. -2. The Tendermint Core team collaborates with the Gaia team to determine the vulnerability’s potential impact on the Cosmos Hub. -3. Patches are prepared for eligible releases of Tendermint in private repositories. See “Supported Releases” below for more information on which releases are considered eligible. -4. If it is determined that a CVE-ID is required, we request a CVE through a CVE Numbering Authority. -5. We notify the community that a security release is coming, to give users time to prepare their systems for the update. Notifications can include forum posts, tweets, and emails to partners and validators, including emails sent to the [Tendermint Security Mailing List](https://berlin.us4.list-manage.com/subscribe?u=431b35421ff7edcc77df5df10&id=3fe93307bc). -6. 24 hours following this notification, the fixes are applied publicly and new releases are issued. -7. Cosmos SDK and Gaia update their Tendermint Core dependencies to use these releases, and then themselves issue new releases. -8. Once releases are available for Tendermint Core, Cosmos SDK and Gaia, we notify the community, again, through the same channels as above. We also publish a Security Advisory on Github and publish the CVE, as long as neither the Security Advisory nor the CVE include any information on how to exploit these vulnerabilities beyond what information is already available in the patch itself. -9. Once the community is notified, we will pay out any relevant bug bounties to submitters. -10. One week after the releases go out, we will publish a post with further details on the vulnerability as well as our response to it. +1. Once a security report is received, the Tendermint Core team works to verify + the issue and confirm its severity level using CVSS. +2. The Tendermint Core team collaborates with the Gaia team to determine the + vulnerability’s potential impact on the Cosmos Hub. +3. Patches are prepared for eligible releases of Tendermint in private + repositories. See “Supported Releases” below for more information on which + releases are considered eligible. +4. If it is determined that a CVE-ID is required, we request a CVE through a CVE + Numbering Authority. +5. We notify the community that a security release is coming, to give users time + to prepare their systems for the update. Notifications can include forum + posts, tweets, and emails to partners and validators, including emails sent + to the [Tendermint Security Mailing List][tmsec-mailing]. +6. 24 hours following this notification, the fixes are applied publicly and new + releases are issued. +7. Cosmos SDK and Gaia update their Tendermint Core dependencies to use these + releases, and then themselves issue new releases. +8. Once releases are available for Tendermint Core, Cosmos SDK and Gaia, we + notify the community, again, through the same channels as above. We also + publish a Security Advisory on Github and publish the CVE, as long as neither + the Security Advisory nor the CVE include any information on how to exploit + these vulnerabilities beyond what information is already available in the + patch itself. +9. Once the community is notified, we will pay out any relevant bug bounties to + submitters. +10. One week after the releases go out, we will publish a post with further + details on the vulnerability as well as our response to it. -This process can take some time. Every effort will be made to handle the bug in as timely a manner as possible, however it's important that we follow the process described above to ensure that disclosures are handled consistently and to keep Tendermint Core and its downstream dependent projects--including but not limited to Gaia and the Cosmos Hub--as secure as possible. +This process can take some time. Every effort will be made to handle the bug in +as timely a manner as possible, however it's important that we follow the +process described above to ensure that disclosures are handled consistently and +to keep Tendermint Core and its downstream dependent projects--including but not +limited to Gaia and the Cosmos Hub--as secure as possible. -### Example Timeline +### Example Timeline -The following is an example timeline for the triage and response. The required roles and team members are described in parentheses after each task; however, multiple people can play each role and each person may play multiple roles. +The following is an example timeline for the triage and response. The required +roles and team members are described in parentheses after each task; however, +multiple people can play each role and each person may play multiple roles. -#### > 24 Hours Before Release Time +#### 24+ Hours Before Release Time -1. Request CVE number (ADMIN) -2. Gather emails and other contact info for validators (COMMS LEAD) -3. Test fixes on a testnet (TENDERMINT ENG, COSMOS ENG) -4. Write “Security Advisory” for forum (TENDERMINT LEAD) +1. Request CVE number (ADMIN) +2. Gather emails and other contact info for validators (COMMS LEAD) +3. Create patches in a private security repo, and ensure that PRs are open + targeting all relevant release branches (TENDERMINT ENG, TENDERMINT LEAD) +4. Test fixes on a testnet (TENDERMINT ENG, COSMOS SDK ENG) +5. Write “Security Advisory” for forum (TENDERMINT LEAD) #### 24 Hours Before Release Time -1. Post “Security Advisory” pre-notification on forum (TENDERMINT LEAD) -2. Post Tweet linking to forum post (COMMS LEAD) -3. Announce security advisory/link to post in various other social channels (Telegram, Discord) (COMMS LEAD) -4. Send emails to validators or other users (PARTNERSHIPS LEAD) +1. Post “Security Advisory” pre-notification on forum (TENDERMINT LEAD) +2. Post Tweet linking to forum post (COMMS LEAD) +3. Announce security advisory/link to post in various other social channels + (Telegram, Discord) (COMMS LEAD) +4. Send emails to validators or other users (PARTNERSHIPS LEAD) #### Release Time -1. Cut Tendermint releases for eligible versions (TENDERMINT ENG, TENDERMINT LEAD) +1. Cut Tendermint releases for eligible versions (TENDERMINT ENG, TENDERMINT + LEAD) 2. Cut Cosmos SDK release for eligible versions (COSMOS ENG) 3. Cut Gaia release for eligible versions (GAIA ENG) 4. Post “Security releases” on forum (TENDERMINT LEAD) 5. Post new Tweet linking to forum post (COMMS LEAD) -6. Remind everyone via social channels (Telegram, Discord) that the release is out (COMMS LEAD) -7. Send emails to validators or other users (COMMS LEAD) -8. Publish Security Advisory and CVE, if CVE has no sensitive information (ADMIN) +6. Remind everyone via social channels (Telegram, Discord) that the release is + out (COMMS LEAD) +7. Send emails to validators or other users (COMMS LEAD) +8. Publish Security Advisory and CVE, if CVE has no sensitive information + (ADMIN) #### After Release Time 1. Write forum post with exploit details (TENDERMINT LEAD) -2. Approve pay-out on HackerOne for submitter (ADMIN) +2. Approve pay-out on HackerOne for submitter (ADMIN) #### 7 Days After Release Time -1. Publish CVE if it has not yet been published (ADMIN) +1. Publish CVE if it has not yet been published (ADMIN) 2. Publish forum post with exploit details (TENDERMINT ENG, TENDERMINT LEAD) ## Supported Releases -The Tendermint Core team commits to releasing security patch releases for both the latest minor release as well for the major/minor release that the Cosmos Hub is running. +The Tendermint Core team commits to releasing security patch releases for both +the latest minor release as well for the major/minor release that the Cosmos Hub +is running. -If you are running older versions of Tendermint Core, we encourage you to upgrade at your earliest opportunity so that you can receive security patches directly from the Tendermint repo. While you are welcome to backport security patches to older versions for your own use, we will not publish or promote these backports. +If you are running older versions of Tendermint Core, we encourage you to +upgrade at your earliest opportunity so that you can receive security patches +directly from the Tendermint repo. While you are welcome to backport security +patches to older versions for your own use, we will not publish or promote these +backports. ## Scope -The full scope of our bug bounty program is outlined on our [Hacker One program page](https://hackerone.com/tendermint). Please also note that, in the interest of the safety of our users and staff, a few things are explicitly excluded from scope: +The full scope of our bug bounty program is outlined on our +[Hacker One program page][hackerone]. Please also note that, in the interest of +the safety of our users and staff, a few things are explicitly excluded from +scope: -* Any third-party services -* Findings from physical testing, such as office access +* Any third-party services +* Findings from physical testing, such as office access * Findings derived from social engineering (e.g., phishing) -## Example Vulnerabilities +## Example Vulnerabilities -The following is a list of examples of the kinds of vulnerabilities that we’re most interested in. It is not exhaustive: there are other kinds of issues we may also be interested in! +The following is a list of examples of the kinds of vulnerabilities that we’re +most interested in. It is not exhaustive: there are other kinds of issues we may +also be interested in! ### Specification @@ -105,7 +153,8 @@ The following is a list of examples of the kinds of vulnerabilities that we’re Assuming less than 1/3 of the voting power is Byzantine (malicious): -* Validation of blockchain data structures, including blocks, block parts, votes, and so on +* Validation of blockchain data structures, including blocks, block parts, + votes, and so on * Execution of blocks * Validator set changes * Proposer round robin @@ -114,6 +163,9 @@ Assuming less than 1/3 of the voting power is Byzantine (malicious): * A node halting (liveness failure) * Syncing new and old nodes +Assuming more than 1/3 the voting power is Byzantine: + +* Attacks that go unpunished (unhandled evidence) ### Networking @@ -139,7 +191,7 @@ Attacks may come through the P2P network or the RPC layer: ### Libraries -* Serialization (Amino) +* Serialization * Reading/Writing files and databases ### Cryptography @@ -150,5 +202,8 @@ Attacks may come through the P2P network or the RPC layer: ### Light Client -* Core verification +* Core verification * Bisection/sequential algorithms + +[hackerone]: https://hackerone.com/cosmos +[tmsec-mailing]: https://berlin.us4.list-manage.com/subscribe?u=431b35421ff7edcc77df5df10&id=3fe93307bc diff --git a/UPGRADING.md b/UPGRADING.md index b4fc34e09..710807615 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -1,6 +1,21 @@ # Upgrading Tendermint Core -This guide provides instructions for upgrading to specific versions of Tendermint Core. +This guide provides instructions for upgrading to specific versions of +Tendermint Core. + +## v0.37 (Unreleased) + +This version requires a coordinated network upgrade. It alters the elements in +the predigest of the `LastResultsHash` and thus all nodes must upgrade together +(see [\#9175](https://github.com/tendermint/tendermint/pull/9175)). + +NOTE: v0.35 was recalled and v0.36 was skipped + +### ABCI Changes + +* In v0.34, messages on the wire used to be length-delimited with `int64` varint + values, which was inconsistent with the `uint64` varint length delimiters used + in the P2P layer. Both now consistently use `uint64` varint length delimiters. ## Unreleased @@ -443,7 +458,7 @@ use `make build_c` / `make install_c` (full instructions can be found at ## v0.31.0 -This release contains a breaking change to the behaviour of the pubsub system. +This release contains a breaking change to the behavior of the pubsub system. It also contains some minor breaking changes in the Go API and ABCI. There are no changes to the block or p2p protocols, so v0.31.0 should work fine with blockchains created from the v0.30 series. @@ -461,7 +476,7 @@ In this case, the WS client will receive an error with description: "error": { "code": -32000, "msg": "Server error", - "data": "subscription was cancelled (reason: client is not pulling messages fast enough)" // or "subscription was cancelled (reason: Tendermint exited)" + "data": "subscription was canceled (reason: client is not pulling messages fast enough)" // or "subscription was canceled (reason: Tendermint exited)" } } @@ -642,7 +657,7 @@ to `timeout_propose = "3s"`. ### RPC Changes -The default behaviour of `/abci_query` has been changed to not return a proof, +The default behavior of `/abci_query` has been changed to not return a proof, and the name of the parameter that controls this has been changed from `trusted` to `prove`. To get proofs with your queries, ensure you set `prove=true`. diff --git a/abci/client/client.go b/abci/client/client.go index 0c63d0ce5..2fdc4cc7e 100644 --- a/abci/client/client.go +++ b/abci/client/client.go @@ -14,7 +14,7 @@ const ( echoRetryIntervalSeconds = 1 ) -//go:generate mockery --case underscore --name Client +//go:generate ../../scripts/mockery_generate.sh Client // Client defines an interface for an ABCI client. // All `Async` methods return a `ReqRes` object. diff --git a/abci/client/mocks/client.go b/abci/client/mocks/client.go index c25c49542..f2ae585a2 100644 --- a/abci/client/mocks/client.go +++ b/abci/client/mocks/client.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/abci/cmd/abci-cli/abci-cli.go b/abci/cmd/abci-cli/abci-cli.go index dba950916..c78f525f8 100644 --- a/abci/cmd/abci-cli/abci-cli.go +++ b/abci/cmd/abci-cli/abci-cli.go @@ -16,7 +16,6 @@ import ( abcicli "github.com/tendermint/tendermint/abci/client" "github.com/tendermint/tendermint/abci/example/code" - "github.com/tendermint/tendermint/abci/example/counter" "github.com/tendermint/tendermint/abci/example/kvstore" "github.com/tendermint/tendermint/abci/server" servertest "github.com/tendermint/tendermint/abci/tests/server" @@ -44,9 +43,6 @@ var ( flagHeight int flagProve bool - // counter - flagSerial bool - // kvstore flagPersist string ) @@ -58,9 +54,7 @@ var RootCmd = &cobra.Command{ PersistentPreRunE: func(cmd *cobra.Command, args []string) error { switch cmd.Use { - case "counter", "kvstore": // for the examples apps, don't pre-run - return nil - case "version": // skip running for version command + case "kvstore", "version": return nil } @@ -135,10 +129,6 @@ func addQueryFlags() { "whether or not to return a merkle proof of the query result") } -func addCounterFlags() { - counterCmd.PersistentFlags().BoolVarP(&flagSerial, "serial", "", false, "enforce incrementing (serial) transactions") -} - func addKVStoreFlags() { kvstoreCmd.PersistentFlags().StringVarP(&flagPersist, "persist", "", "", "directory to use for a database") } @@ -158,8 +148,6 @@ func addCommands() { RootCmd.AddCommand(queryCmd) // examples - addCounterFlags() - RootCmd.AddCommand(counterCmd) addKVStoreFlags() RootCmd.AddCommand(kvstoreCmd) } @@ -267,14 +255,6 @@ var queryCmd = &cobra.Command{ RunE: cmdQuery, } -var counterCmd = &cobra.Command{ - Use: "counter", - Short: "ABCI demo example", - Long: "ABCI demo example", - Args: cobra.ExactArgs(0), - RunE: cmdCounter, -} - var kvstoreCmd = &cobra.Command{ Use: "kvstore", Short: "ABCI demo example", @@ -625,32 +605,6 @@ func cmdQuery(cmd *cobra.Command, args []string) error { return nil } -func cmdCounter(cmd *cobra.Command, args []string) error { - app := counter.NewApplication(flagSerial) - logger := log.NewTMLogger(log.NewSyncWriter(os.Stdout)) - - // Start the listener - srv, err := server.NewServer(flagAddress, flagAbci, app) - if err != nil { - return err - } - srv.SetLogger(logger.With("module", "abci-server")) - if err := srv.Start(); err != nil { - return err - } - - // Stop upon receiving SIGTERM or CTRL-C. - tmos.TrapSignal(logger, func() { - // Cleanup - if err := srv.Stop(); err != nil { - logger.Error("Error while stopping server", "err", err) - } - }) - - // Run forever. - select {} -} - func cmdKVStore(cmd *cobra.Command, args []string) error { logger := log.NewTMLogger(log.NewSyncWriter(os.Stdout)) diff --git a/abci/example/counter/counter.go b/abci/example/counter/counter.go deleted file mode 100644 index 58f8aabb9..000000000 --- a/abci/example/counter/counter.go +++ /dev/null @@ -1,103 +0,0 @@ -package counter - -import ( - "encoding/binary" - "fmt" - - "github.com/tendermint/tendermint/abci/example/code" - "github.com/tendermint/tendermint/abci/types" -) - -type Application struct { - types.BaseApplication - - hashCount int - txCount int - serial bool -} - -func NewApplication(serial bool) *Application { - return &Application{serial: serial} -} - -func (app *Application) Info(req types.RequestInfo) types.ResponseInfo { - return types.ResponseInfo{Data: fmt.Sprintf("{\"hashes\":%v,\"txs\":%v}", app.hashCount, app.txCount)} -} - -func (app *Application) SetOption(req types.RequestSetOption) types.ResponseSetOption { - key, value := req.Key, req.Value - if key == "serial" && value == "on" { - app.serial = true - } else { - /* - TODO Panic and have the ABCI server pass an exception. - The client can call SetOptionSync() and get an `error`. - return types.ResponseSetOption{ - Error: fmt.Sprintf("Unknown key (%s) or value (%s)", key, value), - } - */ - return types.ResponseSetOption{} - } - - return types.ResponseSetOption{} -} - -func (app *Application) DeliverTx(req types.RequestDeliverTx) types.ResponseDeliverTx { - if app.serial { - if len(req.Tx) > 8 { - return types.ResponseDeliverTx{ - Code: code.CodeTypeEncodingError, - Log: fmt.Sprintf("Max tx size is 8 bytes, got %d", len(req.Tx))} - } - tx8 := make([]byte, 8) - copy(tx8[len(tx8)-len(req.Tx):], req.Tx) - txValue := binary.BigEndian.Uint64(tx8) - if txValue != uint64(app.txCount) { - return types.ResponseDeliverTx{ - Code: code.CodeTypeBadNonce, - Log: fmt.Sprintf("Invalid nonce. Expected %v, got %v", app.txCount, txValue)} - } - } - app.txCount++ - return types.ResponseDeliverTx{Code: code.CodeTypeOK} -} - -func (app *Application) CheckTx(req types.RequestCheckTx) types.ResponseCheckTx { - if app.serial { - if len(req.Tx) > 8 { - return types.ResponseCheckTx{ - Code: code.CodeTypeEncodingError, - Log: fmt.Sprintf("Max tx size is 8 bytes, got %d", len(req.Tx))} - } - tx8 := make([]byte, 8) - copy(tx8[len(tx8)-len(req.Tx):], req.Tx) - txValue := binary.BigEndian.Uint64(tx8) - if txValue < uint64(app.txCount) { - return types.ResponseCheckTx{ - Code: code.CodeTypeBadNonce, - Log: fmt.Sprintf("Invalid nonce. Expected >= %v, got %v", app.txCount, txValue)} - } - } - return types.ResponseCheckTx{Code: code.CodeTypeOK} -} - -func (app *Application) Commit() (resp types.ResponseCommit) { - app.hashCount++ - if app.txCount == 0 { - return types.ResponseCommit{} - } - hash := make([]byte, 8) - binary.BigEndian.PutUint64(hash, uint64(app.txCount)) - return types.ResponseCommit{Data: hash} -} - -func (app *Application) Query(reqQuery types.RequestQuery) types.ResponseQuery { - switch reqQuery.Path { - case "hash": - return types.ResponseQuery{Value: []byte(fmt.Sprintf("%v", app.hashCount))} - case "tx": - return types.ResponseQuery{Value: []byte(fmt.Sprintf("%v", app.txCount))} - default: - return types.ResponseQuery{Log: fmt.Sprintf("Invalid query path. Expected hash or tx, got %v", reqQuery.Path)} - } -} diff --git a/abci/example/kvstore/kvstore_test.go b/abci/example/kvstore/kvstore_test.go index dafa88237..0a7802aab 100644 --- a/abci/example/kvstore/kvstore_test.go +++ b/abci/example/kvstore/kvstore_test.go @@ -2,7 +2,7 @@ package kvstore import ( "fmt" - "io/ioutil" + "os" "sort" "testing" @@ -71,7 +71,7 @@ func TestKVStoreKV(t *testing.T) { } func TestPersistentKVStoreKV(t *testing.T) { - dir, err := ioutil.TempDir("/tmp", "abci-kvstore-test") // TODO + dir, err := os.MkdirTemp("/tmp", "abci-kvstore-test") // TODO if err != nil { t.Fatal(err) } @@ -87,7 +87,7 @@ func TestPersistentKVStoreKV(t *testing.T) { } func TestPersistentKVStoreInfo(t *testing.T) { - dir, err := ioutil.TempDir("/tmp", "abci-kvstore-test") // TODO + dir, err := os.MkdirTemp("/tmp", "abci-kvstore-test") // TODO if err != nil { t.Fatal(err) } @@ -119,7 +119,7 @@ func TestPersistentKVStoreInfo(t *testing.T) { // add a validator, remove a validator, update a validator func TestValUpdates(t *testing.T) { - dir, err := ioutil.TempDir("/tmp", "abci-kvstore-test") // TODO + dir, err := os.MkdirTemp("/tmp", "abci-kvstore-test") // TODO if err != nil { t.Fatal(err) } diff --git a/abci/tests/client_server_test.go b/abci/tests/client_server_test.go index 2ef64e66a..e975f47f5 100644 --- a/abci/tests/client_server_test.go +++ b/abci/tests/client_server_test.go @@ -19,9 +19,12 @@ func TestClientServerNoAddrPrefix(t *testing.T) { assert.NoError(t, err, "expected no error on NewServer") err = server.Start() assert.NoError(t, err, "expected no error on server.Start") + defer func() { _ = server.Stop() }() client, err := abciclient.NewClient(addr, transport, true) assert.NoError(t, err, "expected no error on NewClient") err = client.Start() assert.NoError(t, err, "expected no error on client.Start") + + _ = client.Stop() } diff --git a/abci/tests/test_app/app.go b/abci/tests/test_app/app.go deleted file mode 100644 index 9c32fcc7d..000000000 --- a/abci/tests/test_app/app.go +++ /dev/null @@ -1,78 +0,0 @@ -package main - -import ( - "bytes" - "fmt" - "os" - - abcicli "github.com/tendermint/tendermint/abci/client" - "github.com/tendermint/tendermint/abci/types" - "github.com/tendermint/tendermint/libs/log" -) - -func startClient(abciType string) abcicli.Client { - // Start client - client, err := abcicli.NewClient("tcp://127.0.0.1:26658", abciType, true) - if err != nil { - panic(err.Error()) - } - logger := log.NewTMLogger(log.NewSyncWriter(os.Stdout)) - client.SetLogger(logger.With("module", "abcicli")) - if err := client.Start(); err != nil { - panicf("connecting to abci_app: %v", err.Error()) - } - - return client -} - -func setOption(client abcicli.Client, key, value string) { - _, err := client.SetOptionSync(types.RequestSetOption{Key: key, Value: value}) - if err != nil { - panicf("setting %v=%v: \nerr: %v", key, value, err) - } -} - -func commit(client abcicli.Client, hashExp []byte) { - res, err := client.CommitSync() - if err != nil { - panicf("client error: %v", err) - } - if !bytes.Equal(res.Data, hashExp) { - panicf("Commit hash was unexpected. Got %X expected %X", res.Data, hashExp) - } -} - -func deliverTx(client abcicli.Client, txBytes []byte, codeExp uint32, dataExp []byte) { - res, err := client.DeliverTxSync(types.RequestDeliverTx{Tx: txBytes}) - if err != nil { - panicf("client error: %v", err) - } - if res.Code != codeExp { - panicf("DeliverTx response code was unexpected. Got %v expected %v. Log: %v", res.Code, codeExp, res.Log) - } - if !bytes.Equal(res.Data, dataExp) { - panicf("DeliverTx response data was unexpected. Got %X expected %X", res.Data, dataExp) - } -} - -/*func checkTx(client abcicli.Client, txBytes []byte, codeExp uint32, dataExp []byte) { - res, err := client.CheckTxSync(txBytes) - if err != nil { - panicf("client error: %v", err) - } - if res.IsErr() { - panicf("checking tx %X: %v\nlog: %v", txBytes, res.Log) - } - if res.Code != codeExp { - panicf("CheckTx response code was unexpected. Got %v expected %v. Log: %v", - res.Code, codeExp, res.Log) - } - if !bytes.Equal(res.Data, dataExp) { - panicf("CheckTx response data was unexpected. Got %X expected %X", - res.Data, dataExp) - } -}*/ - -func panicf(format string, a ...interface{}) { - panic(fmt.Sprintf(format, a...)) -} diff --git a/abci/tests/test_app/main.go b/abci/tests/test_app/main.go deleted file mode 100644 index 1dcab1de1..000000000 --- a/abci/tests/test_app/main.go +++ /dev/null @@ -1,95 +0,0 @@ -package main - -import ( - "fmt" - "log" - "os" - "os/exec" - "time" - - "github.com/tendermint/tendermint/abci/example/code" - "github.com/tendermint/tendermint/abci/types" -) - -var abciType string - -func init() { - abciType = os.Getenv("ABCI") - if abciType == "" { - abciType = "socket" - } -} - -func main() { - testCounter() -} - -const ( - maxABCIConnectTries = 10 -) - -func ensureABCIIsUp(typ string, n int) error { - var err error - cmdString := "abci-cli echo hello" - if typ == "grpc" { - cmdString = "abci-cli --abci grpc echo hello" - } - - for i := 0; i < n; i++ { - cmd := exec.Command("bash", "-c", cmdString) - _, err = cmd.CombinedOutput() - if err == nil { - break - } - <-time.After(500 * time.Millisecond) - } - return err -} - -func testCounter() { - abciApp := os.Getenv("ABCI_APP") - if abciApp == "" { - panic("No ABCI_APP specified") - } - - fmt.Printf("Running %s test with abci=%s\n", abciApp, abciType) - subCommand := fmt.Sprintf("abci-cli %s", abciApp) - cmd := exec.Command("bash", "-c", subCommand) - cmd.Stdout = os.Stdout - if err := cmd.Start(); err != nil { - log.Fatalf("starting %q err: %v", abciApp, err) - } - defer func() { - if err := cmd.Process.Kill(); err != nil { - log.Printf("error on process kill: %v", err) - } - if err := cmd.Wait(); err != nil { - log.Printf("error while waiting for cmd to exit: %v", err) - } - }() - - if err := ensureABCIIsUp(abciType, maxABCIConnectTries); err != nil { - log.Fatalf("echo failed: %v", err) //nolint:gocritic - } - - client := startClient(abciType) - defer func() { - if err := client.Stop(); err != nil { - log.Printf("error trying client stop: %v", err) - } - }() - - setOption(client, "serial", "on") - commit(client, nil) - deliverTx(client, []byte("abc"), code.CodeTypeBadNonce, nil) - commit(client, nil) - deliverTx(client, []byte{0x00}, types.CodeTypeOK, nil) - commit(client, []byte{0, 0, 0, 0, 0, 0, 0, 1}) - deliverTx(client, []byte{0x00}, code.CodeTypeBadNonce, nil) - deliverTx(client, []byte{0x01}, types.CodeTypeOK, nil) - deliverTx(client, []byte{0x00, 0x02}, types.CodeTypeOK, nil) - deliverTx(client, []byte{0x00, 0x03}, types.CodeTypeOK, nil) - deliverTx(client, []byte{0x00, 0x00, 0x04}, types.CodeTypeOK, nil) - deliverTx(client, []byte{0x00, 0x00, 0x06}, code.CodeTypeBadNonce, nil) - commit(client, []byte{0, 0, 0, 0, 0, 0, 0, 5}) -} diff --git a/abci/tests/test_app/test.sh b/abci/tests/test_app/test.sh deleted file mode 100755 index 0d8301831..000000000 --- a/abci/tests/test_app/test.sh +++ /dev/null @@ -1,28 +0,0 @@ -#! /bin/bash -set -e - -# These tests spawn the counter app and server by execing the ABCI_APP command and run some simple client tests against it - -# Get the directory of where this script is. -export PATH="$GOBIN:$PATH" -SOURCE="${BASH_SOURCE[0]}" -while [ -h "$SOURCE" ] ; do SOURCE="$(readlink "$SOURCE")"; done -DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - -# Change into that dir because we expect that. -cd "$DIR" - -echo "RUN COUNTER OVER SOCKET" -# test golang counter -ABCI_APP="counter" go run -mod=readonly ./*.go -echo "----------------------" - - -echo "RUN COUNTER OVER GRPC" -# test golang counter via grpc -ABCI_APP="counter --abci=grpc" ABCI="grpc" go run -mod=readonly ./*.go -echo "----------------------" - -# test nodejs counter -# TODO: fix node app -#ABCI_APP="node $GOPATH/src/github.com/tendermint/js-abci/example/app.js" go test -test.run TestCounter diff --git a/abci/tests/test_cli/test.sh b/abci/tests/test_cli/test.sh index cc880603d..9c02ce6f5 100755 --- a/abci/tests/test_cli/test.sh +++ b/abci/tests/test_cli/test.sh @@ -37,7 +37,6 @@ function testExample() { } testExample 1 tests/test_cli/ex1.abci abci-cli kvstore -testExample 2 tests/test_cli/ex2.abci abci-cli counter echo "" echo "PASS" diff --git a/abci/types/messages.go b/abci/types/messages.go index 4314b5bfc..4fe9f19d1 100644 --- a/abci/types/messages.go +++ b/abci/types/messages.go @@ -1,11 +1,10 @@ package types import ( - "bufio" - "encoding/binary" "io" "github.com/gogo/protobuf/proto" + "github.com/tendermint/tendermint/libs/protoio" ) const ( @@ -14,57 +13,19 @@ const ( // WriteMessage writes a varint length-delimited protobuf message. func WriteMessage(msg proto.Message, w io.Writer) error { - bz, err := proto.Marshal(msg) + protoWriter := protoio.NewDelimitedWriter(w) + _, err := protoWriter.WriteMsg(msg) if err != nil { return err } - return encodeByteSlice(w, bz) + + return nil } // ReadMessage reads a varint length-delimited protobuf message. func ReadMessage(r io.Reader, msg proto.Message) error { - return readProtoMsg(r, msg, maxMsgSize) -} - -func readProtoMsg(r io.Reader, msg proto.Message, maxSize int) error { - // binary.ReadVarint takes an io.ByteReader, eg. a bufio.Reader - reader, ok := r.(*bufio.Reader) - if !ok { - reader = bufio.NewReader(r) - } - length64, err := binary.ReadVarint(reader) - if err != nil { - return err - } - length := int(length64) - if length < 0 || length > maxSize { - return io.ErrShortBuffer - } - buf := make([]byte, length) - if _, err := io.ReadFull(reader, buf); err != nil { - return err - } - return proto.Unmarshal(buf, msg) -} - -//----------------------------------------------------------------------- -// NOTE: we copied wire.EncodeByteSlice from go-wire rather than keep -// go-wire as a dep - -func encodeByteSlice(w io.Writer, bz []byte) (err error) { - err = encodeVarint(w, int64(len(bz))) - if err != nil { - return - } - _, err = w.Write(bz) - return -} - -func encodeVarint(w io.Writer, i int64) (err error) { - var buf [10]byte - n := binary.PutVarint(buf[:], i) - _, err = w.Write(buf[0:n]) - return + _, err := protoio.NewDelimitedReader(r, maxMsgSize).ReadMsg(msg) + return err } //---------------------------------------- diff --git a/behaviour/doc.go b/behaviour/doc.go deleted file mode 100644 index 10c8d3d53..000000000 --- a/behaviour/doc.go +++ /dev/null @@ -1,42 +0,0 @@ -/* -Package Behavior provides a mechanism for reactors to report behavior of peers. - -Instead of a reactor calling the switch directly it will call the behavior module which will -handle the stoping and marking peer as good on behalf of the reactor. - -There are four different behaviors a reactor can report. - -1. bad message - -type badMessage struct { - explanation string -} - -This message will request the peer be stopped for an error - -2. message out of order - -type messageOutOfOrder struct { - explanation string -} - -This message will request the peer be stopped for an error - -3. consesnsus Vote - -type consensusVote struct { - explanation string -} - -This message will request the peer be marked as good - -4. block part - -type blockPart struct { - explanation string -} - -This message will request the peer be marked as good - -*/ -package behaviour //nolint:misspell diff --git a/behaviour/peer_behaviour.go b/behaviour/peer_behaviour.go deleted file mode 100644 index b00b03a8d..000000000 --- a/behaviour/peer_behaviour.go +++ /dev/null @@ -1,49 +0,0 @@ -package behaviour //nolint:misspell - -import ( - "github.com/tendermint/tendermint/p2p" -) - -// PeerBehaviour is a struct describing a behavior a peer performed. -// `peerID` identifies the peer and reason characterizes the specific -// behavior performed by the peer. -type PeerBehaviour struct { - peerID p2p.ID - reason interface{} -} - -type badMessage struct { - explanation string -} - -// BadMessage returns a badMessage PeerBehaviour. -func BadMessage(peerID p2p.ID, explanation string) PeerBehaviour { - return PeerBehaviour{peerID: peerID, reason: badMessage{explanation}} -} - -type messageOutOfOrder struct { - explanation string -} - -// MessageOutOfOrder returns a messagOutOfOrder PeerBehaviour. -func MessageOutOfOrder(peerID p2p.ID, explanation string) PeerBehaviour { - return PeerBehaviour{peerID: peerID, reason: messageOutOfOrder{explanation}} -} - -type consensusVote struct { - explanation string -} - -// ConsensusVote returns a consensusVote PeerBehaviour. -func ConsensusVote(peerID p2p.ID, explanation string) PeerBehaviour { - return PeerBehaviour{peerID: peerID, reason: consensusVote{explanation}} -} - -type blockPart struct { - explanation string -} - -// BlockPart returns blockPart PeerBehaviour. -func BlockPart(peerID p2p.ID, explanation string) PeerBehaviour { - return PeerBehaviour{peerID: peerID, reason: blockPart{explanation}} -} diff --git a/behaviour/reporter.go b/behaviour/reporter.go deleted file mode 100644 index 8540f490b..000000000 --- a/behaviour/reporter.go +++ /dev/null @@ -1,86 +0,0 @@ -package behaviour //nolint:misspell - -import ( - "errors" - - tmsync "github.com/tendermint/tendermint/libs/sync" - "github.com/tendermint/tendermint/p2p" -) - -// Reporter provides an interface for reactors to report the behavior -// of peers synchronously to other components. -type Reporter interface { - Report(behavior PeerBehaviour) error -} - -// SwitchReporter reports peer behavior to an internal Switch. -type SwitchReporter struct { - sw *p2p.Switch -} - -// NewSwitchReporter return a new SwitchReporter instance which wraps the Switch. -func NewSwitchReporter(sw *p2p.Switch) *SwitchReporter { - return &SwitchReporter{ - sw: sw, - } -} - -// Report reports the behavior of a peer to the Switch. -func (spbr *SwitchReporter) Report(behavior PeerBehaviour) error { - peer := spbr.sw.Peers().Get(behavior.peerID) - if peer == nil { - return errors.New("peer not found") - } - - switch reason := behavior.reason.(type) { - case consensusVote, blockPart: - spbr.sw.MarkPeerAsGood(peer) - case badMessage: - spbr.sw.StopPeerForError(peer, reason.explanation) - case messageOutOfOrder: - spbr.sw.StopPeerForError(peer, reason.explanation) - default: - return errors.New("unknown reason reported") - } - - return nil -} - -// MockReporter is a concrete implementation of the Reporter -// interface used in reactor tests to ensure reactors report the correct -// behavior in manufactured scenarios. -type MockReporter struct { - mtx tmsync.RWMutex - pb map[p2p.ID][]PeerBehaviour -} - -// NewMockReporter returns a Reporter which records all reported -// behaviors in memory. -func NewMockReporter() *MockReporter { - return &MockReporter{ - pb: map[p2p.ID][]PeerBehaviour{}, - } -} - -// Report stores the PeerBehaviour produced by the peer identified by peerID. -func (mpbr *MockReporter) Report(behavior PeerBehaviour) error { - mpbr.mtx.Lock() - defer mpbr.mtx.Unlock() - mpbr.pb[behavior.peerID] = append(mpbr.pb[behavior.peerID], behavior) - - return nil -} - -// GetBehaviours returns all behaviors reported on the peer identified by peerID. -func (mpbr *MockReporter) GetBehaviours(peerID p2p.ID) []PeerBehaviour { - mpbr.mtx.RLock() - defer mpbr.mtx.RUnlock() - if items, ok := mpbr.pb[peerID]; ok { - result := make([]PeerBehaviour, len(items)) - copy(result, items) - - return result - } - - return []PeerBehaviour{} -} diff --git a/behaviour/reporter_test.go b/behaviour/reporter_test.go deleted file mode 100644 index 38a5870a4..000000000 --- a/behaviour/reporter_test.go +++ /dev/null @@ -1,205 +0,0 @@ -package behaviour_test //nolint:misspell - -import ( - "sync" - "testing" - - bh "github.com/tendermint/tendermint/behaviour" //nolint:misspell - "github.com/tendermint/tendermint/p2p" -) - -// TestMockReporter tests the MockReporter's ability to store reported -// peer behavior in memory indexed by the peerID. -func TestMockReporter(t *testing.T) { - var peerID p2p.ID = "MockPeer" - pr := bh.NewMockReporter() - - behaviors := pr.GetBehaviours(peerID) - if len(behaviors) != 0 { - t.Error("Expected to have no behaviors reported") - } - - badMessage := bh.BadMessage(peerID, "bad message") - if err := pr.Report(badMessage); err != nil { - t.Error(err) - } - behaviors = pr.GetBehaviours(peerID) - if len(behaviors) != 1 { - t.Error("Expected the peer have one reported behavior") - } - - if behaviors[0] != badMessage { - t.Error("Expected Bad Message to have been reported") - } -} - -type scriptItem struct { - peerID p2p.ID - behavior bh.PeerBehaviour -} - -// equalBehaviours returns true if a and b contain the same PeerBehaviours with -// the same freequencies and otherwise false. -func equalBehaviours(a []bh.PeerBehaviour, b []bh.PeerBehaviour) bool { - aHistogram := map[bh.PeerBehaviour]int{} - bHistogram := map[bh.PeerBehaviour]int{} - - for _, behavior := range a { - aHistogram[behavior]++ - } - - for _, behavior := range b { - bHistogram[behavior]++ - } - - if len(aHistogram) != len(bHistogram) { - return false - } - - for _, behavior := range a { - if aHistogram[behavior] != bHistogram[behavior] { - return false - } - } - - for _, behavior := range b { - if bHistogram[behavior] != aHistogram[behavior] { - return false - } - } - - return true -} - -// TestEqualPeerBehaviours tests that equalBehaviours can tell that two slices -// of peer behaviors can be compared for the behaviors they contain and the -// freequencies that those behaviors occur. -func TestEqualPeerBehaviours(t *testing.T) { - var ( - peerID p2p.ID = "MockPeer" - consensusVote = bh.ConsensusVote(peerID, "voted") - blockPart = bh.BlockPart(peerID, "blocked") - equals = []struct { - left []bh.PeerBehaviour - right []bh.PeerBehaviour - }{ - // Empty sets - {[]bh.PeerBehaviour{}, []bh.PeerBehaviour{}}, - // Single behaviors - {[]bh.PeerBehaviour{consensusVote}, []bh.PeerBehaviour{consensusVote}}, - // Equal Frequencies - {[]bh.PeerBehaviour{consensusVote, consensusVote}, - []bh.PeerBehaviour{consensusVote, consensusVote}}, - // Equal frequencies different orders - {[]bh.PeerBehaviour{consensusVote, blockPart}, - []bh.PeerBehaviour{blockPart, consensusVote}}, - } - unequals = []struct { - left []bh.PeerBehaviour - right []bh.PeerBehaviour - }{ - // Comparing empty sets to non empty sets - {[]bh.PeerBehaviour{}, []bh.PeerBehaviour{consensusVote}}, - // Different behaviors - {[]bh.PeerBehaviour{consensusVote}, []bh.PeerBehaviour{blockPart}}, - // Same behavior with different frequencies - {[]bh.PeerBehaviour{consensusVote}, - []bh.PeerBehaviour{consensusVote, consensusVote}}, - } - ) - - for _, test := range equals { - if !equalBehaviours(test.left, test.right) { - t.Errorf("expected %#v and %#v to be equal", test.left, test.right) - } - } - - for _, test := range unequals { - if equalBehaviours(test.left, test.right) { - t.Errorf("expected %#v and %#v to be unequal", test.left, test.right) - } - } -} - -// TestPeerBehaviourConcurrency constructs a scenario in which -// multiple goroutines are using the same MockReporter instance. -// This test reproduces the conditions in which MockReporter will -// be used within a Reactor `Receive` method tests to ensure thread safety. -func TestMockPeerBehaviourReporterConcurrency(t *testing.T) { - var ( - behaviourScript = []struct { - peerID p2p.ID - behaviors []bh.PeerBehaviour - }{ - {"1", []bh.PeerBehaviour{bh.ConsensusVote("1", "")}}, - {"2", []bh.PeerBehaviour{bh.ConsensusVote("2", ""), bh.ConsensusVote("2", ""), bh.ConsensusVote("2", "")}}, - { - "3", - []bh.PeerBehaviour{bh.BlockPart("3", ""), - bh.ConsensusVote("3", ""), - bh.BlockPart("3", ""), - bh.ConsensusVote("3", "")}}, - { - "4", - []bh.PeerBehaviour{bh.ConsensusVote("4", ""), - bh.ConsensusVote("4", ""), - bh.ConsensusVote("4", ""), - bh.ConsensusVote("4", "")}}, - { - "5", - []bh.PeerBehaviour{bh.BlockPart("5", ""), - bh.ConsensusVote("5", ""), - bh.BlockPart("5", ""), - bh.ConsensusVote("5", "")}}, - } - ) - - var receiveWg sync.WaitGroup - pr := bh.NewMockReporter() - scriptItems := make(chan scriptItem) - done := make(chan int) - numConsumers := 3 - for i := 0; i < numConsumers; i++ { - receiveWg.Add(1) - go func() { - defer receiveWg.Done() - for { - select { - case pb := <-scriptItems: - if err := pr.Report(pb.behavior); err != nil { - t.Error(err) - } - case <-done: - return - } - } - }() - } - - var sendingWg sync.WaitGroup - sendingWg.Add(1) - go func() { - defer sendingWg.Done() - for _, item := range behaviourScript { - for _, reason := range item.behaviors { - scriptItems <- scriptItem{item.peerID, reason} - } - } - }() - - sendingWg.Wait() - - for i := 0; i < numConsumers; i++ { - done <- 1 - } - - receiveWg.Wait() - - for _, items := range behaviourScript { - reported := pr.GetBehaviours(items.peerID) - if !equalBehaviours(reported, items.behaviors) { - t.Errorf("expected peer %s to have behaved \nExpected: %#v \nGot %#v \n", - items.peerID, items.behaviors, reported) - } - } -} diff --git a/blockchain/v0/pool.go b/blockchain/pool.go similarity index 99% rename from blockchain/v0/pool.go rename to blockchain/pool.go index 69e0b55c4..ceab887ad 100644 --- a/blockchain/v0/pool.go +++ b/blockchain/pool.go @@ -1,4 +1,4 @@ -package v0 +package blockchain import ( "errors" diff --git a/blockchain/v0/pool_test.go b/blockchain/pool_test.go similarity index 99% rename from blockchain/v0/pool_test.go rename to blockchain/pool_test.go index 1653fe74a..c67da0afd 100644 --- a/blockchain/v0/pool_test.go +++ b/blockchain/pool_test.go @@ -1,4 +1,4 @@ -package v0 +package blockchain import ( "fmt" diff --git a/blockchain/v0/reactor.go b/blockchain/reactor.go similarity index 86% rename from blockchain/v0/reactor.go rename to blockchain/reactor.go index 93a1e7174..7302b5be3 100644 --- a/blockchain/v0/reactor.go +++ b/blockchain/reactor.go @@ -1,11 +1,10 @@ -package v0 +package blockchain import ( "fmt" "reflect" "time" - bc "github.com/tendermint/tendermint/blockchain" "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/p2p" bcproto "github.com/tendermint/tendermint/proto/tendermint/blockchain" @@ -45,8 +44,8 @@ func (e peerError) Error() string { return fmt.Sprintf("error with peer %v: %s", e.peerID, e.err.Error()) } -// BlockchainReactor handles long-term catchup syncing. -type BlockchainReactor struct { +// Reactor handles long-term catchup syncing. +type Reactor struct { p2p.BaseReactor // immutable @@ -61,9 +60,9 @@ type BlockchainReactor struct { errorsCh <-chan peerError } -// NewBlockchainReactor returns new reactor instance. -func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *store.BlockStore, - fastSync bool) *BlockchainReactor { +// NewReactor returns new reactor instance. +func NewReactor(state sm.State, blockExec *sm.BlockExecutor, store *store.BlockStore, + fastSync bool) *Reactor { if state.LastBlockHeight != store.Height() { panic(fmt.Sprintf("state (%v) and store (%v) height mismatch", state.LastBlockHeight, @@ -81,7 +80,7 @@ func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *st } pool := NewBlockPool(startHeight, requestsCh, errorsCh) - bcR := &BlockchainReactor{ + bcR := &Reactor{ initialState: state, blockExec: blockExec, store: store, @@ -90,18 +89,18 @@ func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *st requestsCh: requestsCh, errorsCh: errorsCh, } - bcR.BaseReactor = *p2p.NewBaseReactor("BlockchainReactor", bcR) + bcR.BaseReactor = *p2p.NewBaseReactor("Reactor", bcR) return bcR } // SetLogger implements service.Service by setting the logger on reactor and pool. -func (bcR *BlockchainReactor) SetLogger(l log.Logger) { +func (bcR *Reactor) SetLogger(l log.Logger) { bcR.BaseService.Logger = l bcR.pool.Logger = l } // OnStart implements service.Service. -func (bcR *BlockchainReactor) OnStart() error { +func (bcR *Reactor) OnStart() error { if bcR.fastSync { err := bcR.pool.Start() if err != nil { @@ -113,7 +112,7 @@ func (bcR *BlockchainReactor) OnStart() error { } // SwitchToFastSync is called by the state sync reactor when switching to fast sync. -func (bcR *BlockchainReactor) SwitchToFastSync(state sm.State) error { +func (bcR *Reactor) SwitchToFastSync(state sm.State) error { bcR.fastSync = true bcR.initialState = state @@ -127,7 +126,7 @@ func (bcR *BlockchainReactor) SwitchToFastSync(state sm.State) error { } // OnStop implements service.Service. -func (bcR *BlockchainReactor) OnStop() { +func (bcR *Reactor) OnStop() { if bcR.fastSync { if err := bcR.pool.Stop(); err != nil { bcR.Logger.Error("Error stopping pool", "err", err) @@ -136,21 +135,21 @@ func (bcR *BlockchainReactor) OnStop() { } // GetChannels implements Reactor -func (bcR *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor { +func (bcR *Reactor) GetChannels() []*p2p.ChannelDescriptor { return []*p2p.ChannelDescriptor{ { ID: BlockchainChannel, Priority: 5, SendQueueCapacity: 1000, RecvBufferCapacity: 50 * 4096, - RecvMessageCapacity: bc.MaxMsgSize, + RecvMessageCapacity: MaxMsgSize, }, } } // AddPeer implements Reactor by sending our state to peer. -func (bcR *BlockchainReactor) AddPeer(peer p2p.Peer) { - msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{ +func (bcR *Reactor) AddPeer(peer p2p.Peer) { + msgBytes, err := EncodeMsg(&bcproto.StatusResponse{ Base: bcR.store.Base(), Height: bcR.store.Height()}) if err != nil { @@ -166,13 +165,13 @@ func (bcR *BlockchainReactor) AddPeer(peer p2p.Peer) { } // RemovePeer implements Reactor by removing peer from the pool. -func (bcR *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) { +func (bcR *Reactor) RemovePeer(peer p2p.Peer, reason interface{}) { bcR.pool.RemovePeer(peer.ID()) } // respondToPeer loads a block and sends it to the requesting peer, // if we have it. Otherwise, we'll respond saying we don't have it. -func (bcR *BlockchainReactor) respondToPeer(msg *bcproto.BlockRequest, +func (bcR *Reactor) respondToPeer(msg *bcproto.BlockRequest, src p2p.Peer) (queued bool) { block := bcR.store.LoadBlock(msg.Height) @@ -183,7 +182,7 @@ func (bcR *BlockchainReactor) respondToPeer(msg *bcproto.BlockRequest, return false } - msgBytes, err := bc.EncodeMsg(&bcproto.BlockResponse{Block: bl}) + msgBytes, err := EncodeMsg(&bcproto.BlockResponse{Block: bl}) if err != nil { bcR.Logger.Error("could not marshal msg", "err", err) return false @@ -194,7 +193,7 @@ func (bcR *BlockchainReactor) respondToPeer(msg *bcproto.BlockRequest, bcR.Logger.Info("Peer asking for a block we don't have", "src", src, "height", msg.Height) - msgBytes, err := bc.EncodeMsg(&bcproto.NoBlockResponse{Height: msg.Height}) + msgBytes, err := EncodeMsg(&bcproto.NoBlockResponse{Height: msg.Height}) if err != nil { bcR.Logger.Error("could not convert msg to protobuf", "err", err) return false @@ -204,15 +203,15 @@ func (bcR *BlockchainReactor) respondToPeer(msg *bcproto.BlockRequest, } // Receive implements Reactor by handling 4 types of messages (look below). -func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { - msg, err := bc.DecodeMsg(msgBytes) +func (bcR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { + msg, err := DecodeMsg(msgBytes) if err != nil { bcR.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err) bcR.Switch.StopPeerForError(src, err) return } - if err = bc.ValidateMsg(msg); err != nil { + if err = ValidateMsg(msg); err != nil { bcR.Logger.Error("Peer sent us invalid msg", "peer", src, "msg", msg, "err", err) bcR.Switch.StopPeerForError(src, err) return @@ -232,7 +231,7 @@ func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) bcR.pool.AddBlock(src.ID(), bi, len(msgBytes)) case *bcproto.StatusRequest: // Send peer our state. - msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{ + msgBytes, err := EncodeMsg(&bcproto.StatusResponse{ Height: bcR.store.Height(), Base: bcR.store.Base(), }) @@ -253,7 +252,7 @@ func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) // Handle messages from the poolReactor telling the reactor what to do. // NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down! -func (bcR *BlockchainReactor) poolRoutine(stateSynced bool) { +func (bcR *Reactor) poolRoutine(stateSynced bool) { trySyncTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond) defer trySyncTicker.Stop() @@ -286,7 +285,7 @@ func (bcR *BlockchainReactor) poolRoutine(stateSynced bool) { if peer == nil { continue } - msgBytes, err := bc.EncodeMsg(&bcproto.BlockRequest{Height: request.Height}) + msgBytes, err := EncodeMsg(&bcproto.BlockRequest{Height: request.Height}) if err != nil { bcR.Logger.Error("could not convert msg to proto", "err", err) continue @@ -388,14 +387,14 @@ FOR_LOOP: if peer != nil { // NOTE: we've already removed the peer's request, but we // still need to clean up the rest. - bcR.Switch.StopPeerForError(peer, fmt.Errorf("blockchainReactor validation error: %v", err)) + bcR.Switch.StopPeerForError(peer, fmt.Errorf("Reactor validation error: %v", err)) } peerID2 := bcR.pool.RedoRequest(second.Height) peer2 := bcR.Switch.Peers().Get(peerID2) if peer2 != nil && peer2 != peer { // NOTE: we've already removed the peer's request, but we // still need to clean up the rest. - bcR.Switch.StopPeerForError(peer2, fmt.Errorf("blockchainReactor validation error: %v", err)) + bcR.Switch.StopPeerForError(peer2, fmt.Errorf("Reactor validation error: %v", err)) } continue FOR_LOOP } @@ -430,8 +429,8 @@ FOR_LOOP: } // BroadcastStatusRequest broadcasts `BlockStore` base and height. -func (bcR *BlockchainReactor) BroadcastStatusRequest() error { - bm, err := bc.EncodeMsg(&bcproto.StatusRequest{}) +func (bcR *Reactor) BroadcastStatusRequest() error { + bm, err := EncodeMsg(&bcproto.StatusRequest{}) if err != nil { bcR.Logger.Error("could not convert msg to proto", "err", err) return fmt.Errorf("could not convert msg to proto: %w", err) diff --git a/blockchain/v0/reactor_test.go b/blockchain/reactor_test.go similarity index 87% rename from blockchain/v0/reactor_test.go rename to blockchain/reactor_test.go index 6184b5b78..e23dbdd35 100644 --- a/blockchain/v0/reactor_test.go +++ b/blockchain/reactor_test.go @@ -1,4 +1,4 @@ -package v0 +package blockchain import ( "fmt" @@ -48,24 +48,24 @@ func randGenesisDoc(numValidators int, randPower bool, minPower int64) (*types.G }, privValidators } -type BlockchainReactorPair struct { - reactor *BlockchainReactor +type ReactorPair struct { + reactor *Reactor app proxy.AppConns } -func newBlockchainReactor( +func newReactor( t *testing.T, logger log.Logger, genDoc *types.GenesisDoc, privVals []types.PrivValidator, - maxBlockHeight int64) BlockchainReactorPair { + maxBlockHeight int64) ReactorPair { if len(privVals) != 1 { panic("only support one validator") } app := &testApp{} cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() if err != nil { panic(fmt.Errorf("error start app: %w", err)) @@ -93,7 +93,7 @@ func newBlockchainReactor( mock.Anything, mock.Anything).Return(nil) - // Make the BlockchainReactor itself. + // Make the Reactor itself. // NOTE we have to create and commit the blocks first because // pool.height is determined from the store. fastSync := true @@ -141,10 +141,10 @@ func newBlockchainReactor( blockStore.SaveBlock(thisBlock, thisParts, lastCommit) } - bcReactor := NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync) + bcReactor := NewReactor(state.Copy(), blockExec, blockStore, fastSync) bcReactor.SetLogger(logger.With("module", "blockchain")) - return BlockchainReactorPair{bcReactor, proxyApp} + return ReactorPair{bcReactor, proxyApp} } func TestNoBlockResponse(t *testing.T) { @@ -154,10 +154,10 @@ func TestNoBlockResponse(t *testing.T) { maxBlockHeight := int64(65) - reactorPairs := make([]BlockchainReactorPair, 2) + reactorPairs := make([]ReactorPair, 2) - reactorPairs[0] = newBlockchainReactor(t, log.TestingLogger(), genDoc, privVals, maxBlockHeight) - reactorPairs[1] = newBlockchainReactor(t, log.TestingLogger(), genDoc, privVals, 0) + reactorPairs[0] = newReactor(t, log.TestingLogger(), genDoc, privVals, maxBlockHeight) + reactorPairs[1] = newReactor(t, log.TestingLogger(), genDoc, privVals, 0) p2p.MakeConnectedSwitches(config.P2P, 2, func(i int, s *p2p.Switch) *p2p.Switch { s.AddReactor("BLOCKCHAIN", reactorPairs[i].reactor) @@ -218,7 +218,7 @@ func TestBadBlockStopsPeer(t *testing.T) { // Other chain needs a different validator set otherGenDoc, otherPrivVals := randGenesisDoc(1, false, 30) - otherChain := newBlockchainReactor(t, log.TestingLogger(), otherGenDoc, otherPrivVals, maxBlockHeight) + otherChain := newReactor(t, log.TestingLogger(), otherGenDoc, otherPrivVals, maxBlockHeight) defer func() { err := otherChain.reactor.Stop() @@ -227,12 +227,12 @@ func TestBadBlockStopsPeer(t *testing.T) { require.NoError(t, err) }() - reactorPairs := make([]BlockchainReactorPair, 4) + reactorPairs := make([]ReactorPair, 4) - reactorPairs[0] = newBlockchainReactor(t, log.TestingLogger(), genDoc, privVals, maxBlockHeight) - reactorPairs[1] = newBlockchainReactor(t, log.TestingLogger(), genDoc, privVals, 0) - reactorPairs[2] = newBlockchainReactor(t, log.TestingLogger(), genDoc, privVals, 0) - reactorPairs[3] = newBlockchainReactor(t, log.TestingLogger(), genDoc, privVals, 0) + reactorPairs[0] = newReactor(t, log.TestingLogger(), genDoc, privVals, maxBlockHeight) + reactorPairs[1] = newReactor(t, log.TestingLogger(), genDoc, privVals, 0) + reactorPairs[2] = newReactor(t, log.TestingLogger(), genDoc, privVals, 0) + reactorPairs[3] = newReactor(t, log.TestingLogger(), genDoc, privVals, 0) switches := p2p.MakeConnectedSwitches(config.P2P, 4, func(i int, s *p2p.Switch) *p2p.Switch { s.AddReactor("BLOCKCHAIN", reactorPairs[i].reactor) @@ -270,7 +270,7 @@ func TestBadBlockStopsPeer(t *testing.T) { // race, but can't be easily avoided. reactorPairs[3].reactor.store = otherChain.reactor.store - lastReactorPair := newBlockchainReactor(t, log.TestingLogger(), genDoc, privVals, 0) + lastReactorPair := newReactor(t, log.TestingLogger(), genDoc, privVals, 0) reactorPairs = append(reactorPairs, lastReactorPair) switches = append(switches, p2p.MakeConnectedSwitches(config.P2P, 1, func(i int, s *p2p.Switch) *p2p.Switch { diff --git a/blockchain/v1/peer.go b/blockchain/v1/peer.go deleted file mode 100644 index ad26585b3..000000000 --- a/blockchain/v1/peer.go +++ /dev/null @@ -1,211 +0,0 @@ -package v1 - -import ( - "fmt" - "math" - "time" - - flow "github.com/tendermint/tendermint/libs/flowrate" - "github.com/tendermint/tendermint/libs/log" - "github.com/tendermint/tendermint/p2p" - "github.com/tendermint/tendermint/types" -) - -//-------- -// Peer - -// BpPeerParams stores the peer parameters that are used when creating a peer. -type BpPeerParams struct { - timeout time.Duration - minRecvRate int64 - sampleRate time.Duration - windowSize time.Duration -} - -// BpPeer is the datastructure associated with a fast sync peer. -type BpPeer struct { - logger log.Logger - ID p2p.ID - - Base int64 // the peer reported base - Height int64 // the peer reported height - NumPendingBlockRequests int // number of requests still waiting for block responses - blocks map[int64]*types.Block // blocks received or expected to be received from this peer - blockResponseTimer *time.Timer - recvMonitor *flow.Monitor - params *BpPeerParams // parameters for timer and monitor - - onErr func(err error, peerID p2p.ID) // function to call on error -} - -// NewBpPeer creates a new peer. -func NewBpPeer(peerID p2p.ID, base int64, height int64, - onErr func(err error, peerID p2p.ID), params *BpPeerParams) *BpPeer { - - if params == nil { - params = BpPeerDefaultParams() - } - return &BpPeer{ - ID: peerID, - Base: base, - Height: height, - blocks: make(map[int64]*types.Block, maxRequestsPerPeer), - logger: log.NewNopLogger(), - onErr: onErr, - params: params, - } -} - -// String returns a string representation of a peer. -func (peer *BpPeer) String() string { - return fmt.Sprintf("peer: %v height: %v pending: %v", peer.ID, peer.Height, peer.NumPendingBlockRequests) -} - -// SetLogger sets the logger of the peer. -func (peer *BpPeer) SetLogger(l log.Logger) { - peer.logger = l -} - -// Cleanup performs cleanup of the peer, removes blocks, requests, stops timer and monitor. -func (peer *BpPeer) Cleanup() { - if peer.blockResponseTimer != nil { - peer.blockResponseTimer.Stop() - } - if peer.NumPendingBlockRequests != 0 { - peer.logger.Info("peer with pending requests is being cleaned", "peer", peer.ID) - } - if len(peer.blocks)-peer.NumPendingBlockRequests != 0 { - peer.logger.Info("peer with pending blocks is being cleaned", "peer", peer.ID) - } - for h := range peer.blocks { - delete(peer.blocks, h) - } - peer.NumPendingBlockRequests = 0 - peer.recvMonitor = nil -} - -// BlockAtHeight returns the block at a given height if available and errMissingBlock otherwise. -func (peer *BpPeer) BlockAtHeight(height int64) (*types.Block, error) { - block, ok := peer.blocks[height] - if !ok { - return nil, errMissingBlock - } - if block == nil { - return nil, errMissingBlock - } - return peer.blocks[height], nil -} - -// AddBlock adds a block at peer level. Block must be non-nil and recvSize a positive integer -// The peer must have a pending request for this block. -func (peer *BpPeer) AddBlock(block *types.Block, recvSize int) error { - if block == nil || recvSize < 0 { - panic("bad parameters") - } - existingBlock, ok := peer.blocks[block.Height] - if !ok { - peer.logger.Error("unsolicited block", "blockHeight", block.Height, "peer", peer.ID) - return errMissingBlock - } - if existingBlock != nil { - peer.logger.Error("already have a block for height", "height", block.Height) - return errDuplicateBlock - } - if peer.NumPendingBlockRequests == 0 { - panic("peer does not have pending requests") - } - peer.blocks[block.Height] = block - peer.NumPendingBlockRequests-- - if peer.NumPendingBlockRequests == 0 { - peer.stopMonitor() - peer.stopBlockResponseTimer() - } else { - peer.recvMonitor.Update(recvSize) - peer.resetBlockResponseTimer() - } - return nil -} - -// RemoveBlock removes the block of given height -func (peer *BpPeer) RemoveBlock(height int64) { - delete(peer.blocks, height) -} - -// RequestSent records that a request was sent, and starts the peer timer and monitor if needed. -func (peer *BpPeer) RequestSent(height int64) { - peer.blocks[height] = nil - - if peer.NumPendingBlockRequests == 0 { - peer.startMonitor() - peer.resetBlockResponseTimer() - } - peer.NumPendingBlockRequests++ -} - -// CheckRate verifies that the response rate of the peer is acceptable (higher than the minimum allowed). -func (peer *BpPeer) CheckRate() error { - if peer.NumPendingBlockRequests == 0 { - return nil - } - curRate := peer.recvMonitor.Status().CurRate - // curRate can be 0 on start - if curRate != 0 && curRate < peer.params.minRecvRate { - err := errSlowPeer - peer.logger.Error("SendTimeout", "peer", peer, - "reason", err, - "curRate", fmt.Sprintf("%d KB/s", curRate/1024), - "minRate", fmt.Sprintf("%d KB/s", peer.params.minRecvRate/1024)) - return err - } - return nil -} - -func (peer *BpPeer) onTimeout() { - peer.onErr(errNoPeerResponse, peer.ID) -} - -func (peer *BpPeer) stopMonitor() { - peer.recvMonitor.Done() - peer.recvMonitor = nil -} - -func (peer *BpPeer) startMonitor() { - peer.recvMonitor = flow.New(peer.params.sampleRate, peer.params.windowSize) - initialValue := float64(peer.params.minRecvRate) * math.E - peer.recvMonitor.SetREMA(initialValue) -} - -func (peer *BpPeer) resetBlockResponseTimer() { - if peer.blockResponseTimer == nil { - peer.blockResponseTimer = time.AfterFunc(peer.params.timeout, peer.onTimeout) - } else { - peer.blockResponseTimer.Reset(peer.params.timeout) - } -} - -func (peer *BpPeer) stopBlockResponseTimer() bool { - if peer.blockResponseTimer == nil { - return false - } - return peer.blockResponseTimer.Stop() -} - -// BpPeerDefaultParams returns the default peer parameters. -func BpPeerDefaultParams() *BpPeerParams { - return &BpPeerParams{ - // Timeout for a peer to respond to a block request. - timeout: 15 * time.Second, - - // Minimum recv rate to ensure we're receiving blocks from a peer fast - // enough. If a peer is not sending data at at least that rate, we - // consider them to have timedout and we disconnect. - // - // Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s, - // sending data across atlantic ~ 7.5 KB/s. - minRecvRate: int64(7680), - - // Monitor parameters - sampleRate: time.Second, - windowSize: 40 * time.Second, - } -} diff --git a/blockchain/v1/peer_test.go b/blockchain/v1/peer_test.go deleted file mode 100644 index fd9e9f14b..000000000 --- a/blockchain/v1/peer_test.go +++ /dev/null @@ -1,280 +0,0 @@ -package v1 - -import ( - "sync" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/tendermint/tendermint/libs/log" - tmrand "github.com/tendermint/tendermint/libs/rand" - "github.com/tendermint/tendermint/p2p" - "github.com/tendermint/tendermint/types" -) - -func TestPeerMonitor(t *testing.T) { - peer := NewBpPeer( - p2p.ID(tmrand.Str(12)), 0, 10, - func(err error, _ p2p.ID) {}, - nil) - peer.SetLogger(log.TestingLogger()) - peer.startMonitor() - assert.NotNil(t, peer.recvMonitor) - peer.stopMonitor() - assert.Nil(t, peer.recvMonitor) -} - -func TestPeerResetBlockResponseTimer(t *testing.T) { - var ( - numErrFuncCalls int // number of calls to the errFunc - lastErr error // last generated error - peerTestMtx sync.Mutex // modifications of ^^ variables are also done from timer handler goroutine - ) - params := &BpPeerParams{timeout: 20 * time.Millisecond} - - peer := NewBpPeer( - p2p.ID(tmrand.Str(12)), 0, 10, - func(err error, _ p2p.ID) { - peerTestMtx.Lock() - defer peerTestMtx.Unlock() - lastErr = err - numErrFuncCalls++ - }, - params) - - peer.SetLogger(log.TestingLogger()) - checkByStoppingPeerTimer(t, peer, false) - - // initial reset call with peer having a nil timer - peer.resetBlockResponseTimer() - assert.NotNil(t, peer.blockResponseTimer) - // make sure timer is running and stop it - checkByStoppingPeerTimer(t, peer, true) - - // reset with running timer - peer.resetBlockResponseTimer() - time.Sleep(5 * time.Millisecond) - peer.resetBlockResponseTimer() - assert.NotNil(t, peer.blockResponseTimer) - - // let the timer expire and ... - time.Sleep(50 * time.Millisecond) - // ... check timer is not running - checkByStoppingPeerTimer(t, peer, false) - - peerTestMtx.Lock() - // ... check errNoPeerResponse has been sent - assert.Equal(t, 1, numErrFuncCalls) - assert.Equal(t, lastErr, errNoPeerResponse) - peerTestMtx.Unlock() -} - -func TestPeerRequestSent(t *testing.T) { - params := &BpPeerParams{timeout: 2 * time.Millisecond} - - peer := NewBpPeer( - p2p.ID(tmrand.Str(12)), 0, 10, - func(err error, _ p2p.ID) {}, - params) - - peer.SetLogger(log.TestingLogger()) - - peer.RequestSent(1) - assert.NotNil(t, peer.recvMonitor) - assert.NotNil(t, peer.blockResponseTimer) - assert.Equal(t, 1, peer.NumPendingBlockRequests) - - peer.RequestSent(1) - assert.NotNil(t, peer.recvMonitor) - assert.NotNil(t, peer.blockResponseTimer) - assert.Equal(t, 2, peer.NumPendingBlockRequests) -} - -func TestPeerGetAndRemoveBlock(t *testing.T) { - peer := NewBpPeer( - p2p.ID(tmrand.Str(12)), 0, 100, - func(err error, _ p2p.ID) {}, - nil) - - // Change peer height - peer.Height = int64(10) - assert.Equal(t, int64(10), peer.Height) - - // request some blocks and receive few of them - for i := 1; i <= 10; i++ { - peer.RequestSent(int64(i)) - if i > 5 { - // only receive blocks 1..5 - continue - } - _ = peer.AddBlock(makeSmallBlock(i), 10) - } - - tests := []struct { - name string - height int64 - wantErr error - blockPresent bool - }{ - {"no request", 100, errMissingBlock, false}, - {"no block", 6, errMissingBlock, false}, - {"block 1 present", 1, nil, true}, - {"block max present", 5, nil, true}, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - // try to get the block - b, err := peer.BlockAtHeight(tt.height) - assert.Equal(t, tt.wantErr, err) - assert.Equal(t, tt.blockPresent, b != nil) - - // remove the block - peer.RemoveBlock(tt.height) - _, err = peer.BlockAtHeight(tt.height) - assert.Equal(t, errMissingBlock, err) - }) - } -} - -func TestPeerAddBlock(t *testing.T) { - peer := NewBpPeer( - p2p.ID(tmrand.Str(12)), 0, 100, - func(err error, _ p2p.ID) {}, - nil) - - // request some blocks, receive one - for i := 1; i <= 10; i++ { - peer.RequestSent(int64(i)) - if i == 5 { - // receive block 5 - _ = peer.AddBlock(makeSmallBlock(i), 10) - } - } - - tests := []struct { - name string - height int64 - wantErr error - blockPresent bool - }{ - {"no request", 50, errMissingBlock, false}, - {"duplicate block", 5, errDuplicateBlock, true}, - {"block 1 successfully received", 1, nil, true}, - {"block max successfully received", 10, nil, true}, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - // try to get the block - err := peer.AddBlock(makeSmallBlock(int(tt.height)), 10) - assert.Equal(t, tt.wantErr, err) - _, err = peer.BlockAtHeight(tt.height) - assert.Equal(t, tt.blockPresent, err == nil) - }) - } -} - -func TestPeerOnErrFuncCalledDueToExpiration(t *testing.T) { - - params := &BpPeerParams{timeout: 10 * time.Millisecond} - var ( - numErrFuncCalls int // number of calls to the onErr function - lastErr error // last generated error - peerTestMtx sync.Mutex // modifications of ^^ variables are also done from timer handler goroutine - ) - - peer := NewBpPeer( - p2p.ID(tmrand.Str(12)), 0, 10, - func(err error, _ p2p.ID) { - peerTestMtx.Lock() - defer peerTestMtx.Unlock() - lastErr = err - numErrFuncCalls++ - }, - params) - - peer.SetLogger(log.TestingLogger()) - - peer.RequestSent(1) - time.Sleep(50 * time.Millisecond) - // timer should have expired by now, check that the on error function was called - peerTestMtx.Lock() - assert.Equal(t, 1, numErrFuncCalls) - assert.Equal(t, errNoPeerResponse, lastErr) - peerTestMtx.Unlock() -} - -func TestPeerCheckRate(t *testing.T) { - params := &BpPeerParams{ - timeout: time.Second, - minRecvRate: int64(100), // 100 bytes/sec exponential moving average - } - peer := NewBpPeer( - p2p.ID(tmrand.Str(12)), 0, 10, - func(err error, _ p2p.ID) {}, - params) - peer.SetLogger(log.TestingLogger()) - - require.Nil(t, peer.CheckRate()) - - for i := 0; i < 40; i++ { - peer.RequestSent(int64(i)) - } - - // monitor starts with a higher rEMA (~ 2*minRecvRate), wait for it to go down - time.Sleep(900 * time.Millisecond) - - // normal peer - send a bit more than 100 bytes/sec, > 10 bytes/100msec, check peer is not considered slow - for i := 0; i < 10; i++ { - _ = peer.AddBlock(makeSmallBlock(i), 11) - time.Sleep(100 * time.Millisecond) - require.Nil(t, peer.CheckRate()) - } - - // slow peer - send a bit less than 10 bytes/100msec - for i := 10; i < 20; i++ { - _ = peer.AddBlock(makeSmallBlock(i), 9) - time.Sleep(100 * time.Millisecond) - } - // check peer is considered slow - assert.Equal(t, errSlowPeer, peer.CheckRate()) -} - -func TestPeerCleanup(t *testing.T) { - params := &BpPeerParams{timeout: 2 * time.Millisecond} - - peer := NewBpPeer( - p2p.ID(tmrand.Str(12)), 0, 10, - func(err error, _ p2p.ID) {}, - params) - peer.SetLogger(log.TestingLogger()) - - assert.Nil(t, peer.blockResponseTimer) - peer.RequestSent(1) - assert.NotNil(t, peer.blockResponseTimer) - - peer.Cleanup() - checkByStoppingPeerTimer(t, peer, false) -} - -// Check if peer timer is running or not (a running timer can be successfully stopped). -// Note: stops the timer. -func checkByStoppingPeerTimer(t *testing.T, peer *BpPeer, running bool) { - assert.NotPanics(t, func() { - stopped := peer.stopBlockResponseTimer() - if running { - assert.True(t, stopped) - } else { - assert.False(t, stopped) - } - }) -} - -func makeSmallBlock(height int) *types.Block { - return types.MakeBlock(int64(height), []types.Tx{types.Tx("foo")}, nil, nil) -} diff --git a/blockchain/v1/pool.go b/blockchain/v1/pool.go deleted file mode 100644 index 27e0f3a04..000000000 --- a/blockchain/v1/pool.go +++ /dev/null @@ -1,370 +0,0 @@ -package v1 - -import ( - "sort" - - "github.com/tendermint/tendermint/libs/log" - "github.com/tendermint/tendermint/p2p" - "github.com/tendermint/tendermint/types" -) - -// BlockPool keeps track of the fast sync peers, block requests and block responses. -type BlockPool struct { - logger log.Logger - // Set of peers that have sent status responses, with height bigger than pool.Height - peers map[p2p.ID]*BpPeer - // Set of block heights and the corresponding peers from where a block response is expected or has been received. - blocks map[int64]p2p.ID - - plannedRequests map[int64]struct{} // list of blocks to be assigned peers for blockRequest - nextRequestHeight int64 // next height to be added to plannedRequests - - Height int64 // height of next block to execute - MaxPeerHeight int64 // maximum height of all peers - toBcR bcReactor -} - -// NewBlockPool creates a new BlockPool. -func NewBlockPool(height int64, toBcR bcReactor) *BlockPool { - return &BlockPool{ - Height: height, - MaxPeerHeight: 0, - peers: make(map[p2p.ID]*BpPeer), - blocks: make(map[int64]p2p.ID), - plannedRequests: make(map[int64]struct{}), - nextRequestHeight: height, - toBcR: toBcR, - } -} - -// SetLogger sets the logger of the pool. -func (pool *BlockPool) SetLogger(l log.Logger) { - pool.logger = l -} - -// ReachedMaxHeight check if the pool has reached the maximum peer height. -func (pool *BlockPool) ReachedMaxHeight() bool { - return pool.Height >= pool.MaxPeerHeight -} - -func (pool *BlockPool) rescheduleRequest(peerID p2p.ID, height int64) { - pool.logger.Info("reschedule requests made to peer for height ", "peerID", peerID, "height", height) - pool.plannedRequests[height] = struct{}{} - delete(pool.blocks, height) - pool.peers[peerID].RemoveBlock(height) -} - -// Updates the pool's max height. If no peers are left MaxPeerHeight is set to 0. -func (pool *BlockPool) updateMaxPeerHeight() { - var newMax int64 - for _, peer := range pool.peers { - peerHeight := peer.Height - if peerHeight > newMax { - newMax = peerHeight - } - } - pool.MaxPeerHeight = newMax -} - -// UpdatePeer adds a new peer or updates an existing peer with a new base and height. -// If a peer is short it is not added. -func (pool *BlockPool) UpdatePeer(peerID p2p.ID, base int64, height int64) error { - - peer := pool.peers[peerID] - - if peer == nil { - if height < pool.Height { - pool.logger.Info("Peer height too small", - "peer", peerID, "height", height, "fsm_height", pool.Height) - return errPeerTooShort - } - // Add new peer. - peer = NewBpPeer(peerID, base, height, pool.toBcR.sendPeerError, nil) - peer.SetLogger(pool.logger.With("peer", peerID)) - pool.peers[peerID] = peer - pool.logger.Info("added peer", "peerID", peerID, "base", base, "height", height, "num_peers", len(pool.peers)) - } else { - // Check if peer is lowering its height. This is not allowed. - if height < peer.Height { - pool.RemovePeer(peerID, errPeerLowersItsHeight) - return errPeerLowersItsHeight - } - // Update existing peer. - peer.Base = base - peer.Height = height - } - - // Update the pool's MaxPeerHeight if needed. - pool.updateMaxPeerHeight() - - return nil -} - -// Cleans and deletes the peer. Recomputes the max peer height. -func (pool *BlockPool) deletePeer(peer *BpPeer) { - if peer == nil { - return - } - peer.Cleanup() - delete(pool.peers, peer.ID) - - if peer.Height == pool.MaxPeerHeight { - pool.updateMaxPeerHeight() - } -} - -// RemovePeer removes the blocks and requests from the peer, reschedules them and deletes the peer. -func (pool *BlockPool) RemovePeer(peerID p2p.ID, err error) { - peer := pool.peers[peerID] - if peer == nil { - return - } - pool.logger.Info("removing peer", "peerID", peerID, "error", err) - - // Reschedule the block requests made to the peer, or received and not processed yet. - // Note that some of the requests may be removed further down. - for h := range pool.peers[peerID].blocks { - pool.rescheduleRequest(peerID, h) - } - - oldMaxPeerHeight := pool.MaxPeerHeight - // Delete the peer. This operation may result in the pool's MaxPeerHeight being lowered. - pool.deletePeer(peer) - - // Check if the pool's MaxPeerHeight has been lowered. - // This may happen if the tallest peer has been removed. - if oldMaxPeerHeight > pool.MaxPeerHeight { - // Remove any planned requests for heights over the new MaxPeerHeight. - for h := range pool.plannedRequests { - if h > pool.MaxPeerHeight { - delete(pool.plannedRequests, h) - } - } - // Adjust the nextRequestHeight to the new max plus one. - if pool.nextRequestHeight > pool.MaxPeerHeight { - pool.nextRequestHeight = pool.MaxPeerHeight + 1 - } - } -} - -func (pool *BlockPool) removeShortPeers() { - for _, peer := range pool.peers { - if peer.Height < pool.Height { - pool.RemovePeer(peer.ID, nil) - } - } -} - -func (pool *BlockPool) removeBadPeers() { - pool.removeShortPeers() - for _, peer := range pool.peers { - if err := peer.CheckRate(); err != nil { - pool.RemovePeer(peer.ID, err) - pool.toBcR.sendPeerError(err, peer.ID) - } - } -} - -// MakeNextRequests creates more requests if the block pool is running low. -func (pool *BlockPool) MakeNextRequests(maxNumRequests int) { - heights := pool.makeRequestBatch(maxNumRequests) - if len(heights) != 0 { - pool.logger.Info("makeNextRequests will make following requests", - "number", len(heights), "heights", heights) - } - - for _, height := range heights { - h := int64(height) - if !pool.sendRequest(h) { - // If a good peer was not found for sending the request at height h then return, - // as it shouldn't be possible to find a peer for h+1. - return - } - delete(pool.plannedRequests, h) - } -} - -// Makes a batch of requests sorted by height such that the block pool has up to maxNumRequests entries. -func (pool *BlockPool) makeRequestBatch(maxNumRequests int) []int { - pool.removeBadPeers() - // At this point pool.requests may include heights for requests to be redone due to removal of peers: - // - peers timed out or were removed by switch - // - FSM timed out on waiting to advance the block execution due to missing blocks at h or h+1 - // Determine the number of requests needed by subtracting the number of requests already made from the maximum - // allowed - numNeeded := maxNumRequests - len(pool.blocks) - for len(pool.plannedRequests) < numNeeded { - if pool.nextRequestHeight > pool.MaxPeerHeight { - break - } - pool.plannedRequests[pool.nextRequestHeight] = struct{}{} - pool.nextRequestHeight++ - } - - heights := make([]int, 0, len(pool.plannedRequests)) - for k := range pool.plannedRequests { - heights = append(heights, int(k)) - } - sort.Ints(heights) - return heights -} - -func (pool *BlockPool) sendRequest(height int64) bool { - for _, peer := range pool.peers { - if peer.NumPendingBlockRequests >= maxRequestsPerPeer { - continue - } - if peer.Base > height || peer.Height < height { - continue - } - - err := pool.toBcR.sendBlockRequest(peer.ID, height) - if err == errNilPeerForBlockRequest { - // Switch does not have this peer, remove it and continue to look for another peer. - pool.logger.Error("switch does not have peer..removing peer selected for height", "peer", - peer.ID, "height", height) - pool.RemovePeer(peer.ID, err) - continue - } - - if err == errSendQueueFull { - pool.logger.Error("peer queue is full", "peer", peer.ID, "height", height) - continue - } - - pool.logger.Info("assigned request to peer", "peer", peer.ID, "height", height) - - pool.blocks[height] = peer.ID - peer.RequestSent(height) - - return true - } - pool.logger.Error("could not find peer to send request for block at height", "height", height) - return false -} - -// AddBlock validates that the block comes from the peer it was expected from and stores it in the 'blocks' map. -func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) error { - peer, ok := pool.peers[peerID] - if !ok { - pool.logger.Error("block from unknown peer", "height", block.Height, "peer", peerID) - return errBadDataFromPeer - } - if wantPeerID, ok := pool.blocks[block.Height]; ok && wantPeerID != peerID { - pool.logger.Error("block received from wrong peer", "height", block.Height, - "peer", peerID, "expected_peer", wantPeerID) - return errBadDataFromPeer - } - - return peer.AddBlock(block, blockSize) -} - -// BlockData stores the peer responsible to deliver a block and the actual block if delivered. -type BlockData struct { - block *types.Block - peer *BpPeer -} - -// BlockAndPeerAtHeight retrieves the block and delivery peer at specified height. -// Returns errMissingBlock if a block was not found -func (pool *BlockPool) BlockAndPeerAtHeight(height int64) (bData *BlockData, err error) { - peerID := pool.blocks[height] - peer := pool.peers[peerID] - if peer == nil { - return nil, errMissingBlock - } - - block, err := peer.BlockAtHeight(height) - if err != nil { - return nil, err - } - - return &BlockData{peer: peer, block: block}, nil - -} - -// FirstTwoBlocksAndPeers returns the blocks and the delivery peers at pool's height H and H+1. -func (pool *BlockPool) FirstTwoBlocksAndPeers() (first, second *BlockData, err error) { - first, err = pool.BlockAndPeerAtHeight(pool.Height) - second, err2 := pool.BlockAndPeerAtHeight(pool.Height + 1) - if err == nil { - err = err2 - } - return -} - -// InvalidateFirstTwoBlocks removes the peers that sent us the first two blocks, blocks are removed by RemovePeer(). -func (pool *BlockPool) InvalidateFirstTwoBlocks(err error) { - first, err1 := pool.BlockAndPeerAtHeight(pool.Height) - second, err2 := pool.BlockAndPeerAtHeight(pool.Height + 1) - - if err1 == nil { - pool.RemovePeer(first.peer.ID, err) - } - if err2 == nil { - pool.RemovePeer(second.peer.ID, err) - } -} - -// ProcessedCurrentHeightBlock performs cleanup after a block is processed. It removes block at pool height and -// the peers that are now short. -func (pool *BlockPool) ProcessedCurrentHeightBlock() { - peerID, peerOk := pool.blocks[pool.Height] - if peerOk { - pool.peers[peerID].RemoveBlock(pool.Height) - } - delete(pool.blocks, pool.Height) - pool.logger.Debug("removed block at height", "height", pool.Height) - pool.Height++ - pool.removeShortPeers() -} - -// RemovePeerAtCurrentHeights checks if a block at pool's height H exists and if not, it removes the -// delivery peer and returns. If a block at height H exists then the check and peer removal is done for H+1. -// This function is called when the FSM is not able to make progress for some time. -// This happens if either the block H or H+1 have not been delivered. -func (pool *BlockPool) RemovePeerAtCurrentHeights(err error) { - peerID := pool.blocks[pool.Height] - peer, ok := pool.peers[peerID] - if ok { - if _, err := peer.BlockAtHeight(pool.Height); err != nil { - pool.logger.Info("remove peer that hasn't sent block at pool.Height", - "peer", peerID, "height", pool.Height) - pool.RemovePeer(peerID, err) - return - } - } - peerID = pool.blocks[pool.Height+1] - peer, ok = pool.peers[peerID] - if ok { - if _, err := peer.BlockAtHeight(pool.Height + 1); err != nil { - pool.logger.Info("remove peer that hasn't sent block at pool.Height+1", - "peer", peerID, "height", pool.Height+1) - pool.RemovePeer(peerID, err) - return - } - } -} - -// Cleanup performs pool and peer cleanup -func (pool *BlockPool) Cleanup() { - for id, peer := range pool.peers { - peer.Cleanup() - delete(pool.peers, id) - } - pool.plannedRequests = make(map[int64]struct{}) - pool.blocks = make(map[int64]p2p.ID) - pool.nextRequestHeight = 0 - pool.Height = 0 - pool.MaxPeerHeight = 0 -} - -// NumPeers returns the number of peers in the pool -func (pool *BlockPool) NumPeers() int { - return len(pool.peers) -} - -// NeedsBlocks returns true if more blocks are required. -func (pool *BlockPool) NeedsBlocks() bool { - return len(pool.blocks) < maxNumRequests -} diff --git a/blockchain/v1/pool_test.go b/blockchain/v1/pool_test.go deleted file mode 100644 index 31b9d09f7..000000000 --- a/blockchain/v1/pool_test.go +++ /dev/null @@ -1,691 +0,0 @@ -package v1 - -import ( - "testing" - "time" - - "github.com/stretchr/testify/assert" - - "github.com/tendermint/tendermint/libs/log" - "github.com/tendermint/tendermint/p2p" - "github.com/tendermint/tendermint/types" -) - -type testPeer struct { - id p2p.ID - base int64 - height int64 -} - -type testBcR struct { - logger log.Logger -} - -type testValues struct { - numRequestsSent int -} - -var testResults testValues - -func resetPoolTestResults() { - testResults.numRequestsSent = 0 -} - -func (testR *testBcR) sendPeerError(err error, peerID p2p.ID) { -} - -func (testR *testBcR) sendStatusRequest() { -} - -func (testR *testBcR) sendBlockRequest(peerID p2p.ID, height int64) error { - testResults.numRequestsSent++ - return nil -} - -func (testR *testBcR) resetStateTimer(name string, timer **time.Timer, timeout time.Duration) { -} - -func (testR *testBcR) switchToConsensus() { - -} - -func newTestBcR() *testBcR { - testBcR := &testBcR{logger: log.TestingLogger()} - return testBcR -} - -type tPBlocks struct { - id p2p.ID - create bool -} - -// Makes a block pool with specified current height, list of peers, block requests and block responses -func makeBlockPool(bcr *testBcR, height int64, peers []BpPeer, blocks map[int64]tPBlocks) *BlockPool { - bPool := NewBlockPool(height, bcr) - bPool.SetLogger(bcr.logger) - - txs := []types.Tx{types.Tx("foo"), types.Tx("bar")} - - var maxH int64 - for _, p := range peers { - if p.Height > maxH { - maxH = p.Height - } - bPool.peers[p.ID] = NewBpPeer(p.ID, p.Base, p.Height, bcr.sendPeerError, nil) - bPool.peers[p.ID].SetLogger(bcr.logger) - - } - bPool.MaxPeerHeight = maxH - for h, p := range blocks { - bPool.blocks[h] = p.id - bPool.peers[p.id].RequestSent(h) - if p.create { - // simulate that a block at height h has been received - _ = bPool.peers[p.id].AddBlock(types.MakeBlock(h, txs, nil, nil), 100) - } - } - return bPool -} - -func assertPeerSetsEquivalent(t *testing.T, set1 map[p2p.ID]*BpPeer, set2 map[p2p.ID]*BpPeer) { - assert.Equal(t, len(set1), len(set2)) - for peerID, peer1 := range set1 { - peer2 := set2[peerID] - assert.NotNil(t, peer2) - assert.Equal(t, peer1.NumPendingBlockRequests, peer2.NumPendingBlockRequests) - assert.Equal(t, peer1.Height, peer2.Height) - assert.Equal(t, peer1.Base, peer2.Base) - assert.Equal(t, len(peer1.blocks), len(peer2.blocks)) - for h, block1 := range peer1.blocks { - block2 := peer2.blocks[h] - // block1 and block2 could be nil if a request was made but no block was received - assert.Equal(t, block1, block2) - } - } -} - -func assertBlockPoolEquivalent(t *testing.T, poolWanted, pool *BlockPool) { - assert.Equal(t, poolWanted.blocks, pool.blocks) - assertPeerSetsEquivalent(t, poolWanted.peers, pool.peers) - assert.Equal(t, poolWanted.MaxPeerHeight, pool.MaxPeerHeight) - assert.Equal(t, poolWanted.Height, pool.Height) - -} - -func TestBlockPoolUpdatePeer(t *testing.T) { - testBcR := newTestBcR() - - tests := []struct { - name string - pool *BlockPool - args testPeer - poolWanted *BlockPool - errWanted error - }{ - { - name: "add a first short peer", - pool: makeBlockPool(testBcR, 100, []BpPeer{}, map[int64]tPBlocks{}), - args: testPeer{"P1", 0, 50}, - errWanted: errPeerTooShort, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{}, map[int64]tPBlocks{}), - }, - { - name: "add a first good peer", - pool: makeBlockPool(testBcR, 100, []BpPeer{}, map[int64]tPBlocks{}), - args: testPeer{"P1", 0, 101}, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 101}}, map[int64]tPBlocks{}), - }, - { - name: "add a first good peer with base", - pool: makeBlockPool(testBcR, 100, []BpPeer{}, map[int64]tPBlocks{}), - args: testPeer{"P1", 10, 101}, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Base: 10, Height: 101}}, map[int64]tPBlocks{}), - }, - { - name: "increase the height of P1 from 120 to 123", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}}, map[int64]tPBlocks{}), - args: testPeer{"P1", 0, 123}, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 123}}, map[int64]tPBlocks{}), - }, - { - name: "decrease the height of P1 from 120 to 110", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}}, map[int64]tPBlocks{}), - args: testPeer{"P1", 0, 110}, - errWanted: errPeerLowersItsHeight, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{}, map[int64]tPBlocks{}), - }, - { - name: "decrease the height of P1 from 105 to 102 with blocks", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 105}}, - map[int64]tPBlocks{ - 100: {"P1", true}, 101: {"P1", true}, 102: {"P1", true}}), - args: testPeer{"P1", 0, 102}, - errWanted: errPeerLowersItsHeight, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{}, - map[int64]tPBlocks{}), - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - pool := tt.pool - err := pool.UpdatePeer(tt.args.id, tt.args.base, tt.args.height) - assert.Equal(t, tt.errWanted, err) - assert.Equal(t, tt.poolWanted.blocks, tt.pool.blocks) - assertPeerSetsEquivalent(t, tt.poolWanted.peers, tt.pool.peers) - assert.Equal(t, tt.poolWanted.MaxPeerHeight, tt.pool.MaxPeerHeight) - }) - } -} - -func TestBlockPoolRemovePeer(t *testing.T) { - testBcR := newTestBcR() - - type args struct { - peerID p2p.ID - err error - } - - tests := []struct { - name string - pool *BlockPool - args args - poolWanted *BlockPool - }{ - { - name: "attempt to delete non-existing peer", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}}, map[int64]tPBlocks{}), - args: args{"P99", nil}, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}}, map[int64]tPBlocks{}), - }, - { - name: "delete the only peer without blocks", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}}, map[int64]tPBlocks{}), - args: args{"P1", nil}, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{}, map[int64]tPBlocks{}), - }, - { - name: "delete the shortest of two peers without blocks", - pool: makeBlockPool( - testBcR, - 100, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 120}}, - map[int64]tPBlocks{}), - args: args{"P1", nil}, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P2", Height: 120}}, map[int64]tPBlocks{}), - }, - { - name: "delete the tallest of two peers without blocks", - pool: makeBlockPool( - testBcR, - 100, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 120}}, - map[int64]tPBlocks{}), - args: args{"P2", nil}, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 100}}, map[int64]tPBlocks{}), - }, - { - name: "delete the only peer with block requests sent and blocks received", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}}, - map[int64]tPBlocks{100: {"P1", true}, 101: {"P1", false}}), - args: args{"P1", nil}, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{}, map[int64]tPBlocks{}), - }, - { - name: "delete the shortest of two peers with block requests sent and blocks received", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}, {ID: "P2", Height: 200}}, - map[int64]tPBlocks{100: {"P1", true}, 101: {"P1", false}}), - args: args{"P1", nil}, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P2", Height: 200}}, map[int64]tPBlocks{}), - }, - { - name: "delete the tallest of two peers with block requests sent and blocks received", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}, {ID: "P2", Height: 110}}, - map[int64]tPBlocks{100: {"P1", true}, 101: {"P1", false}}), - args: args{"P1", nil}, - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P2", Height: 110}}, map[int64]tPBlocks{}), - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - tt.pool.RemovePeer(tt.args.peerID, tt.args.err) - assertBlockPoolEquivalent(t, tt.poolWanted, tt.pool) - }) - } -} - -func TestBlockPoolRemoveShortPeers(t *testing.T) { - testBcR := newTestBcR() - - tests := []struct { - name string - pool *BlockPool - poolWanted *BlockPool - }{ - { - name: "no short peers", - pool: makeBlockPool(testBcR, 100, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 110}, {ID: "P3", Height: 120}}, map[int64]tPBlocks{}), - poolWanted: makeBlockPool(testBcR, 100, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 110}, {ID: "P3", Height: 120}}, map[int64]tPBlocks{}), - }, - - { - name: "one short peer", - pool: makeBlockPool(testBcR, 100, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 90}, {ID: "P3", Height: 120}}, map[int64]tPBlocks{}), - poolWanted: makeBlockPool(testBcR, 100, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P3", Height: 120}}, map[int64]tPBlocks{}), - }, - - { - name: "all short peers", - pool: makeBlockPool(testBcR, 100, - []BpPeer{{ID: "P1", Height: 90}, {ID: "P2", Height: 91}, {ID: "P3", Height: 92}}, map[int64]tPBlocks{}), - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{}, map[int64]tPBlocks{}), - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - pool := tt.pool - pool.removeShortPeers() - assertBlockPoolEquivalent(t, tt.poolWanted, tt.pool) - }) - } -} - -func TestBlockPoolSendRequestBatch(t *testing.T) { - type testPeerResult struct { - id p2p.ID - numPendingBlockRequests int - } - - testBcR := newTestBcR() - - tests := []struct { - name string - pool *BlockPool - maxRequestsPerPeer int - expRequests map[int64]bool - expRequestsSent int - expPeerResults []testPeerResult - }{ - { - name: "one peer - send up to maxRequestsPerPeer block requests", - pool: makeBlockPool(testBcR, 10, []BpPeer{{ID: "P1", Height: 100}}, map[int64]tPBlocks{}), - maxRequestsPerPeer: 2, - expRequests: map[int64]bool{10: true, 11: true}, - expRequestsSent: 2, - expPeerResults: []testPeerResult{{id: "P1", numPendingBlockRequests: 2}}, - }, - { - name: "multiple peers - stops at gap between height and base", - pool: makeBlockPool(testBcR, 10, []BpPeer{ - {ID: "P1", Base: 1, Height: 12}, - {ID: "P2", Base: 15, Height: 100}, - }, map[int64]tPBlocks{}), - maxRequestsPerPeer: 10, - expRequests: map[int64]bool{10: true, 11: true, 12: true}, - expRequestsSent: 3, - expPeerResults: []testPeerResult{ - {id: "P1", numPendingBlockRequests: 3}, - {id: "P2", numPendingBlockRequests: 0}, - }, - }, - { - name: "n peers - send n*maxRequestsPerPeer block requests", - pool: makeBlockPool( - testBcR, - 10, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{}), - maxRequestsPerPeer: 2, - expRequests: map[int64]bool{10: true, 11: true}, - expRequestsSent: 4, - expPeerResults: []testPeerResult{ - {id: "P1", numPendingBlockRequests: 2}, - {id: "P2", numPendingBlockRequests: 2}}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - resetPoolTestResults() - - var pool = tt.pool - maxRequestsPerPeer = tt.maxRequestsPerPeer - pool.MakeNextRequests(10) - - assert.Equal(t, tt.expRequestsSent, testResults.numRequestsSent) - for _, tPeer := range tt.expPeerResults { - var peer = pool.peers[tPeer.id] - assert.NotNil(t, peer) - assert.Equal(t, tPeer.numPendingBlockRequests, peer.NumPendingBlockRequests) - } - }) - } -} - -func TestBlockPoolAddBlock(t *testing.T) { - testBcR := newTestBcR() - txs := []types.Tx{types.Tx("foo"), types.Tx("bar")} - - type args struct { - peerID p2p.ID - block *types.Block - blockSize int - } - tests := []struct { - name string - pool *BlockPool - args args - poolWanted *BlockPool - errWanted error - }{ - {name: "block from unknown peer", - pool: makeBlockPool(testBcR, 10, []BpPeer{{ID: "P1", Height: 100}}, map[int64]tPBlocks{}), - args: args{ - peerID: "P2", - block: types.MakeBlock(int64(10), txs, nil, nil), - blockSize: 100, - }, - poolWanted: makeBlockPool(testBcR, 10, []BpPeer{{ID: "P1", Height: 100}}, map[int64]tPBlocks{}), - errWanted: errBadDataFromPeer, - }, - {name: "unexpected block 11 from known peer - waiting for 10", - pool: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}}, - map[int64]tPBlocks{10: {"P1", false}}), - args: args{ - peerID: "P1", - block: types.MakeBlock(int64(11), txs, nil, nil), - blockSize: 100, - }, - poolWanted: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}}, - map[int64]tPBlocks{10: {"P1", false}}), - errWanted: errMissingBlock, - }, - {name: "unexpected block 10 from known peer - already have 10", - pool: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}}, - map[int64]tPBlocks{10: {"P1", true}, 11: {"P1", false}}), - args: args{ - peerID: "P1", - block: types.MakeBlock(int64(10), txs, nil, nil), - blockSize: 100, - }, - poolWanted: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}}, - map[int64]tPBlocks{10: {"P1", true}, 11: {"P1", false}}), - errWanted: errDuplicateBlock, - }, - {name: "unexpected block 10 from known peer P2 - expected 10 to come from P1", - pool: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{10: {"P1", false}}), - args: args{ - peerID: "P2", - block: types.MakeBlock(int64(10), txs, nil, nil), - blockSize: 100, - }, - poolWanted: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{10: {"P1", false}}), - errWanted: errBadDataFromPeer, - }, - {name: "expected block from known peer", - pool: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}}, - map[int64]tPBlocks{10: {"P1", false}}), - args: args{ - peerID: "P1", - block: types.MakeBlock(int64(10), txs, nil, nil), - blockSize: 100, - }, - poolWanted: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}}, - map[int64]tPBlocks{10: {"P1", true}}), - errWanted: nil, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - err := tt.pool.AddBlock(tt.args.peerID, tt.args.block, tt.args.blockSize) - assert.Equal(t, tt.errWanted, err) - assertBlockPoolEquivalent(t, tt.poolWanted, tt.pool) - }) - } -} - -func TestBlockPoolFirstTwoBlocksAndPeers(t *testing.T) { - testBcR := newTestBcR() - - tests := []struct { - name string - pool *BlockPool - firstWanted int64 - secondWanted int64 - errWanted error - }{ - { - name: "both blocks missing", - pool: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{15: {"P1", true}, 16: {"P2", true}}), - errWanted: errMissingBlock, - }, - { - name: "second block missing", - pool: makeBlockPool(testBcR, 15, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{15: {"P1", true}, 18: {"P2", true}}), - firstWanted: 15, - errWanted: errMissingBlock, - }, - { - name: "first block missing", - pool: makeBlockPool(testBcR, 15, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{16: {"P2", true}, 18: {"P2", true}}), - secondWanted: 16, - errWanted: errMissingBlock, - }, - { - name: "both blocks present", - pool: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{10: {"P1", true}, 11: {"P2", true}}), - firstWanted: 10, - secondWanted: 11, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - pool := tt.pool - gotFirst, gotSecond, err := pool.FirstTwoBlocksAndPeers() - assert.Equal(t, tt.errWanted, err) - - if tt.firstWanted != 0 { - peer := pool.blocks[tt.firstWanted] - block := pool.peers[peer].blocks[tt.firstWanted] - assert.Equal(t, block, gotFirst.block, - "BlockPool.FirstTwoBlocksAndPeers() gotFirst = %v, want %v", - tt.firstWanted, gotFirst.block.Height) - } - - if tt.secondWanted != 0 { - peer := pool.blocks[tt.secondWanted] - block := pool.peers[peer].blocks[tt.secondWanted] - assert.Equal(t, block, gotSecond.block, - "BlockPool.FirstTwoBlocksAndPeers() gotFirst = %v, want %v", - tt.secondWanted, gotSecond.block.Height) - } - }) - } -} - -func TestBlockPoolInvalidateFirstTwoBlocks(t *testing.T) { - testBcR := newTestBcR() - - tests := []struct { - name string - pool *BlockPool - poolWanted *BlockPool - }{ - { - name: "both blocks missing", - pool: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{15: {"P1", true}, 16: {"P2", true}}), - poolWanted: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{15: {"P1", true}, 16: {"P2", true}}), - }, - { - name: "second block missing", - pool: makeBlockPool(testBcR, 15, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{15: {"P1", true}, 18: {"P2", true}}), - poolWanted: makeBlockPool(testBcR, 15, - []BpPeer{{ID: "P2", Height: 100}}, - map[int64]tPBlocks{18: {"P2", true}}), - }, - { - name: "first block missing", - pool: makeBlockPool(testBcR, 15, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{18: {"P1", true}, 16: {"P2", true}}), - poolWanted: makeBlockPool(testBcR, 15, - []BpPeer{{ID: "P1", Height: 100}}, - map[int64]tPBlocks{18: {"P1", true}}), - }, - { - name: "both blocks present", - pool: makeBlockPool(testBcR, 10, - []BpPeer{{ID: "P1", Height: 100}, {ID: "P2", Height: 100}}, - map[int64]tPBlocks{10: {"P1", true}, 11: {"P2", true}}), - poolWanted: makeBlockPool(testBcR, 10, - []BpPeer{}, - map[int64]tPBlocks{}), - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - tt.pool.InvalidateFirstTwoBlocks(errNoPeerResponse) - assertBlockPoolEquivalent(t, tt.poolWanted, tt.pool) - }) - } -} - -func TestProcessedCurrentHeightBlock(t *testing.T) { - testBcR := newTestBcR() - - tests := []struct { - name string - pool *BlockPool - poolWanted *BlockPool - }{ - { - name: "one peer", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}}, - map[int64]tPBlocks{100: {"P1", true}, 101: {"P1", true}}), - poolWanted: makeBlockPool(testBcR, 101, []BpPeer{{ID: "P1", Height: 120}}, - map[int64]tPBlocks{101: {"P1", true}}), - }, - { - name: "multiple peers", - pool: makeBlockPool(testBcR, 100, - []BpPeer{{ID: "P1", Height: 120}, {ID: "P2", Height: 120}, {ID: "P3", Height: 130}}, - map[int64]tPBlocks{ - 100: {"P1", true}, 104: {"P1", true}, 105: {"P1", false}, - 101: {"P2", true}, 103: {"P2", false}, - 102: {"P3", true}, 106: {"P3", true}}), - poolWanted: makeBlockPool(testBcR, 101, - []BpPeer{{ID: "P1", Height: 120}, {ID: "P2", Height: 120}, {ID: "P3", Height: 130}}, - map[int64]tPBlocks{ - 104: {"P1", true}, 105: {"P1", false}, - 101: {"P2", true}, 103: {"P2", false}, - 102: {"P3", true}, 106: {"P3", true}}), - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - tt.pool.ProcessedCurrentHeightBlock() - assertBlockPoolEquivalent(t, tt.poolWanted, tt.pool) - }) - } -} - -func TestRemovePeerAtCurrentHeight(t *testing.T) { - testBcR := newTestBcR() - - tests := []struct { - name string - pool *BlockPool - poolWanted *BlockPool - }{ - { - name: "one peer, remove peer for block at H", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}}, - map[int64]tPBlocks{100: {"P1", false}, 101: {"P1", true}}), - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{}, map[int64]tPBlocks{}), - }, - { - name: "one peer, remove peer for block at H+1", - pool: makeBlockPool(testBcR, 100, []BpPeer{{ID: "P1", Height: 120}}, - map[int64]tPBlocks{100: {"P1", true}, 101: {"P1", false}}), - poolWanted: makeBlockPool(testBcR, 100, []BpPeer{}, map[int64]tPBlocks{}), - }, - { - name: "multiple peers, remove peer for block at H", - pool: makeBlockPool(testBcR, 100, - []BpPeer{{ID: "P1", Height: 120}, {ID: "P2", Height: 120}, {ID: "P3", Height: 130}}, - map[int64]tPBlocks{ - 100: {"P1", false}, 104: {"P1", true}, 105: {"P1", false}, - 101: {"P2", true}, 103: {"P2", false}, - 102: {"P3", true}, 106: {"P3", true}}), - poolWanted: makeBlockPool(testBcR, 100, - []BpPeer{{ID: "P2", Height: 120}, {ID: "P3", Height: 130}}, - map[int64]tPBlocks{ - 101: {"P2", true}, 103: {"P2", false}, - 102: {"P3", true}, 106: {"P3", true}}), - }, - { - name: "multiple peers, remove peer for block at H+1", - pool: makeBlockPool(testBcR, 100, - []BpPeer{{ID: "P1", Height: 120}, {ID: "P2", Height: 120}, {ID: "P3", Height: 130}}, - map[int64]tPBlocks{ - 100: {"P1", true}, 104: {"P1", true}, 105: {"P1", false}, - 101: {"P2", false}, 103: {"P2", false}, - 102: {"P3", true}, 106: {"P3", true}}), - poolWanted: makeBlockPool(testBcR, 100, - []BpPeer{{ID: "P1", Height: 120}, {ID: "P3", Height: 130}}, - map[int64]tPBlocks{ - 100: {"P1", true}, 104: {"P1", true}, 105: {"P1", false}, - 102: {"P3", true}, 106: {"P3", true}}), - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - tt.pool.RemovePeerAtCurrentHeights(errNoPeerResponse) - assertBlockPoolEquivalent(t, tt.poolWanted, tt.pool) - }) - } -} diff --git a/blockchain/v1/reactor.go b/blockchain/v1/reactor.go deleted file mode 100644 index 4b12f70e9..000000000 --- a/blockchain/v1/reactor.go +++ /dev/null @@ -1,575 +0,0 @@ -package v1 - -import ( - "fmt" - "reflect" - "time" - - "github.com/tendermint/tendermint/behaviour" //nolint:misspell - bc "github.com/tendermint/tendermint/blockchain" - "github.com/tendermint/tendermint/libs/log" - "github.com/tendermint/tendermint/p2p" - bcproto "github.com/tendermint/tendermint/proto/tendermint/blockchain" - sm "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/store" - "github.com/tendermint/tendermint/types" -) - -const ( - // BlockchainChannel is a channel for blocks and status updates (`BlockStore` height) - BlockchainChannel = byte(0x40) - trySyncIntervalMS = 10 - trySendIntervalMS = 10 - - // ask for best height every 10s - statusUpdateIntervalSeconds = 10 -) - -var ( - // Maximum number of requests that can be pending per peer, i.e. for which requests have been sent but blocks - // have not been received. - maxRequestsPerPeer = 20 - // Maximum number of block requests for the reactor, pending or for which blocks have been received. - maxNumRequests = 64 -) - -type consensusReactor interface { - // for when we switch from blockchain reactor and fast sync to - // the consensus machine - SwitchToConsensus(state sm.State, skipWAL bool) -} - -// BlockchainReactor handles long-term catchup syncing. -type BlockchainReactor struct { - p2p.BaseReactor - - initialState sm.State // immutable - state sm.State - - blockExec *sm.BlockExecutor - store *store.BlockStore - - fastSync bool - stateSynced bool - - fsm *BcReactorFSM - blocksSynced uint64 - - // Receive goroutine forwards messages to this channel to be processed in the context of the poolRoutine. - messagesForFSMCh chan bcReactorMessage - - // Switch goroutine may send RemovePeer to the blockchain reactor. This is an error message that is relayed - // to this channel to be processed in the context of the poolRoutine. - errorsForFSMCh chan bcReactorMessage - - // This channel is used by the FSM and indirectly the block pool to report errors to the blockchain reactor and - // the switch. - eventsFromFSMCh chan bcFsmMessage - - swReporter *behaviour.SwitchReporter -} - -// NewBlockchainReactor returns new reactor instance. -func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *store.BlockStore, - fastSync bool) *BlockchainReactor { - - if state.LastBlockHeight != store.Height() { - panic(fmt.Sprintf("state (%v) and store (%v) height mismatch", state.LastBlockHeight, - store.Height())) - } - - const capacity = 1000 - eventsFromFSMCh := make(chan bcFsmMessage, capacity) - messagesForFSMCh := make(chan bcReactorMessage, capacity) - errorsForFSMCh := make(chan bcReactorMessage, capacity) - - startHeight := store.Height() + 1 - if startHeight == 1 { - startHeight = state.InitialHeight - } - bcR := &BlockchainReactor{ - initialState: state, - state: state, - blockExec: blockExec, - fastSync: fastSync, - store: store, - messagesForFSMCh: messagesForFSMCh, - eventsFromFSMCh: eventsFromFSMCh, - errorsForFSMCh: errorsForFSMCh, - } - fsm := NewFSM(startHeight, bcR) - bcR.fsm = fsm - bcR.BaseReactor = *p2p.NewBaseReactor("BlockchainReactor", bcR) - // bcR.swReporter = behavior.NewSwitchReporter(bcR.BaseReactor.Switch) - - return bcR -} - -// bcReactorMessage is used by the reactor to send messages to the FSM. -type bcReactorMessage struct { - event bReactorEvent - data bReactorEventData -} - -type bFsmEvent uint - -const ( - // message type events - peerErrorEv = iota + 1 - syncFinishedEv -) - -type bFsmEventData struct { - peerID p2p.ID - err error -} - -// bcFsmMessage is used by the FSM to send messages to the reactor -type bcFsmMessage struct { - event bFsmEvent - data bFsmEventData -} - -// SetLogger implements service.Service by setting the logger on reactor and pool. -func (bcR *BlockchainReactor) SetLogger(l log.Logger) { - bcR.BaseService.Logger = l - bcR.fsm.SetLogger(l) -} - -// OnStart implements service.Service. -func (bcR *BlockchainReactor) OnStart() error { - bcR.swReporter = behaviour.NewSwitchReporter(bcR.BaseReactor.Switch) - if bcR.fastSync { - go bcR.poolRoutine() - } - return nil -} - -// OnStop implements service.Service. -func (bcR *BlockchainReactor) OnStop() { - _ = bcR.Stop() -} - -// SwitchToFastSync is called by the state sync reactor when switching to fast sync. -func (bcR *BlockchainReactor) SwitchToFastSync(state sm.State) error { - bcR.fastSync = true - bcR.initialState = state - bcR.state = state - bcR.stateSynced = true - - bcR.fsm = NewFSM(state.LastBlockHeight+1, bcR) - bcR.fsm.SetLogger(bcR.Logger) - go bcR.poolRoutine() - return nil -} - -// GetChannels implements Reactor -func (bcR *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor { - return []*p2p.ChannelDescriptor{ - { - ID: BlockchainChannel, - Priority: 10, - SendQueueCapacity: 2000, - RecvBufferCapacity: 50 * 4096, - RecvMessageCapacity: bc.MaxMsgSize, - }, - } -} - -// AddPeer implements Reactor by sending our state to peer. -func (bcR *BlockchainReactor) AddPeer(peer p2p.Peer) { - msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{ - Base: bcR.store.Base(), - Height: bcR.store.Height(), - }) - if err != nil { - bcR.Logger.Error("could not convert msg to protobuf", "err", err) - return - } - peer.Send(BlockchainChannel, msgBytes) - // it's OK if send fails. will try later in poolRoutine - - // peer is added to the pool once we receive the first - // bcStatusResponseMessage from the peer and call pool.updatePeer() -} - -// sendBlockToPeer loads a block and sends it to the requesting peer. -// If the block doesn't exist a bcNoBlockResponseMessage is sent. -// If all nodes are honest, no node should be requesting for a block that doesn't exist. -func (bcR *BlockchainReactor) sendBlockToPeer(msg *bcproto.BlockRequest, - src p2p.Peer) (queued bool) { - - block := bcR.store.LoadBlock(msg.Height) - if block != nil { - pbbi, err := block.ToProto() - if err != nil { - bcR.Logger.Error("Could not send block message to peer", "err", err) - return false - } - msgBytes, err := bc.EncodeMsg(&bcproto.BlockResponse{Block: pbbi}) - if err != nil { - bcR.Logger.Error("unable to marshal msg", "err", err) - return false - } - return src.TrySend(BlockchainChannel, msgBytes) - } - - bcR.Logger.Info("peer asking for a block we don't have", "src", src, "height", msg.Height) - - msgBytes, err := bc.EncodeMsg(&bcproto.NoBlockResponse{Height: msg.Height}) - if err != nil { - bcR.Logger.Error("unable to marshal msg", "err", err) - return false - } - return src.TrySend(BlockchainChannel, msgBytes) -} - -func (bcR *BlockchainReactor) sendStatusResponseToPeer(msg *bcproto.StatusRequest, src p2p.Peer) (queued bool) { - msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{ - Base: bcR.store.Base(), - Height: bcR.store.Height(), - }) - if err != nil { - bcR.Logger.Error("unable to marshal msg", "err", err) - return false - } - - return src.TrySend(BlockchainChannel, msgBytes) -} - -// RemovePeer implements Reactor by removing peer from the pool. -func (bcR *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) { - msgData := bcReactorMessage{ - event: peerRemoveEv, - data: bReactorEventData{ - peerID: peer.ID(), - err: errSwitchRemovesPeer, - }, - } - bcR.errorsForFSMCh <- msgData -} - -// Receive implements Reactor by handling 4 types of messages (look below). -func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { - msg, err := bc.DecodeMsg(msgBytes) - if err != nil { - bcR.Logger.Error("error decoding message", "src", src, "chId", chID, "err", err) - _ = bcR.swReporter.Report(behaviour.BadMessage(src.ID(), err.Error())) - return - } - - if err = bc.ValidateMsg(msg); err != nil { - bcR.Logger.Error("peer sent us invalid msg", "peer", src, "msg", msg, "err", err) - _ = bcR.swReporter.Report(behaviour.BadMessage(src.ID(), err.Error())) - return - } - - bcR.Logger.Debug("Receive", "src", src, "chID", chID, "msg", msg) - - switch msg := msg.(type) { - case *bcproto.BlockRequest: - if queued := bcR.sendBlockToPeer(msg, src); !queued { - // Unfortunately not queued since the queue is full. - bcR.Logger.Error("Could not send block message to peer", "src", src, "height", msg.Height) - } - - case *bcproto.StatusRequest: - // Send peer our state. - if queued := bcR.sendStatusResponseToPeer(msg, src); !queued { - // Unfortunately not queued since the queue is full. - bcR.Logger.Error("Could not send status message to peer", "src", src) - } - - case *bcproto.BlockResponse: - bi, err := types.BlockFromProto(msg.Block) - if err != nil { - bcR.Logger.Error("error transition block from protobuf", "err", err) - return - } - msgForFSM := bcReactorMessage{ - event: blockResponseEv, - data: bReactorEventData{ - peerID: src.ID(), - height: bi.Height, - block: bi, - length: len(msgBytes), - }, - } - bcR.Logger.Info("Received", "src", src, "height", bi.Height) - bcR.messagesForFSMCh <- msgForFSM - case *bcproto.NoBlockResponse: - msgForFSM := bcReactorMessage{ - event: noBlockResponseEv, - data: bReactorEventData{ - peerID: src.ID(), - height: msg.Height, - }, - } - bcR.Logger.Debug("Peer does not have requested block", "peer", src, "height", msg.Height) - bcR.messagesForFSMCh <- msgForFSM - - case *bcproto.StatusResponse: - // Got a peer status. Unverified. - msgForFSM := bcReactorMessage{ - event: statusResponseEv, - data: bReactorEventData{ - peerID: src.ID(), - height: msg.Height, - length: len(msgBytes), - }, - } - bcR.messagesForFSMCh <- msgForFSM - - default: - bcR.Logger.Error(fmt.Sprintf("unknown message type %v", reflect.TypeOf(msg))) - } -} - -// processBlocksRoutine processes blocks until signlaed to stop over the stopProcessing channel -func (bcR *BlockchainReactor) processBlocksRoutine(stopProcessing chan struct{}) { - - processReceivedBlockTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond) - doProcessBlockCh := make(chan struct{}, 1) - - lastHundred := time.Now() - lastRate := 0.0 - -ForLoop: - for { - select { - case <-stopProcessing: - bcR.Logger.Info("finishing block execution") - break ForLoop - case <-processReceivedBlockTicker.C: // try to execute blocks - select { - case doProcessBlockCh <- struct{}{}: - default: - } - case <-doProcessBlockCh: - for { - err := bcR.processBlock() - if err == errMissingBlock { - break - } - // Notify FSM of block processing result. - msgForFSM := bcReactorMessage{ - event: processedBlockEv, - data: bReactorEventData{ - err: err, - }, - } - _ = bcR.fsm.Handle(&msgForFSM) - - if err != nil { - break - } - - bcR.blocksSynced++ - if bcR.blocksSynced%100 == 0 { - lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds()) - height, maxPeerHeight := bcR.fsm.Status() - bcR.Logger.Info("Fast Sync Rate", "height", height, - "max_peer_height", maxPeerHeight, "blocks/s", lastRate) - lastHundred = time.Now() - } - } - } - } -} - -// poolRoutine receives and handles messages from the Receive() routine and from the FSM. -func (bcR *BlockchainReactor) poolRoutine() { - - bcR.fsm.Start() - - sendBlockRequestTicker := time.NewTicker(trySendIntervalMS * time.Millisecond) - statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second) - - stopProcessing := make(chan struct{}, 1) - go bcR.processBlocksRoutine(stopProcessing) - -ForLoop: - for { - select { - - case <-sendBlockRequestTicker.C: - if !bcR.fsm.NeedsBlocks() { - continue - } - _ = bcR.fsm.Handle(&bcReactorMessage{ - event: makeRequestsEv, - data: bReactorEventData{ - maxNumRequests: maxNumRequests}}) - - case <-statusUpdateTicker.C: - // Ask for status updates. - go bcR.sendStatusRequest() - - case msg := <-bcR.messagesForFSMCh: - // Sent from the Receive() routine when status (statusResponseEv) and - // block (blockResponseEv) response events are received - _ = bcR.fsm.Handle(&msg) - - case msg := <-bcR.errorsForFSMCh: - // Sent from the switch.RemovePeer() routine (RemovePeerEv) and - // FSM state timer expiry routine (stateTimeoutEv). - _ = bcR.fsm.Handle(&msg) - - case msg := <-bcR.eventsFromFSMCh: - switch msg.event { - case syncFinishedEv: - stopProcessing <- struct{}{} - // Sent from the FSM when it enters finished state. - break ForLoop - case peerErrorEv: - // Sent from the FSM when it detects peer error - bcR.reportPeerErrorToSwitch(msg.data.err, msg.data.peerID) - if msg.data.err == errNoPeerResponse { - // Sent from the peer timeout handler routine - _ = bcR.fsm.Handle(&bcReactorMessage{ - event: peerRemoveEv, - data: bReactorEventData{ - peerID: msg.data.peerID, - err: msg.data.err, - }, - }) - } - // else { - // For slow peers, or errors due to blocks received from wrong peer - // the FSM had already removed the peers - // } - default: - bcR.Logger.Error("Event from FSM not supported", "type", msg.event) - } - - case <-bcR.Quit(): - break ForLoop - } - } -} - -func (bcR *BlockchainReactor) reportPeerErrorToSwitch(err error, peerID p2p.ID) { - peer := bcR.Switch.Peers().Get(peerID) - if peer != nil { - _ = bcR.swReporter.Report(behaviour.BadMessage(peerID, err.Error())) - } -} - -func (bcR *BlockchainReactor) processBlock() error { - - first, second, err := bcR.fsm.FirstTwoBlocks() - if err != nil { - // We need both to sync the first block. - return err - } - - chainID := bcR.initialState.ChainID - - firstParts, err := first.MakePartSet(types.BlockPartSizeBytes) - if err != nil { - bcR.Logger.Error("failed to make ", - "height", first.Height, - "err", err.Error()) - return err - } - firstPartSetHeader := firstParts.Header() - firstID := types.BlockID{Hash: first.Hash(), PartSetHeader: firstPartSetHeader} - // Finally, verify the first block using the second's commit - // NOTE: we can probably make this more efficient, but note that calling - // first.Hash() doesn't verify the tx contents, so MakePartSet() is - // currently necessary. - err = bcR.state.Validators.VerifyCommitLight(chainID, firstID, first.Height, second.LastCommit) - if err != nil { - bcR.Logger.Error("error during commit verification", "err", err, - "first", first.Height, "second", second.Height) - return errBlockVerificationFailure - } - - bcR.store.SaveBlock(first, firstParts, second.LastCommit) - - bcR.state, _, err = bcR.blockExec.ApplyBlock(bcR.state, firstID, first) - if err != nil { - panic(fmt.Sprintf("failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err)) - } - - return nil -} - -// Implements bcRNotifier -// sendStatusRequest broadcasts `BlockStore` height. -func (bcR *BlockchainReactor) sendStatusRequest() { - msgBytes, err := bc.EncodeMsg(&bcproto.StatusRequest{}) - if err != nil { - panic(err) - } - bcR.Switch.Broadcast(BlockchainChannel, msgBytes) -} - -// Implements bcRNotifier -// BlockRequest sends `BlockRequest` height. -func (bcR *BlockchainReactor) sendBlockRequest(peerID p2p.ID, height int64) error { - peer := bcR.Switch.Peers().Get(peerID) - if peer == nil { - return errNilPeerForBlockRequest - } - - msgBytes, err := bc.EncodeMsg(&bcproto.BlockRequest{Height: height}) - if err != nil { - return err - } - queued := peer.TrySend(BlockchainChannel, msgBytes) - if !queued { - return errSendQueueFull - } - return nil -} - -// Implements bcRNotifier -func (bcR *BlockchainReactor) switchToConsensus() { - conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor) - if ok { - conR.SwitchToConsensus(bcR.state, bcR.blocksSynced > 0 || bcR.stateSynced) - bcR.eventsFromFSMCh <- bcFsmMessage{event: syncFinishedEv} - } - // else { - // Should only happen during testing. - // } -} - -// Implements bcRNotifier -// Called by FSM and pool: -// - pool calls when it detects slow peer or when peer times out -// - FSM calls when: -// - adding a block (addBlock) fails -// - reactor processing of a block reports failure and FSM sends back the peers of first and second blocks -func (bcR *BlockchainReactor) sendPeerError(err error, peerID p2p.ID) { - bcR.Logger.Info("sendPeerError:", "peer", peerID, "error", err) - msgData := bcFsmMessage{ - event: peerErrorEv, - data: bFsmEventData{ - peerID: peerID, - err: err, - }, - } - bcR.eventsFromFSMCh <- msgData -} - -// Implements bcRNotifier -func (bcR *BlockchainReactor) resetStateTimer(name string, timer **time.Timer, timeout time.Duration) { - if timer == nil { - panic("nil timer pointer parameter") - } - if *timer == nil { - *timer = time.AfterFunc(timeout, func() { - msg := bcReactorMessage{ - event: stateTimeoutEv, - data: bReactorEventData{ - stateName: name, - }, - } - bcR.errorsForFSMCh <- msg - }) - } else { - (*timer).Reset(timeout) - } -} diff --git a/blockchain/v1/reactor_fsm.go b/blockchain/v1/reactor_fsm.go deleted file mode 100644 index 2571dff6a..000000000 --- a/blockchain/v1/reactor_fsm.go +++ /dev/null @@ -1,462 +0,0 @@ -package v1 - -import ( - "errors" - "fmt" - "sync" - "time" - - "github.com/tendermint/tendermint/libs/log" - "github.com/tendermint/tendermint/p2p" - "github.com/tendermint/tendermint/types" -) - -// Blockchain Reactor State -type bcReactorFSMState struct { - name string - - // called when transitioning out of current state - handle func(*BcReactorFSM, bReactorEvent, bReactorEventData) (next *bcReactorFSMState, err error) - // called when entering the state - enter func(fsm *BcReactorFSM) - - // timeout to ensure FSM is not stuck in a state forever - // the timer is owned and run by the fsm instance - timeout time.Duration -} - -func (s *bcReactorFSMState) String() string { - return s.name -} - -// BcReactorFSM is the datastructure for the Blockchain Reactor State Machine -type BcReactorFSM struct { - logger log.Logger - mtx sync.Mutex - - startTime time.Time - - state *bcReactorFSMState - stateTimer *time.Timer - pool *BlockPool - - // interface used to call the Blockchain reactor to send StatusRequest, BlockRequest, reporting errors, etc. - toBcR bcReactor -} - -// NewFSM creates a new reactor FSM. -func NewFSM(height int64, toBcR bcReactor) *BcReactorFSM { - return &BcReactorFSM{ - state: unknown, - startTime: time.Now(), - pool: NewBlockPool(height, toBcR), - toBcR: toBcR, - } -} - -// bReactorEventData is part of the message sent by the reactor to the FSM and used by the state handlers. -type bReactorEventData struct { - peerID p2p.ID - err error // for peer error: timeout, slow; for processed block event if error occurred - base int64 // for status response - height int64 // for status response; for processed block event - block *types.Block // for block response - stateName string // for state timeout events - length int // for block response event, length of received block, used to detect slow peers - maxNumRequests int // for request needed event, maximum number of pending requests -} - -// Blockchain Reactor Events (the input to the state machine) -type bReactorEvent uint - -const ( - // message type events - startFSMEv = iota + 1 - statusResponseEv - blockResponseEv - noBlockResponseEv - processedBlockEv - makeRequestsEv - stopFSMEv - - // other events - peerRemoveEv = iota + 256 - stateTimeoutEv -) - -func (msg *bcReactorMessage) String() string { - var dataStr string - - switch msg.event { - case startFSMEv: - dataStr = "" - case statusResponseEv: - dataStr = fmt.Sprintf("peer=%v base=%v height=%v", msg.data.peerID, msg.data.base, msg.data.height) - case blockResponseEv: - dataStr = fmt.Sprintf("peer=%v block.height=%v length=%v", - msg.data.peerID, msg.data.block.Height, msg.data.length) - case noBlockResponseEv: - dataStr = fmt.Sprintf("peer=%v requested height=%v", - msg.data.peerID, msg.data.height) - case processedBlockEv: - dataStr = fmt.Sprintf("error=%v", msg.data.err) - case makeRequestsEv: - dataStr = "" - case stopFSMEv: - dataStr = "" - case peerRemoveEv: - dataStr = fmt.Sprintf("peer: %v is being removed by the switch", msg.data.peerID) - case stateTimeoutEv: - dataStr = fmt.Sprintf("state=%v", msg.data.stateName) - default: - dataStr = "cannot interpret message data" - } - - return fmt.Sprintf("%v: %v", msg.event, dataStr) -} - -func (ev bReactorEvent) String() string { - switch ev { - case startFSMEv: - return "startFSMEv" - case statusResponseEv: - return "statusResponseEv" - case blockResponseEv: - return "blockResponseEv" - case noBlockResponseEv: - return "noBlockResponseEv" - case processedBlockEv: - return "processedBlockEv" - case makeRequestsEv: - return "makeRequestsEv" - case stopFSMEv: - return "stopFSMEv" - case peerRemoveEv: - return "peerRemoveEv" - case stateTimeoutEv: - return "stateTimeoutEv" - default: - return "event unknown" - } - -} - -// states -var ( - unknown *bcReactorFSMState - waitForPeer *bcReactorFSMState - waitForBlock *bcReactorFSMState - finished *bcReactorFSMState -) - -// timeouts for state timers -const ( - waitForPeerTimeout = 3 * time.Second - waitForBlockAtCurrentHeightTimeout = 10 * time.Second -) - -// errors -var ( - // internal to the package - errNoErrorFinished = errors.New("fast sync is finished") - errInvalidEvent = errors.New("invalid event in current state") - errMissingBlock = errors.New("missing blocks") - errNilPeerForBlockRequest = errors.New("peer for block request does not exist in the switch") - errSendQueueFull = errors.New("block request not made, send-queue is full") - errPeerTooShort = errors.New("peer height too low, old peer removed/ new peer not added") - errSwitchRemovesPeer = errors.New("switch is removing peer") - errTimeoutEventWrongState = errors.New("timeout event for a state different than the current one") - errNoTallerPeer = errors.New("fast sync timed out on waiting for a peer taller than this node") - - // reported eventually to the switch - // handle return - errPeerLowersItsHeight = errors.New("fast sync peer reports a height lower than previous") - // handle return - errNoPeerResponseForCurrentHeights = errors.New("fast sync timed out on peer block response for current heights") - errNoPeerResponse = errors.New("fast sync timed out on peer block response") // xx - errBadDataFromPeer = errors.New("fast sync received block from wrong peer or block is bad") // xx - errDuplicateBlock = errors.New("fast sync received duplicate block from peer") - errBlockVerificationFailure = errors.New("fast sync block verification failure") // xx - errSlowPeer = errors.New("fast sync peer is not sending us data fast enough") // xx - -) - -func init() { - unknown = &bcReactorFSMState{ - name: "unknown", - handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { - switch ev { - case startFSMEv: - // Broadcast Status message. Currently doesn't return non-nil error. - fsm.toBcR.sendStatusRequest() - return waitForPeer, nil - - case stopFSMEv: - return finished, errNoErrorFinished - - default: - return unknown, errInvalidEvent - } - }, - } - - waitForPeer = &bcReactorFSMState{ - name: "waitForPeer", - timeout: waitForPeerTimeout, - enter: func(fsm *BcReactorFSM) { - // Stop when leaving the state. - fsm.resetStateTimer() - }, - handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { - switch ev { - case stateTimeoutEv: - if data.stateName != "waitForPeer" { - fsm.logger.Error("received a state timeout event for different state", - "state", data.stateName) - return waitForPeer, errTimeoutEventWrongState - } - // There was no statusResponse received from any peer. - // Should we send status request again? - return finished, errNoTallerPeer - - case statusResponseEv: - if err := fsm.pool.UpdatePeer(data.peerID, data.base, data.height); err != nil { - if fsm.pool.NumPeers() == 0 { - return waitForPeer, err - } - } - if fsm.stateTimer != nil { - fsm.stateTimer.Stop() - } - return waitForBlock, nil - - case stopFSMEv: - if fsm.stateTimer != nil { - fsm.stateTimer.Stop() - } - return finished, errNoErrorFinished - - default: - return waitForPeer, errInvalidEvent - } - }, - } - - waitForBlock = &bcReactorFSMState{ - name: "waitForBlock", - timeout: waitForBlockAtCurrentHeightTimeout, - enter: func(fsm *BcReactorFSM) { - // Stop when leaving the state. - fsm.resetStateTimer() - }, - handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { - switch ev { - - case statusResponseEv: - err := fsm.pool.UpdatePeer(data.peerID, data.base, data.height) - if fsm.pool.NumPeers() == 0 { - return waitForPeer, err - } - if fsm.pool.ReachedMaxHeight() { - return finished, err - } - return waitForBlock, err - - case blockResponseEv: - fsm.logger.Debug("blockResponseEv", "H", data.block.Height) - err := fsm.pool.AddBlock(data.peerID, data.block, data.length) - if err != nil { - // A block was received that was unsolicited, from unexpected peer, or that we already have it. - // Ignore block, remove peer and send error to switch. - fsm.pool.RemovePeer(data.peerID, err) - fsm.toBcR.sendPeerError(err, data.peerID) - } - if fsm.pool.NumPeers() == 0 { - return waitForPeer, err - } - return waitForBlock, err - case noBlockResponseEv: - fsm.logger.Error("peer does not have requested block", "peer", data.peerID) - - return waitForBlock, nil - case processedBlockEv: - if data.err != nil { - first, second, _ := fsm.pool.FirstTwoBlocksAndPeers() - fsm.logger.Error("error processing block", "err", data.err, - "first", first.block.Height, "second", second.block.Height) - fsm.logger.Error("send peer error for", "peer", first.peer.ID) - fsm.toBcR.sendPeerError(data.err, first.peer.ID) - fsm.logger.Error("send peer error for", "peer", second.peer.ID) - fsm.toBcR.sendPeerError(data.err, second.peer.ID) - // Remove the first two blocks. This will also remove the peers - fsm.pool.InvalidateFirstTwoBlocks(data.err) - } else { - fsm.pool.ProcessedCurrentHeightBlock() - // Since we advanced one block reset the state timer - fsm.resetStateTimer() - } - - // Both cases above may result in achieving maximum height. - if fsm.pool.ReachedMaxHeight() { - return finished, nil - } - - return waitForBlock, data.err - - case peerRemoveEv: - // This event is sent by the switch to remove disconnected and errored peers. - fsm.pool.RemovePeer(data.peerID, data.err) - if fsm.pool.NumPeers() == 0 { - return waitForPeer, nil - } - if fsm.pool.ReachedMaxHeight() { - return finished, nil - } - return waitForBlock, nil - - case makeRequestsEv: - fsm.makeNextRequests(data.maxNumRequests) - return waitForBlock, nil - - case stateTimeoutEv: - if data.stateName != "waitForBlock" { - fsm.logger.Error("received a state timeout event for different state", - "state", data.stateName) - return waitForBlock, errTimeoutEventWrongState - } - // We haven't received the block at current height or height+1. Remove peer. - fsm.pool.RemovePeerAtCurrentHeights(errNoPeerResponseForCurrentHeights) - fsm.resetStateTimer() - if fsm.pool.NumPeers() == 0 { - return waitForPeer, errNoPeerResponseForCurrentHeights - } - if fsm.pool.ReachedMaxHeight() { - return finished, nil - } - return waitForBlock, errNoPeerResponseForCurrentHeights - - case stopFSMEv: - if fsm.stateTimer != nil { - fsm.stateTimer.Stop() - } - return finished, errNoErrorFinished - - default: - return waitForBlock, errInvalidEvent - } - }, - } - - finished = &bcReactorFSMState{ - name: "finished", - enter: func(fsm *BcReactorFSM) { - fsm.logger.Info("Time to switch to consensus reactor!", "height", fsm.pool.Height) - fsm.toBcR.switchToConsensus() - fsm.cleanup() - }, - handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { - return finished, nil - }, - } -} - -// Interface used by FSM for sending Block and Status requests, -// informing of peer errors and state timeouts -// Implemented by BlockchainReactor and tests -type bcReactor interface { - sendStatusRequest() - sendBlockRequest(peerID p2p.ID, height int64) error - sendPeerError(err error, peerID p2p.ID) - resetStateTimer(name string, timer **time.Timer, timeout time.Duration) - switchToConsensus() -} - -// SetLogger sets the FSM logger. -func (fsm *BcReactorFSM) SetLogger(l log.Logger) { - fsm.logger = l - fsm.pool.SetLogger(l) -} - -// Start starts the FSM. -func (fsm *BcReactorFSM) Start() { - _ = fsm.Handle(&bcReactorMessage{event: startFSMEv}) -} - -// Handle processes messages and events sent to the FSM. -func (fsm *BcReactorFSM) Handle(msg *bcReactorMessage) error { - fsm.mtx.Lock() - defer fsm.mtx.Unlock() - fsm.logger.Debug("FSM received", "event", msg, "state", fsm.state) - - if fsm.state == nil { - fsm.state = unknown - } - next, err := fsm.state.handle(fsm, msg.event, msg.data) - if err != nil { - fsm.logger.Error("FSM event handler returned", "err", err, - "state", fsm.state, "event", msg.event) - } - - oldState := fsm.state.name - fsm.transition(next) - if oldState != fsm.state.name { - fsm.logger.Info("FSM changed state", "new_state", fsm.state) - } - return err -} - -func (fsm *BcReactorFSM) transition(next *bcReactorFSMState) { - if next == nil { - return - } - if fsm.state != next { - fsm.state = next - if next.enter != nil { - next.enter(fsm) - } - } -} - -// Called when entering an FSM state in order to detect lack of progress in the state machine. -// Note the use of the 'bcr' interface to facilitate testing without timer expiring. -func (fsm *BcReactorFSM) resetStateTimer() { - fsm.toBcR.resetStateTimer(fsm.state.name, &fsm.stateTimer, fsm.state.timeout) -} - -func (fsm *BcReactorFSM) isCaughtUp() bool { - return fsm.state == finished -} - -func (fsm *BcReactorFSM) makeNextRequests(maxNumRequests int) { - fsm.pool.MakeNextRequests(maxNumRequests) -} - -func (fsm *BcReactorFSM) cleanup() { - fsm.pool.Cleanup() -} - -// NeedsBlocks checks if more block requests are required. -func (fsm *BcReactorFSM) NeedsBlocks() bool { - fsm.mtx.Lock() - defer fsm.mtx.Unlock() - return fsm.state.name == "waitForBlock" && fsm.pool.NeedsBlocks() -} - -// FirstTwoBlocks returns the two blocks at pool height and height+1 -func (fsm *BcReactorFSM) FirstTwoBlocks() (first, second *types.Block, err error) { - fsm.mtx.Lock() - defer fsm.mtx.Unlock() - firstBP, secondBP, err := fsm.pool.FirstTwoBlocksAndPeers() - if err == nil { - first = firstBP.block - second = secondBP.block - } - return -} - -// Status returns the pool's height and the maximum peer height. -func (fsm *BcReactorFSM) Status() (height, maxPeerHeight int64) { - fsm.mtx.Lock() - defer fsm.mtx.Unlock() - return fsm.pool.Height, fsm.pool.MaxPeerHeight -} diff --git a/blockchain/v1/reactor_fsm_test.go b/blockchain/v1/reactor_fsm_test.go deleted file mode 100644 index 99b8d378d..000000000 --- a/blockchain/v1/reactor_fsm_test.go +++ /dev/null @@ -1,944 +0,0 @@ -package v1 - -import ( - "fmt" - "testing" - "time" - - "github.com/stretchr/testify/assert" - - "github.com/tendermint/tendermint/libs/log" - tmmath "github.com/tendermint/tendermint/libs/math" - tmrand "github.com/tendermint/tendermint/libs/rand" - "github.com/tendermint/tendermint/p2p" - "github.com/tendermint/tendermint/types" -) - -type lastBlockRequestT struct { - peerID p2p.ID - height int64 -} - -type lastPeerErrorT struct { - peerID p2p.ID - err error -} - -// reactor for FSM testing -type testReactor struct { - logger log.Logger - fsm *BcReactorFSM - numStatusRequests int - numBlockRequests int - lastBlockRequest lastBlockRequestT - lastPeerError lastPeerErrorT - stateTimerStarts map[string]int -} - -func sendEventToFSM(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) error { - return fsm.Handle(&bcReactorMessage{event: ev, data: data}) -} - -type fsmStepTestValues struct { - currentState string - event bReactorEvent - data bReactorEventData - - wantErr error - wantState string - wantStatusReqSent bool - wantReqIncreased bool - wantNewBlocks []int64 - wantRemovedPeers []p2p.ID -} - -// --------------------------------------------------------------------------- -// helper test function for different FSM events, state and expected behavior -func sStopFSMEv(current, expected string) fsmStepTestValues { - return fsmStepTestValues{ - currentState: current, - event: stopFSMEv, - wantState: expected, - wantErr: errNoErrorFinished} -} - -func sUnknownFSMEv(current string) fsmStepTestValues { - return fsmStepTestValues{ - currentState: current, - event: 1234, - wantState: current, - wantErr: errInvalidEvent} -} - -func sStartFSMEv() fsmStepTestValues { - return fsmStepTestValues{ - currentState: "unknown", - event: startFSMEv, - wantState: "waitForPeer", - wantStatusReqSent: true} -} - -func sStateTimeoutEv(current, expected string, timedoutState string, wantErr error) fsmStepTestValues { - return fsmStepTestValues{ - currentState: current, - event: stateTimeoutEv, - data: bReactorEventData{ - stateName: timedoutState, - }, - wantState: expected, - wantErr: wantErr, - } -} - -func sProcessedBlockEv(current, expected string, reactorError error) fsmStepTestValues { - return fsmStepTestValues{ - currentState: current, - event: processedBlockEv, - data: bReactorEventData{ - err: reactorError, - }, - wantState: expected, - wantErr: reactorError, - } -} - -func sStatusEv(current, expected string, peerID p2p.ID, height int64, err error) fsmStepTestValues { - return fsmStepTestValues{ - currentState: current, - event: statusResponseEv, - data: bReactorEventData{peerID: peerID, height: height}, - wantState: expected, - wantErr: err} -} - -func sMakeRequestsEv(current, expected string, maxPendingRequests int) fsmStepTestValues { - return fsmStepTestValues{ - currentState: current, - event: makeRequestsEv, - data: bReactorEventData{maxNumRequests: maxPendingRequests}, - wantState: expected, - wantReqIncreased: true, - } -} - -func sMakeRequestsEvErrored(current, expected string, - maxPendingRequests int, err error, peersRemoved []p2p.ID) fsmStepTestValues { - return fsmStepTestValues{ - currentState: current, - event: makeRequestsEv, - data: bReactorEventData{maxNumRequests: maxPendingRequests}, - wantState: expected, - wantErr: err, - wantRemovedPeers: peersRemoved, - wantReqIncreased: true, - } -} - -func sBlockRespEv(current, expected string, peerID p2p.ID, height int64, prevBlocks []int64) fsmStepTestValues { - txs := []types.Tx{types.Tx("foo"), types.Tx("bar")} - return fsmStepTestValues{ - currentState: current, - event: blockResponseEv, - data: bReactorEventData{ - peerID: peerID, - height: height, - block: types.MakeBlock(height, txs, nil, nil), - length: 100}, - wantState: expected, - wantNewBlocks: append(prevBlocks, height), - } -} - -func sBlockRespEvErrored(current, expected string, - peerID p2p.ID, height int64, prevBlocks []int64, wantErr error, peersRemoved []p2p.ID) fsmStepTestValues { - txs := []types.Tx{types.Tx("foo"), types.Tx("bar")} - - return fsmStepTestValues{ - currentState: current, - event: blockResponseEv, - data: bReactorEventData{ - peerID: peerID, - height: height, - block: types.MakeBlock(height, txs, nil, nil), - length: 100}, - wantState: expected, - wantErr: wantErr, - wantRemovedPeers: peersRemoved, - wantNewBlocks: prevBlocks, - } -} - -func sPeerRemoveEv(current, expected string, peerID p2p.ID, err error, peersRemoved []p2p.ID) fsmStepTestValues { - return fsmStepTestValues{ - currentState: current, - event: peerRemoveEv, - data: bReactorEventData{ - peerID: peerID, - err: err, - }, - wantState: expected, - wantRemovedPeers: peersRemoved, - } -} - -// -------------------------------------------- - -func newTestReactor(height int64) *testReactor { - testBcR := &testReactor{logger: log.TestingLogger(), stateTimerStarts: make(map[string]int)} - testBcR.fsm = NewFSM(height, testBcR) - testBcR.fsm.SetLogger(testBcR.logger) - return testBcR -} - -func fixBlockResponseEvStep(step *fsmStepTestValues, testBcR *testReactor) { - // There is currently no good way to know to which peer a block request was sent. - // So in some cases where it does not matter, before we simulate a block response - // we cheat and look where it is expected from. - if step.event == blockResponseEv { - height := step.data.height - peerID, ok := testBcR.fsm.pool.blocks[height] - if ok { - step.data.peerID = peerID - } - } -} - -type testFields struct { - name string - startingHeight int64 - maxRequestsPerPeer int - maxPendingRequests int - steps []fsmStepTestValues -} - -func executeFSMTests(t *testing.T, tests []testFields, matchRespToReq bool) { - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - // Create test reactor - testBcR := newTestReactor(tt.startingHeight) - - if tt.maxRequestsPerPeer != 0 { - maxRequestsPerPeer = tt.maxRequestsPerPeer - } - - for _, step := range tt.steps { - step := step - assert.Equal(t, step.currentState, testBcR.fsm.state.name) - - var heightBefore int64 - if step.event == processedBlockEv && step.data.err == errBlockVerificationFailure { - heightBefore = testBcR.fsm.pool.Height - } - oldNumStatusRequests := testBcR.numStatusRequests - oldNumBlockRequests := testBcR.numBlockRequests - if matchRespToReq { - fixBlockResponseEvStep(&step, testBcR) - } - - fsmErr := sendEventToFSM(testBcR.fsm, step.event, step.data) - assert.Equal(t, step.wantErr, fsmErr) - - if step.wantStatusReqSent { - assert.Equal(t, oldNumStatusRequests+1, testBcR.numStatusRequests) - } else { - assert.Equal(t, oldNumStatusRequests, testBcR.numStatusRequests) - } - - if step.wantReqIncreased { - assert.True(t, oldNumBlockRequests < testBcR.numBlockRequests) - } else { - assert.Equal(t, oldNumBlockRequests, testBcR.numBlockRequests) - } - - for _, height := range step.wantNewBlocks { - _, err := testBcR.fsm.pool.BlockAndPeerAtHeight(height) - assert.Nil(t, err) - } - if step.event == processedBlockEv && step.data.err == errBlockVerificationFailure { - heightAfter := testBcR.fsm.pool.Height - assert.Equal(t, heightBefore, heightAfter) - firstAfter, err1 := testBcR.fsm.pool.BlockAndPeerAtHeight(testBcR.fsm.pool.Height) - secondAfter, err2 := testBcR.fsm.pool.BlockAndPeerAtHeight(testBcR.fsm.pool.Height + 1) - assert.NotNil(t, err1) - assert.NotNil(t, err2) - assert.Nil(t, firstAfter) - assert.Nil(t, secondAfter) - } - - assert.Equal(t, step.wantState, testBcR.fsm.state.name) - - if step.wantState == "finished" { - assert.True(t, testBcR.fsm.isCaughtUp()) - } - } - }) - } -} - -func TestFSMBasic(t *testing.T) { - tests := []testFields{ - { - name: "one block, one peer - TS2", - startingHeight: 1, - maxRequestsPerPeer: 2, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStatusEv("waitForPeer", "waitForBlock", "P1", 2, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 1, []int64{}), - sBlockRespEv("waitForBlock", "waitForBlock", "P2", 2, []int64{1}), - sProcessedBlockEv("waitForBlock", "finished", nil), - }, - }, - { - name: "multi block, multi peer - TS2", - startingHeight: 1, - maxRequestsPerPeer: 2, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStatusEv("waitForPeer", "waitForBlock", "P1", 4, nil), - sStatusEv("waitForBlock", "waitForBlock", "P2", 4, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 1, []int64{}), - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 2, []int64{1}), - sBlockRespEv("waitForBlock", "waitForBlock", "P2", 3, []int64{1, 2}), - sBlockRespEv("waitForBlock", "waitForBlock", "P2", 4, []int64{1, 2, 3}), - - sProcessedBlockEv("waitForBlock", "waitForBlock", nil), - sProcessedBlockEv("waitForBlock", "waitForBlock", nil), - sProcessedBlockEv("waitForBlock", "finished", nil), - }, - }, - } - - executeFSMTests(t, tests, true) -} - -func TestFSMBlockVerificationFailure(t *testing.T) { - tests := []testFields{ - { - name: "block verification failure - TS2 variant", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - - // add P1 and get blocks 1-3 from it - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 1, []int64{}), - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 2, []int64{1}), - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 3, []int64{1, 2}), - - // add P2 - sStatusEv("waitForBlock", "waitForBlock", "P2", 3, nil), - - // process block failure, should remove P1 and all blocks - sProcessedBlockEv("waitForBlock", "waitForBlock", errBlockVerificationFailure), - - // get blocks 1-3 from P2 - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - sBlockRespEv("waitForBlock", "waitForBlock", "P2", 1, []int64{}), - sBlockRespEv("waitForBlock", "waitForBlock", "P2", 2, []int64{1}), - sBlockRespEv("waitForBlock", "waitForBlock", "P2", 3, []int64{1, 2}), - - // finish after processing blocks 1 and 2 - sProcessedBlockEv("waitForBlock", "waitForBlock", nil), - sProcessedBlockEv("waitForBlock", "finished", nil), - }, - }, - } - - executeFSMTests(t, tests, false) -} - -func TestFSMBadBlockFromPeer(t *testing.T) { - tests := []testFields{ - { - name: "block we haven't asked for", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1 and ask for blocks 1-3 - sStatusEv("waitForPeer", "waitForBlock", "P1", 300, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - - // blockResponseEv for height 100 should cause an error - sBlockRespEvErrored("waitForBlock", "waitForPeer", - "P1", 100, []int64{}, errMissingBlock, []p2p.ID{}), - }, - }, - { - name: "block we already have", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1 and get block 1 - sStatusEv("waitForPeer", "waitForBlock", "P1", 100, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - sBlockRespEv("waitForBlock", "waitForBlock", - "P1", 1, []int64{}), - - // Get block 1 again. Since peer is removed together with block 1, - // the blocks present in the pool should be {} - sBlockRespEvErrored("waitForBlock", "waitForPeer", - "P1", 1, []int64{}, errDuplicateBlock, []p2p.ID{"P1"}), - }, - }, - { - name: "block from unknown peer", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1 and get block 1 - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - - // get block 1 from unknown peer P2 - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - sBlockRespEvErrored("waitForBlock", "waitForBlock", - "P2", 1, []int64{}, errBadDataFromPeer, []p2p.ID{"P2"}), - }, - }, - { - name: "block from wrong peer", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1, make requests for blocks 1-3 to P1 - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - - // add P2 - sStatusEv("waitForBlock", "waitForBlock", "P2", 3, nil), - - // receive block 1 from P2 - sBlockRespEvErrored("waitForBlock", "waitForBlock", - "P2", 1, []int64{}, errBadDataFromPeer, []p2p.ID{"P2"}), - }, - }, - } - - executeFSMTests(t, tests, false) -} - -func TestFSMBlockAtCurrentHeightDoesNotArriveInTime(t *testing.T) { - tests := []testFields{ - { - name: "block at current height undelivered - TS5", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1, get blocks 1 and 2, process block 1 - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - sBlockRespEv("waitForBlock", "waitForBlock", - "P1", 1, []int64{}), - sBlockRespEv("waitForBlock", "waitForBlock", - "P1", 2, []int64{1}), - sProcessedBlockEv("waitForBlock", "waitForBlock", nil), - - // add P2 - sStatusEv("waitForBlock", "waitForBlock", "P2", 3, nil), - - // timeout on block 3, P1 should be removed - sStateTimeoutEv("waitForBlock", "waitForBlock", "waitForBlock", errNoPeerResponseForCurrentHeights), - - // make requests and finish by receiving blocks 2 and 3 from P2 - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - sBlockRespEv("waitForBlock", "waitForBlock", "P2", 2, []int64{}), - sBlockRespEv("waitForBlock", "waitForBlock", "P2", 3, []int64{2}), - sProcessedBlockEv("waitForBlock", "finished", nil), - }, - }, - { - name: "block at current height undelivered, at maxPeerHeight after peer removal - TS3", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1, request blocks 1-3 from P1 - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - - // add P2 (tallest) - sStatusEv("waitForBlock", "waitForBlock", "P2", 30, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - - // receive blocks 1-3 from P1 - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 1, []int64{}), - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 2, []int64{1}), - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 3, []int64{1, 2}), - - // process blocks at heights 1 and 2 - sProcessedBlockEv("waitForBlock", "waitForBlock", nil), - sProcessedBlockEv("waitForBlock", "waitForBlock", nil), - - // timeout on block at height 4 - sStateTimeoutEv("waitForBlock", "finished", "waitForBlock", nil), - }, - }, - } - - executeFSMTests(t, tests, true) -} - -func TestFSMPeerRelatedEvents(t *testing.T) { - tests := []testFields{ - { - name: "peer remove event with no blocks", - startingHeight: 1, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1, P2, P3 - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - sStatusEv("waitForBlock", "waitForBlock", "P2", 3, nil), - sStatusEv("waitForBlock", "waitForBlock", "P3", 3, nil), - - // switch removes P2 - sPeerRemoveEv("waitForBlock", "waitForBlock", "P2", errSwitchRemovesPeer, []p2p.ID{"P2"}), - }, - }, - { - name: "only peer removed while in waitForBlock state", - startingHeight: 100, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1 - sStatusEv("waitForPeer", "waitForBlock", "P1", 200, nil), - - // switch removes P1 - sPeerRemoveEv("waitForBlock", "waitForPeer", "P1", errSwitchRemovesPeer, []p2p.ID{"P1"}), - }, - }, - { - name: "highest peer removed while in waitForBlock state, node reaches maxPeerHeight - TS4 ", - startingHeight: 100, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1 and make requests - sStatusEv("waitForPeer", "waitForBlock", "P1", 101, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - // add P2 - sStatusEv("waitForBlock", "waitForBlock", "P2", 200, nil), - - // get blocks 100 and 101 from P1 and process block at height 100 - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 100, []int64{}), - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 101, []int64{100}), - sProcessedBlockEv("waitForBlock", "waitForBlock", nil), - - // switch removes peer P1, should be finished - sPeerRemoveEv("waitForBlock", "finished", "P2", errSwitchRemovesPeer, []p2p.ID{"P2"}), - }, - }, - { - name: "highest peer lowers its height in waitForBlock state, node reaches maxPeerHeight - TS4", - startingHeight: 100, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1 and make requests - sStatusEv("waitForPeer", "waitForBlock", "P1", 101, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - - // add P2 - sStatusEv("waitForBlock", "waitForBlock", "P2", 200, nil), - - // get blocks 100 and 101 from P1 - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 100, []int64{}), - sBlockRespEv("waitForBlock", "waitForBlock", "P1", 101, []int64{100}), - - // processed block at heights 100 - sProcessedBlockEv("waitForBlock", "waitForBlock", nil), - - // P2 becomes short - sStatusEv("waitForBlock", "finished", "P2", 100, errPeerLowersItsHeight), - }, - }, - { - name: "new short peer while in waitForPeer state", - startingHeight: 100, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStatusEv("waitForPeer", "waitForPeer", "P1", 3, errPeerTooShort), - }, - }, - { - name: "new short peer while in waitForBlock state", - startingHeight: 100, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStatusEv("waitForPeer", "waitForBlock", "P1", 200, nil), - sStatusEv("waitForBlock", "waitForBlock", "P2", 3, errPeerTooShort), - }, - }, - { - name: "only peer updated with low height while in waitForBlock state", - startingHeight: 100, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStatusEv("waitForPeer", "waitForBlock", "P1", 200, nil), - sStatusEv("waitForBlock", "waitForPeer", "P1", 3, errPeerLowersItsHeight), - }, - }, - { - name: "peer does not exist in the switch", - startingHeight: 9999999, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - // add P1 - sStatusEv("waitForPeer", "waitForBlock", "P1", 20000000, nil), - // send request for block 9999999 - // Note: For this block request the "switch missing the peer" error is simulated, - // see implementation of bcReactor interface, sendBlockRequest(), in this file. - sMakeRequestsEvErrored("waitForBlock", "waitForBlock", - maxNumRequests, nil, []p2p.ID{"P1"}), - }, - }, - } - - executeFSMTests(t, tests, true) -} - -func TestFSMStopFSM(t *testing.T) { - tests := []testFields{ - { - name: "stopFSMEv in unknown", - steps: []fsmStepTestValues{ - sStopFSMEv("unknown", "finished"), - }, - }, - { - name: "stopFSMEv in waitForPeer", - startingHeight: 1, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStopFSMEv("waitForPeer", "finished"), - }, - }, - { - name: "stopFSMEv in waitForBlock", - startingHeight: 1, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - sStopFSMEv("waitForBlock", "finished"), - }, - }, - } - - executeFSMTests(t, tests, false) -} - -func TestFSMUnknownElements(t *testing.T) { - tests := []testFields{ - { - name: "unknown event for state unknown", - steps: []fsmStepTestValues{ - sUnknownFSMEv("unknown"), - }, - }, - { - name: "unknown event for state waitForPeer", - steps: []fsmStepTestValues{ - sStartFSMEv(), - sUnknownFSMEv("waitForPeer"), - }, - }, - { - name: "unknown event for state waitForBlock", - startingHeight: 1, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - sUnknownFSMEv("waitForBlock"), - }, - }, - } - - executeFSMTests(t, tests, false) -} - -func TestFSMPeerStateTimeoutEvent(t *testing.T) { - tests := []testFields{ - { - name: "timeout event for state waitForPeer while in state waitForPeer - TS1", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStateTimeoutEv("waitForPeer", "finished", "waitForPeer", errNoTallerPeer), - }, - }, - { - name: "timeout event for state waitForPeer while in a state != waitForPeer", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStateTimeoutEv("waitForPeer", "waitForPeer", "waitForBlock", errTimeoutEventWrongState), - }, - }, - { - name: "timeout event for state waitForBlock while in state waitForBlock ", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - sStateTimeoutEv("waitForBlock", "waitForPeer", "waitForBlock", errNoPeerResponseForCurrentHeights), - }, - }, - { - name: "timeout event for state waitForBlock while in a state != waitForBlock", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - sStateTimeoutEv("waitForBlock", "waitForBlock", "waitForPeer", errTimeoutEventWrongState), - }, - }, - { - name: "timeout event for state waitForBlock with multiple peers", - startingHeight: 1, - maxRequestsPerPeer: 3, - steps: []fsmStepTestValues{ - sStartFSMEv(), - sStatusEv("waitForPeer", "waitForBlock", "P1", 3, nil), - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - sStatusEv("waitForBlock", "waitForBlock", "P2", 3, nil), - sStateTimeoutEv("waitForBlock", "waitForBlock", "waitForBlock", errNoPeerResponseForCurrentHeights), - }, - }, - } - - executeFSMTests(t, tests, false) -} - -func makeCorrectTransitionSequence(startingHeight int64, numBlocks int64, numPeers int, randomPeerHeights bool, - maxRequestsPerPeer int, maxPendingRequests int) testFields { - - // Generate numPeers peers with random or numBlocks heights according to the randomPeerHeights flag. - peerHeights := make([]int64, numPeers) - for i := 0; i < numPeers; i++ { - if i == 0 { - peerHeights[0] = numBlocks - continue - } - if randomPeerHeights { - peerHeights[i] = int64(tmmath.MaxInt(tmrand.Intn(int(numBlocks)), int(startingHeight)+1)) - } else { - peerHeights[i] = numBlocks - } - } - - // Approximate the slice capacity to save time for appends. - testSteps := make([]fsmStepTestValues, 0, 3*numBlocks+int64(numPeers)) - - testName := fmt.Sprintf("%v-blocks %v-startingHeight %v-peers %v-maxRequestsPerPeer %v-maxNumRequests", - numBlocks, startingHeight, numPeers, maxRequestsPerPeer, maxPendingRequests) - - // Add startFSMEv step. - testSteps = append(testSteps, sStartFSMEv()) - - // For each peer, add statusResponseEv step. - for i := 0; i < numPeers; i++ { - peerName := fmt.Sprintf("P%d", i) - if i == 0 { - testSteps = append( - testSteps, - sStatusEv("waitForPeer", "waitForBlock", p2p.ID(peerName), peerHeights[i], nil)) - } else { - testSteps = append(testSteps, - sStatusEv("waitForBlock", "waitForBlock", p2p.ID(peerName), peerHeights[i], nil)) - } - } - - height := startingHeight - numBlocksReceived := 0 - prevBlocks := make([]int64, 0, maxPendingRequests) - -forLoop: - for i := 0; i < int(numBlocks); i++ { - - // Add the makeRequestEv step periodically. - if i%maxRequestsPerPeer == 0 { - testSteps = append( - testSteps, - sMakeRequestsEv("waitForBlock", "waitForBlock", maxNumRequests), - ) - } - - // Add the blockRespEv step - testSteps = append( - testSteps, - sBlockRespEv("waitForBlock", "waitForBlock", - "P0", height, prevBlocks)) - prevBlocks = append(prevBlocks, height) - height++ - numBlocksReceived++ - - // Add the processedBlockEv step periodically. - if numBlocksReceived >= maxRequestsPerPeer || height >= numBlocks { - for j := int(height) - numBlocksReceived; j < int(height); j++ { - if j >= int(numBlocks) { - // This is the last block that is processed, we should be in "finished" state. - testSteps = append( - testSteps, - sProcessedBlockEv("waitForBlock", "finished", nil)) - break forLoop - } - testSteps = append( - testSteps, - sProcessedBlockEv("waitForBlock", "waitForBlock", nil)) - } - numBlocksReceived = 0 - prevBlocks = make([]int64, 0, maxPendingRequests) - } - } - - return testFields{ - name: testName, - startingHeight: startingHeight, - maxRequestsPerPeer: maxRequestsPerPeer, - maxPendingRequests: maxPendingRequests, - steps: testSteps, - } -} - -const ( - maxStartingHeightTest = 100 - maxRequestsPerPeerTest = 20 - maxTotalPendingRequestsTest = 600 - maxNumPeersTest = 1000 - maxNumBlocksInChainTest = 10000 // should be smaller than 9999999 -) - -func makeCorrectTransitionSequenceWithRandomParameters() testFields { - // Generate a starting height for fast sync. - startingHeight := int64(tmrand.Intn(maxStartingHeightTest) + 1) - - // Generate the number of requests per peer. - maxRequestsPerPeer := tmrand.Intn(maxRequestsPerPeerTest) + 1 - - // Generate the maximum number of total pending requests, >= maxRequestsPerPeer. - maxPendingRequests := tmrand.Intn(maxTotalPendingRequestsTest-maxRequestsPerPeer) + maxRequestsPerPeer - - // Generate the number of blocks to be synced. - numBlocks := int64(tmrand.Intn(maxNumBlocksInChainTest)) + startingHeight - - // Generate a number of peers. - numPeers := tmrand.Intn(maxNumPeersTest) + 1 - - return makeCorrectTransitionSequence(startingHeight, numBlocks, numPeers, true, maxRequestsPerPeer, maxPendingRequests) -} - -func shouldApplyProcessedBlockEvStep(step *fsmStepTestValues, testBcR *testReactor) bool { - if step.event == processedBlockEv { - _, err := testBcR.fsm.pool.BlockAndPeerAtHeight(testBcR.fsm.pool.Height) - if err == errMissingBlock { - return false - } - _, err = testBcR.fsm.pool.BlockAndPeerAtHeight(testBcR.fsm.pool.Height + 1) - if err == errMissingBlock { - return false - } - } - return true -} - -func TestFSMCorrectTransitionSequences(t *testing.T) { - - tests := []testFields{ - makeCorrectTransitionSequence(1, 100, 10, true, 10, 40), - makeCorrectTransitionSequenceWithRandomParameters(), - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - // Create test reactor - testBcR := newTestReactor(tt.startingHeight) - - if tt.maxRequestsPerPeer != 0 { - maxRequestsPerPeer = tt.maxRequestsPerPeer - } - - for _, step := range tt.steps { - step := step - assert.Equal(t, step.currentState, testBcR.fsm.state.name) - - oldNumStatusRequests := testBcR.numStatusRequests - fixBlockResponseEvStep(&step, testBcR) - if !shouldApplyProcessedBlockEvStep(&step, testBcR) { - continue - } - - fsmErr := sendEventToFSM(testBcR.fsm, step.event, step.data) - assert.Equal(t, step.wantErr, fsmErr) - - if step.wantStatusReqSent { - assert.Equal(t, oldNumStatusRequests+1, testBcR.numStatusRequests) - } else { - assert.Equal(t, oldNumStatusRequests, testBcR.numStatusRequests) - } - - assert.Equal(t, step.wantState, testBcR.fsm.state.name) - if step.wantState == "finished" { - assert.True(t, testBcR.fsm.isCaughtUp()) - } - } - - }) - } -} - -// ---------------------------------------- -// implements the bcRNotifier -func (testR *testReactor) sendPeerError(err error, peerID p2p.ID) { - testR.logger.Info("Reactor received sendPeerError call from FSM", "peer", peerID, "err", err) - testR.lastPeerError.peerID = peerID - testR.lastPeerError.err = err -} - -func (testR *testReactor) sendStatusRequest() { - testR.logger.Info("Reactor received sendStatusRequest call from FSM") - testR.numStatusRequests++ -} - -func (testR *testReactor) sendBlockRequest(peerID p2p.ID, height int64) error { - testR.logger.Info("Reactor received sendBlockRequest call from FSM", "peer", peerID, "height", height) - testR.numBlockRequests++ - testR.lastBlockRequest.peerID = peerID - testR.lastBlockRequest.height = height - if height == 9999999 { - // simulate switch does not have peer - return errNilPeerForBlockRequest - } - return nil -} - -func (testR *testReactor) resetStateTimer(name string, timer **time.Timer, timeout time.Duration) { - testR.logger.Info("Reactor received resetStateTimer call from FSM", "state", name, "timeout", timeout) - if _, ok := testR.stateTimerStarts[name]; !ok { - testR.stateTimerStarts[name] = 1 - } else { - testR.stateTimerStarts[name]++ - } -} - -func (testR *testReactor) switchToConsensus() { -} - -// ---------------------------------------- diff --git a/blockchain/v1/reactor_test.go b/blockchain/v1/reactor_test.go deleted file mode 100644 index 53bcc36fd..000000000 --- a/blockchain/v1/reactor_test.go +++ /dev/null @@ -1,365 +0,0 @@ -package v1 - -import ( - "fmt" - "os" - "sort" - "sync" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - - dbm "github.com/tendermint/tm-db" - - abci "github.com/tendermint/tendermint/abci/types" - cfg "github.com/tendermint/tendermint/config" - "github.com/tendermint/tendermint/libs/log" - mpmocks "github.com/tendermint/tendermint/mempool/mocks" - "github.com/tendermint/tendermint/p2p" - tmproto "github.com/tendermint/tendermint/proto/tendermint/types" - "github.com/tendermint/tendermint/proxy" - sm "github.com/tendermint/tendermint/state" - sf "github.com/tendermint/tendermint/state/test/factory" - "github.com/tendermint/tendermint/store" - "github.com/tendermint/tendermint/types" - tmtime "github.com/tendermint/tendermint/types/time" -) - -var config *cfg.Config - -func randGenesisDoc(numValidators int, randPower bool, minPower int64) (*types.GenesisDoc, []types.PrivValidator) { - validators := make([]types.GenesisValidator, numValidators) - privValidators := make([]types.PrivValidator, numValidators) - for i := 0; i < numValidators; i++ { - val, privVal := types.RandValidator(randPower, minPower) - validators[i] = types.GenesisValidator{ - PubKey: val.PubKey, - Power: val.VotingPower, - } - privValidators[i] = privVal - } - sort.Sort(types.PrivValidatorsByAddress(privValidators)) - - return &types.GenesisDoc{ - GenesisTime: tmtime.Now(), - ChainID: config.ChainID(), - Validators: validators, - }, privValidators -} - -func makeVote( - t *testing.T, - header *types.Header, - blockID types.BlockID, - valset *types.ValidatorSet, - privVal types.PrivValidator) *types.Vote { - - pubKey, err := privVal.GetPubKey() - require.NoError(t, err) - - valIdx, _ := valset.GetByAddress(pubKey.Address()) - vote := &types.Vote{ - ValidatorAddress: pubKey.Address(), - ValidatorIndex: valIdx, - Height: header.Height, - Round: 1, - Timestamp: tmtime.Now(), - Type: tmproto.PrecommitType, - BlockID: blockID, - } - - vpb := vote.ToProto() - - _ = privVal.SignVote(header.ChainID, vpb) - vote.Signature = vpb.Signature - - return vote -} - -type BlockchainReactorPair struct { - bcR *BlockchainReactor - conR *consensusReactorTest -} - -func newBlockchainReactor( - t *testing.T, - logger log.Logger, - genDoc *types.GenesisDoc, - privVals []types.PrivValidator, - maxBlockHeight int64) *BlockchainReactor { - if len(privVals) != 1 { - panic("only support one validator") - } - - app := &testApp{} - cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) - err := proxyApp.Start() - if err != nil { - panic(fmt.Errorf("error start app: %w", err)) - } - - blockDB := dbm.NewMemDB() - stateDB := dbm.NewMemDB() - stateStore := sm.NewStore(stateDB) - blockStore := store.NewBlockStore(blockDB) - - state, err := stateStore.LoadFromDBOrGenesisDoc(genDoc) - if err != nil { - panic(fmt.Errorf("error constructing state from genesis file: %w", err)) - } - - mp := &mpmocks.Mempool{} - mp.On("Lock").Return() - mp.On("Unlock").Return() - mp.On("FlushAppConn", mock.Anything).Return(nil) - mp.On("Update", - mock.Anything, - mock.Anything, - mock.Anything, - mock.Anything, - mock.Anything, - mock.Anything).Return(nil) - - // Make the BlockchainReactor itself. - // NOTE we have to create and commit the blocks first because - // pool.height is determined from the store. - fastSync := true - db := dbm.NewMemDB() - stateStore = sm.NewStore(db) - blockExec := sm.NewBlockExecutor(stateStore, log.TestingLogger(), proxyApp.Consensus(), - mp, sm.EmptyEvidencePool{}) - if err = stateStore.Save(state); err != nil { - panic(err) - } - - // let's add some blocks in - for blockHeight := int64(1); blockHeight <= maxBlockHeight; blockHeight++ { - lastCommit := types.NewCommit(blockHeight-1, 1, types.BlockID{}, nil) - if blockHeight > 1 { - lastBlockMeta := blockStore.LoadBlockMeta(blockHeight - 1) - lastBlock := blockStore.LoadBlock(blockHeight - 1) - - vote := makeVote(t, &lastBlock.Header, lastBlockMeta.BlockID, state.Validators, privVals[0]) - lastCommit = types.NewCommit(vote.Height, vote.Round, lastBlockMeta.BlockID, []types.CommitSig{vote.CommitSig()}) - } - - thisBlock := sf.MakeBlock(state, blockHeight, lastCommit) - - thisParts, err := thisBlock.MakePartSet(types.BlockPartSizeBytes) - require.NoError(t, err) - blockID := types.BlockID{Hash: thisBlock.Hash(), PartSetHeader: thisParts.Header()} - - state, _, err = blockExec.ApplyBlock(state, blockID, thisBlock) - if err != nil { - panic(fmt.Errorf("error apply block: %w", err)) - } - - blockStore.SaveBlock(thisBlock, thisParts, lastCommit) - } - - bcReactor := NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync) - bcReactor.SetLogger(logger.With("module", "blockchain")) - - return bcReactor -} - -func newBlockchainReactorPair( - t *testing.T, - logger log.Logger, - genDoc *types.GenesisDoc, - privVals []types.PrivValidator, - maxBlockHeight int64) BlockchainReactorPair { - - consensusReactor := &consensusReactorTest{} - consensusReactor.BaseReactor = *p2p.NewBaseReactor("Consensus reactor", consensusReactor) - - return BlockchainReactorPair{ - newBlockchainReactor(t, logger, genDoc, privVals, maxBlockHeight), - consensusReactor} -} - -type consensusReactorTest struct { - p2p.BaseReactor // BaseService + p2p.Switch - switchedToConsensus bool - mtx sync.Mutex -} - -func (conR *consensusReactorTest) SwitchToConsensus(state sm.State, blocksSynced bool) { - conR.mtx.Lock() - defer conR.mtx.Unlock() - conR.switchedToConsensus = true -} - -func TestFastSyncNoBlockResponse(t *testing.T) { - - config = cfg.ResetTestRoot("blockchain_new_reactor_test") - defer os.RemoveAll(config.RootDir) - genDoc, privVals := randGenesisDoc(1, false, 30) - - maxBlockHeight := int64(65) - - reactorPairs := make([]BlockchainReactorPair, 2) - - logger := log.TestingLogger() - reactorPairs[0] = newBlockchainReactorPair(t, logger, genDoc, privVals, maxBlockHeight) - reactorPairs[1] = newBlockchainReactorPair(t, logger, genDoc, privVals, 0) - - p2p.MakeConnectedSwitches(config.P2P, 2, func(i int, s *p2p.Switch) *p2p.Switch { - s.AddReactor("BLOCKCHAIN", reactorPairs[i].bcR) - s.AddReactor("CONSENSUS", reactorPairs[i].conR) - moduleName := fmt.Sprintf("blockchain-%v", i) - reactorPairs[i].bcR.SetLogger(logger.With("module", moduleName)) - - return s - - }, p2p.Connect2Switches) - - defer func() { - for _, r := range reactorPairs { - _ = r.bcR.Stop() - _ = r.conR.Stop() - } - }() - - tests := []struct { - height int64 - existent bool - }{ - {maxBlockHeight + 2, false}, - {10, true}, - {1, true}, - {maxBlockHeight + 100, false}, - } - - for { - time.Sleep(10 * time.Millisecond) - reactorPairs[1].conR.mtx.Lock() - if reactorPairs[1].conR.switchedToConsensus { - reactorPairs[1].conR.mtx.Unlock() - break - } - reactorPairs[1].conR.mtx.Unlock() - } - - assert.Equal(t, maxBlockHeight, reactorPairs[0].bcR.store.Height()) - - for _, tt := range tests { - block := reactorPairs[1].bcR.store.LoadBlock(tt.height) - if tt.existent { - assert.True(t, block != nil) - } else { - assert.True(t, block == nil) - } - } -} - -// NOTE: This is too hard to test without -// an easy way to add test peer to switch -// or without significant refactoring of the module. -// Alternatively we could actually dial a TCP conn but -// that seems extreme. -func TestFastSyncBadBlockStopsPeer(t *testing.T) { - numNodes := 4 - maxBlockHeight := int64(148) - - config = cfg.ResetTestRoot("blockchain_reactor_test") - defer os.RemoveAll(config.RootDir) - genDoc, privVals := randGenesisDoc(1, false, 30) - - otherChain := newBlockchainReactorPair(t, log.TestingLogger(), genDoc, privVals, maxBlockHeight) - defer func() { - _ = otherChain.bcR.Stop() - _ = otherChain.conR.Stop() - }() - - reactorPairs := make([]BlockchainReactorPair, numNodes) - logger := make([]log.Logger, numNodes) - - for i := 0; i < numNodes; i++ { - logger[i] = log.TestingLogger() - height := int64(0) - if i == 0 { - height = maxBlockHeight - } - reactorPairs[i] = newBlockchainReactorPair(t, logger[i], genDoc, privVals, height) - } - - switches := p2p.MakeConnectedSwitches(config.P2P, numNodes, func(i int, s *p2p.Switch) *p2p.Switch { - reactorPairs[i].conR.mtx.Lock() - s.AddReactor("BLOCKCHAIN", reactorPairs[i].bcR) - s.AddReactor("CONSENSUS", reactorPairs[i].conR) - moduleName := fmt.Sprintf("blockchain-%v", i) - reactorPairs[i].bcR.SetLogger(logger[i].With("module", moduleName)) - reactorPairs[i].conR.mtx.Unlock() - return s - - }, p2p.Connect2Switches) - - defer func() { - for _, r := range reactorPairs { - _ = r.bcR.Stop() - _ = r.conR.Stop() - } - }() - -outerFor: - for { - time.Sleep(10 * time.Millisecond) - for i := 0; i < numNodes; i++ { - reactorPairs[i].conR.mtx.Lock() - if !reactorPairs[i].conR.switchedToConsensus { - reactorPairs[i].conR.mtx.Unlock() - continue outerFor - } - reactorPairs[i].conR.mtx.Unlock() - } - break - } - - // at this time, reactors[0-3] is the newest - assert.Equal(t, numNodes-1, reactorPairs[1].bcR.Switch.Peers().Size()) - - // mark last reactorPair as an invalid peer - reactorPairs[numNodes-1].bcR.store = otherChain.bcR.store - - lastLogger := log.TestingLogger() - lastReactorPair := newBlockchainReactorPair(t, lastLogger, genDoc, privVals, 0) - reactorPairs = append(reactorPairs, lastReactorPair) - - switches = append(switches, p2p.MakeConnectedSwitches(config.P2P, 1, func(i int, s *p2p.Switch) *p2p.Switch { - s.AddReactor("BLOCKCHAIN", reactorPairs[len(reactorPairs)-1].bcR) - s.AddReactor("CONSENSUS", reactorPairs[len(reactorPairs)-1].conR) - moduleName := fmt.Sprintf("blockchain-%v", len(reactorPairs)-1) - reactorPairs[len(reactorPairs)-1].bcR.SetLogger(lastLogger.With("module", moduleName)) - return s - - }, p2p.Connect2Switches)...) - - for i := 0; i < len(reactorPairs)-1; i++ { - p2p.Connect2Switches(switches, i, len(reactorPairs)-1) - } - - for { - time.Sleep(1 * time.Second) - lastReactorPair.conR.mtx.Lock() - if lastReactorPair.conR.switchedToConsensus { - lastReactorPair.conR.mtx.Unlock() - break - } - lastReactorPair.conR.mtx.Unlock() - - if lastReactorPair.bcR.Switch.Peers().Size() == 0 { - break - } - } - - assert.True(t, lastReactorPair.bcR.Switch.Peers().Size() < len(reactorPairs)-1) -} - -type testApp struct { - abci.BaseApplication -} diff --git a/blockchain/v2/io.go b/blockchain/v2/io.go deleted file mode 100644 index 4951573ce..000000000 --- a/blockchain/v2/io.go +++ /dev/null @@ -1,140 +0,0 @@ -package v2 - -import ( - "fmt" - - bc "github.com/tendermint/tendermint/blockchain" - "github.com/tendermint/tendermint/p2p" - bcproto "github.com/tendermint/tendermint/proto/tendermint/blockchain" - "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/types" -) - -type iIO interface { - sendBlockRequest(peerID p2p.ID, height int64) error - sendBlockToPeer(block *types.Block, peerID p2p.ID) error - sendBlockNotFound(height int64, peerID p2p.ID) error - sendStatusResponse(base, height int64, peerID p2p.ID) error - - broadcastStatusRequest() error - - trySwitchToConsensus(state state.State, skipWAL bool) bool -} - -type switchIO struct { - sw *p2p.Switch -} - -func newSwitchIo(sw *p2p.Switch) *switchIO { - return &switchIO{ - sw: sw, - } -} - -const ( - // BlockchainChannel is a channel for blocks and status updates (`BlockStore` height) - BlockchainChannel = byte(0x40) -) - -type consensusReactor interface { - // for when we switch from blockchain reactor and fast sync to - // the consensus machine - SwitchToConsensus(state state.State, skipWAL bool) -} - -func (sio *switchIO) sendBlockRequest(peerID p2p.ID, height int64) error { - peer := sio.sw.Peers().Get(peerID) - if peer == nil { - return fmt.Errorf("peer not found") - } - msgBytes, err := bc.EncodeMsg(&bcproto.BlockRequest{Height: height}) - if err != nil { - return err - } - - queued := peer.TrySend(BlockchainChannel, msgBytes) - if !queued { - return fmt.Errorf("send queue full") - } - return nil -} - -func (sio *switchIO) sendStatusResponse(base int64, height int64, peerID p2p.ID) error { - peer := sio.sw.Peers().Get(peerID) - if peer == nil { - return fmt.Errorf("peer not found") - } - - msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{Height: height, Base: base}) - if err != nil { - return err - } - - if queued := peer.TrySend(BlockchainChannel, msgBytes); !queued { - return fmt.Errorf("peer queue full") - } - - return nil -} - -func (sio *switchIO) sendBlockToPeer(block *types.Block, peerID p2p.ID) error { - peer := sio.sw.Peers().Get(peerID) - if peer == nil { - return fmt.Errorf("peer not found") - } - if block == nil { - panic("trying to send nil block") - } - - bpb, err := block.ToProto() - if err != nil { - return err - } - - msgBytes, err := bc.EncodeMsg(&bcproto.BlockResponse{Block: bpb}) - if err != nil { - return err - } - if queued := peer.TrySend(BlockchainChannel, msgBytes); !queued { - return fmt.Errorf("peer queue full") - } - - return nil -} - -func (sio *switchIO) sendBlockNotFound(height int64, peerID p2p.ID) error { - peer := sio.sw.Peers().Get(peerID) - if peer == nil { - return fmt.Errorf("peer not found") - } - msgBytes, err := bc.EncodeMsg(&bcproto.NoBlockResponse{Height: height}) - if err != nil { - return err - } - - if queued := peer.TrySend(BlockchainChannel, msgBytes); !queued { - return fmt.Errorf("peer queue full") - } - - return nil -} - -func (sio *switchIO) trySwitchToConsensus(state state.State, skipWAL bool) bool { - conR, ok := sio.sw.Reactor("CONSENSUS").(consensusReactor) - if ok { - conR.SwitchToConsensus(state, skipWAL) - } - return ok -} - -func (sio *switchIO) broadcastStatusRequest() error { - msgBytes, err := bc.EncodeMsg(&bcproto.StatusRequest{}) - if err != nil { - return err - } - - // XXX: maybe we should use an io specific peer list here - sio.sw.Broadcast(BlockchainChannel, msgBytes) - - return nil -} diff --git a/blockchain/v2/metrics.go b/blockchain/v2/metrics.go deleted file mode 100644 index c68ec6447..000000000 --- a/blockchain/v2/metrics.go +++ /dev/null @@ -1,125 +0,0 @@ -package v2 - -import ( - "github.com/go-kit/kit/metrics" - "github.com/go-kit/kit/metrics/discard" - "github.com/go-kit/kit/metrics/prometheus" - stdprometheus "github.com/prometheus/client_golang/prometheus" -) - -const ( - // MetricsSubsystem is a subsystem shared by all metrics exposed by this - // package. - MetricsSubsystem = "blockchain" -) - -// Metrics contains metrics exposed by this package. -type Metrics struct { - // events_in - EventsIn metrics.Counter - // events_in - EventsHandled metrics.Counter - // events_out - EventsOut metrics.Counter - // errors_in - ErrorsIn metrics.Counter - // errors_handled - ErrorsHandled metrics.Counter - // errors_out - ErrorsOut metrics.Counter - // events_shed - EventsShed metrics.Counter - // events_sent - EventsSent metrics.Counter - // errors_sent - ErrorsSent metrics.Counter - // errors_shed - ErrorsShed metrics.Counter -} - -// PrometheusMetrics returns metrics for in and out events, errors, etc. handled by routines. -// Can we burn in the routine name here? -func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { - labels := []string{} - for i := 0; i < len(labelsAndValues); i += 2 { - labels = append(labels, labelsAndValues[i]) - } - return &Metrics{ - EventsIn: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "events_in", - Help: "Events read from the channel.", - }, labels).With(labelsAndValues...), - EventsHandled: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "events_handled", - Help: "Events handled", - }, labels).With(labelsAndValues...), - EventsOut: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "events_out", - Help: "Events output from routine.", - }, labels).With(labelsAndValues...), - ErrorsIn: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "errors_in", - Help: "Errors read from the channel.", - }, labels).With(labelsAndValues...), - ErrorsHandled: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "errors_handled", - Help: "Errors handled.", - }, labels).With(labelsAndValues...), - ErrorsOut: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "errors_out", - Help: "Errors output from routine.", - }, labels).With(labelsAndValues...), - ErrorsSent: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "errors_sent", - Help: "Errors sent to routine.", - }, labels).With(labelsAndValues...), - ErrorsShed: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "errors_shed", - Help: "Errors dropped from sending.", - }, labels).With(labelsAndValues...), - EventsSent: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "events_sent", - Help: "Events sent to routine.", - }, labels).With(labelsAndValues...), - EventsShed: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "events_shed", - Help: "Events dropped from sending.", - }, labels).With(labelsAndValues...), - } -} - -// NopMetrics returns no-op Metrics. -func NopMetrics() *Metrics { - return &Metrics{ - EventsIn: discard.NewCounter(), - EventsHandled: discard.NewCounter(), - EventsOut: discard.NewCounter(), - ErrorsIn: discard.NewCounter(), - ErrorsHandled: discard.NewCounter(), - ErrorsOut: discard.NewCounter(), - EventsShed: discard.NewCounter(), - EventsSent: discard.NewCounter(), - ErrorsSent: discard.NewCounter(), - ErrorsShed: discard.NewCounter(), - } -} diff --git a/blockchain/v2/processor.go b/blockchain/v2/processor.go deleted file mode 100644 index 44bf934fd..000000000 --- a/blockchain/v2/processor.go +++ /dev/null @@ -1,193 +0,0 @@ -package v2 - -import ( - "fmt" - - "github.com/tendermint/tendermint/p2p" - tmState "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/types" -) - -// Events generated by the processor: -// block execution failure, event will indicate the peer(s) that caused the error -type pcBlockVerificationFailure struct { - priorityNormal - height int64 - firstPeerID p2p.ID - secondPeerID p2p.ID -} - -func (e pcBlockVerificationFailure) String() string { - return fmt.Sprintf("pcBlockVerificationFailure{%d 1st peer: %v, 2nd peer: %v}", - e.height, e.firstPeerID, e.secondPeerID) -} - -// successful block execution -type pcBlockProcessed struct { - priorityNormal - height int64 - peerID p2p.ID -} - -func (e pcBlockProcessed) String() string { - return fmt.Sprintf("pcBlockProcessed{%d peer: %v}", e.height, e.peerID) -} - -// processor has finished -type pcFinished struct { - priorityNormal - blocksSynced int - tmState tmState.State -} - -func (p pcFinished) Error() string { - return "finished" -} - -type queueItem struct { - block *types.Block - peerID p2p.ID -} - -type blockQueue map[int64]queueItem - -type pcState struct { - // blocks waiting to be processed - queue blockQueue - - // draining indicates that the next rProcessBlock event with a queue miss constitutes completion - draining bool - - // the number of blocks successfully synced by the processor - blocksSynced int - - // the processorContext which contains the processor dependencies - context processorContext -} - -func (state *pcState) String() string { - return fmt.Sprintf("height: %d queue length: %d draining: %v blocks synced: %d", - state.height(), len(state.queue), state.draining, state.blocksSynced) -} - -// newPcState returns a pcState initialized with the last verified block enqueued -func newPcState(context processorContext) *pcState { - return &pcState{ - queue: blockQueue{}, - draining: false, - blocksSynced: 0, - context: context, - } -} - -// nextTwo returns the next two unverified blocks -func (state *pcState) nextTwo() (queueItem, queueItem, error) { - if first, ok := state.queue[state.height()+1]; ok { - if second, ok := state.queue[state.height()+2]; ok { - return first, second, nil - } - } - return queueItem{}, queueItem{}, fmt.Errorf("not found") -} - -// synced returns true when at most the last verified block remains in the queue -func (state *pcState) synced() bool { - return len(state.queue) <= 1 -} - -func (state *pcState) enqueue(peerID p2p.ID, block *types.Block, height int64) { - if item, ok := state.queue[height]; ok { - panic(fmt.Sprintf( - "duplicate block %d (%X) enqueued by processor (sent by %v; existing block %X from %v)", - height, block.Hash(), peerID, item.block.Hash(), item.peerID)) - } - - state.queue[height] = queueItem{block: block, peerID: peerID} -} - -func (state *pcState) height() int64 { - return state.context.tmState().LastBlockHeight -} - -// purgePeer moves all unprocessed blocks from the queue -func (state *pcState) purgePeer(peerID p2p.ID) { - // what if height is less than state.height? - for height, item := range state.queue { - if item.peerID == peerID { - delete(state.queue, height) - } - } -} - -// handle processes FSM events -func (state *pcState) handle(event Event) (Event, error) { - switch event := event.(type) { - case bcResetState: - state.context.setState(event.state) - return noOp, nil - - case scFinishedEv: - if state.synced() { - return pcFinished{tmState: state.context.tmState(), blocksSynced: state.blocksSynced}, nil - } - state.draining = true - return noOp, nil - - case scPeerError: - state.purgePeer(event.peerID) - return noOp, nil - - case scBlockReceived: - if event.block == nil { - return noOp, nil - } - - // enqueue block if height is higher than state height, else ignore it - if event.block.Height > state.height() { - state.enqueue(event.peerID, event.block, event.block.Height) - } - return noOp, nil - - case rProcessBlock: - tmState := state.context.tmState() - firstItem, secondItem, err := state.nextTwo() - if err != nil { - if state.draining { - return pcFinished{tmState: tmState, blocksSynced: state.blocksSynced}, nil - } - return noOp, nil - } - - first, second := firstItem.block, secondItem.block - firstParts, err := first.MakePartSet(types.BlockPartSizeBytes) - if err != nil { - panic(fmt.Sprintf("failed to make part set, height %d: %v ", first.Height, err.Error())) - } - firstID := types.BlockID{Hash: first.Hash(), PartSetHeader: firstParts.Header()} - - // verify if +second+ last commit "confirms" +first+ block - err = state.context.verifyCommit(tmState.ChainID, firstID, first.Height, second.LastCommit) - if err != nil { - state.purgePeer(firstItem.peerID) - if firstItem.peerID != secondItem.peerID { - state.purgePeer(secondItem.peerID) - } - return pcBlockVerificationFailure{ - height: first.Height, firstPeerID: firstItem.peerID, secondPeerID: secondItem.peerID}, - nil - } - - state.context.saveBlock(first, firstParts, second.LastCommit) - - if err := state.context.applyBlock(firstID, first); err != nil { - panic(fmt.Sprintf("failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err)) - } - - delete(state.queue, first.Height) - state.blocksSynced++ - - return pcBlockProcessed{height: first.Height, peerID: firstItem.peerID}, nil - } - - return noOp, nil -} diff --git a/blockchain/v2/processor_context.go b/blockchain/v2/processor_context.go deleted file mode 100644 index 6a0466550..000000000 --- a/blockchain/v2/processor_context.go +++ /dev/null @@ -1,100 +0,0 @@ -package v2 - -import ( - "fmt" - - "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/types" -) - -type processorContext interface { - applyBlock(blockID types.BlockID, block *types.Block) error - verifyCommit(chainID string, blockID types.BlockID, height int64, commit *types.Commit) error - saveBlock(block *types.Block, blockParts *types.PartSet, seenCommit *types.Commit) - tmState() state.State - setState(state.State) -} - -type pContext struct { - store blockStore - applier blockApplier - state state.State -} - -func newProcessorContext(st blockStore, ex blockApplier, s state.State) *pContext { - return &pContext{ - store: st, - applier: ex, - state: s, - } -} - -func (pc *pContext) applyBlock(blockID types.BlockID, block *types.Block) error { - newState, _, err := pc.applier.ApplyBlock(pc.state, blockID, block) - pc.state = newState - return err -} - -func (pc pContext) tmState() state.State { - return pc.state -} - -func (pc *pContext) setState(state state.State) { - pc.state = state -} - -func (pc pContext) verifyCommit(chainID string, blockID types.BlockID, height int64, commit *types.Commit) error { - return pc.state.Validators.VerifyCommitLight(chainID, blockID, height, commit) -} - -func (pc *pContext) saveBlock(block *types.Block, blockParts *types.PartSet, seenCommit *types.Commit) { - pc.store.SaveBlock(block, blockParts, seenCommit) -} - -type mockPContext struct { - applicationBL []int64 - verificationBL []int64 - state state.State -} - -func newMockProcessorContext( - state state.State, - verificationBlackList []int64, - applicationBlackList []int64) *mockPContext { - return &mockPContext{ - applicationBL: applicationBlackList, - verificationBL: verificationBlackList, - state: state, - } -} - -func (mpc *mockPContext) applyBlock(blockID types.BlockID, block *types.Block) error { - for _, h := range mpc.applicationBL { - if h == block.Height { - return fmt.Errorf("generic application error") - } - } - mpc.state.LastBlockHeight = block.Height - return nil -} - -func (mpc *mockPContext) verifyCommit(chainID string, blockID types.BlockID, height int64, commit *types.Commit) error { - for _, h := range mpc.verificationBL { - if h == height { - return fmt.Errorf("generic verification error") - } - } - return nil -} - -func (mpc *mockPContext) saveBlock(block *types.Block, blockParts *types.PartSet, seenCommit *types.Commit) { - -} - -func (mpc *mockPContext) setState(state state.State) { - mpc.state = state -} - -func (mpc *mockPContext) tmState() state.State { - return mpc.state -} diff --git a/blockchain/v2/processor_test.go b/blockchain/v2/processor_test.go deleted file mode 100644 index 04ce05b6e..000000000 --- a/blockchain/v2/processor_test.go +++ /dev/null @@ -1,306 +0,0 @@ -package v2 - -import ( - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/tendermint/tendermint/p2p" - tmState "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/types" -) - -// pcBlock is a test helper structure with simple types. Its purpose is to help with test readability. -type pcBlock struct { - pid string - height int64 -} - -// params is a test structure used to create processor state. -type params struct { - height int64 - items []pcBlock - blocksSynced int - verBL []int64 - appBL []int64 - draining bool -} - -// makePcBlock makes an empty block. -func makePcBlock(height int64) *types.Block { - return &types.Block{Header: types.Header{Height: height}} -} - -// makeState takes test parameters and creates a specific processor state. -func makeState(p *params) *pcState { - var ( - tmState = tmState.State{LastBlockHeight: p.height} - context = newMockProcessorContext(tmState, p.verBL, p.appBL) - ) - state := newPcState(context) - - for _, item := range p.items { - state.enqueue(p2p.ID(item.pid), makePcBlock(item.height), item.height) - } - - state.blocksSynced = p.blocksSynced - state.draining = p.draining - return state -} - -func mBlockResponse(peerID p2p.ID, height int64) scBlockReceived { - return scBlockReceived{ - peerID: peerID, - block: makePcBlock(height), - } -} - -type pcFsmMakeStateValues struct { - currentState *params - event Event - wantState *params - wantNextEvent Event - wantErr error - wantPanic bool -} - -type testFields struct { - name string - steps []pcFsmMakeStateValues -} - -func executeProcessorTests(t *testing.T, tests []testFields) { - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - var state *pcState - for _, step := range tt.steps { - defer func() { - r := recover() - if (r != nil) != step.wantPanic { - t.Errorf("recover = %v, wantPanic = %v", r, step.wantPanic) - } - }() - - // First step must always initialize the currentState as state. - if step.currentState != nil { - state = makeState(step.currentState) - } - if state == nil { - panic("Bad (initial?) step") - } - - nextEvent, err := state.handle(step.event) - t.Log(state) - assert.Equal(t, step.wantErr, err) - assert.Equal(t, makeState(step.wantState), state) - assert.Equal(t, step.wantNextEvent, nextEvent) - // Next step may use the wantedState as their currentState. - state = makeState(step.wantState) - } - }) - } -} - -func TestRProcessPeerError(t *testing.T) { - tests := []testFields{ - { - name: "error for existing peer", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{items: []pcBlock{{"P1", 1}, {"P2", 2}}}, - event: scPeerError{peerID: "P2"}, - wantState: ¶ms{items: []pcBlock{{"P1", 1}}}, - wantNextEvent: noOp, - }, - }, - }, - { - name: "error for unknown peer", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{items: []pcBlock{{"P1", 1}, {"P2", 2}}}, - event: scPeerError{peerID: "P3"}, - wantState: ¶ms{items: []pcBlock{{"P1", 1}, {"P2", 2}}}, - wantNextEvent: noOp, - }, - }, - }, - } - - executeProcessorTests(t, tests) -} - -func TestPcBlockResponse(t *testing.T) { - tests := []testFields{ - { - name: "add one block", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{}, event: mBlockResponse("P1", 1), - wantState: ¶ms{items: []pcBlock{{"P1", 1}}}, wantNextEvent: noOp, - }, - }, - }, - - { - name: "add two blocks", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{}, event: mBlockResponse("P1", 3), - wantState: ¶ms{items: []pcBlock{{"P1", 3}}}, wantNextEvent: noOp, - }, - { // use previous wantState as currentState, - event: mBlockResponse("P1", 4), - wantState: ¶ms{items: []pcBlock{{"P1", 3}, {"P1", 4}}}, wantNextEvent: noOp, - }, - }, - }, - } - - executeProcessorTests(t, tests) -} - -func TestRProcessBlockSuccess(t *testing.T) { - tests := []testFields{ - { - name: "noop - no blocks over current height", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{}, event: rProcessBlock{}, - wantState: ¶ms{}, wantNextEvent: noOp, - }, - }, - }, - { - name: "noop - high new blocks", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{height: 5, items: []pcBlock{{"P1", 30}, {"P2", 31}}}, event: rProcessBlock{}, - wantState: ¶ms{height: 5, items: []pcBlock{{"P1", 30}, {"P2", 31}}}, wantNextEvent: noOp, - }, - }, - }, - { - name: "blocks H+1 and H+2 present", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{items: []pcBlock{{"P1", 1}, {"P2", 2}}}, event: rProcessBlock{}, - wantState: ¶ms{height: 1, items: []pcBlock{{"P2", 2}}, blocksSynced: 1}, - wantNextEvent: pcBlockProcessed{height: 1, peerID: "P1"}, - }, - }, - }, - { - name: "blocks H+1 and H+2 present after draining", - steps: []pcFsmMakeStateValues{ - { // some contiguous blocks - on stop check draining is set - currentState: ¶ms{items: []pcBlock{{"P1", 1}, {"P2", 2}, {"P1", 4}}}, - event: scFinishedEv{}, - wantState: ¶ms{items: []pcBlock{{"P1", 1}, {"P2", 2}, {"P1", 4}}, draining: true}, - wantNextEvent: noOp, - }, - { - event: rProcessBlock{}, - wantState: ¶ms{height: 1, items: []pcBlock{{"P2", 2}, {"P1", 4}}, blocksSynced: 1, draining: true}, - wantNextEvent: pcBlockProcessed{height: 1, peerID: "P1"}, - }, - { // finish when H+1 or/and H+2 are missing - event: rProcessBlock{}, - wantState: ¶ms{height: 1, items: []pcBlock{{"P2", 2}, {"P1", 4}}, blocksSynced: 1, draining: true}, - wantNextEvent: pcFinished{tmState: tmState.State{LastBlockHeight: 1}, blocksSynced: 1}, - }, - }, - }, - } - - executeProcessorTests(t, tests) -} - -func TestRProcessBlockFailures(t *testing.T) { - tests := []testFields{ - { - name: "blocks H+1 and H+2 present from different peers - H+1 verification fails ", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{items: []pcBlock{{"P1", 1}, {"P2", 2}}, verBL: []int64{1}}, event: rProcessBlock{}, - wantState: ¶ms{items: []pcBlock{}, verBL: []int64{1}}, - wantNextEvent: pcBlockVerificationFailure{height: 1, firstPeerID: "P1", secondPeerID: "P2"}, - }, - }, - }, - { - name: "blocks H+1 and H+2 present from same peer - H+1 applyBlock fails ", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{items: []pcBlock{{"P1", 1}, {"P2", 2}}, appBL: []int64{1}}, event: rProcessBlock{}, - wantState: ¶ms{items: []pcBlock{}, appBL: []int64{1}}, wantPanic: true, - }, - }, - }, - { - name: "blocks H+1 and H+2 present from same peers - H+1 verification fails ", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{height: 0, items: []pcBlock{{"P1", 1}, {"P1", 2}, {"P2", 3}}, - verBL: []int64{1}}, event: rProcessBlock{}, - wantState: ¶ms{height: 0, items: []pcBlock{{"P2", 3}}, verBL: []int64{1}}, - wantNextEvent: pcBlockVerificationFailure{height: 1, firstPeerID: "P1", secondPeerID: "P1"}, - }, - }, - }, - { - name: "blocks H+1 and H+2 present from different peers - H+1 applyBlock fails ", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{items: []pcBlock{{"P1", 1}, {"P2", 2}, {"P2", 3}}, appBL: []int64{1}}, - event: rProcessBlock{}, - wantState: ¶ms{items: []pcBlock{{"P2", 3}}, appBL: []int64{1}}, wantPanic: true, - }, - }, - }, - } - - executeProcessorTests(t, tests) -} - -func TestScFinishedEv(t *testing.T) { - tests := []testFields{ - { - name: "no blocks", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{height: 100, items: []pcBlock{}, blocksSynced: 100}, event: scFinishedEv{}, - wantState: ¶ms{height: 100, items: []pcBlock{}, blocksSynced: 100}, - wantNextEvent: pcFinished{tmState: tmState.State{LastBlockHeight: 100}, blocksSynced: 100}, - }, - }, - }, - { - name: "maxHeight+1 block present", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{height: 100, items: []pcBlock{ - {"P1", 101}}, blocksSynced: 100}, event: scFinishedEv{}, - wantState: ¶ms{height: 100, items: []pcBlock{{"P1", 101}}, blocksSynced: 100}, - wantNextEvent: pcFinished{tmState: tmState.State{LastBlockHeight: 100}, blocksSynced: 100}, - }, - }, - }, - { - name: "more blocks present", - steps: []pcFsmMakeStateValues{ - { - currentState: ¶ms{height: 100, items: []pcBlock{ - {"P1", 101}, {"P1", 102}}, blocksSynced: 100}, event: scFinishedEv{}, - wantState: ¶ms{height: 100, items: []pcBlock{ - {"P1", 101}, {"P1", 102}}, blocksSynced: 100, draining: true}, - wantNextEvent: noOp, - wantErr: nil, - }, - }, - }, - } - - executeProcessorTests(t, tests) -} diff --git a/blockchain/v2/reactor.go b/blockchain/v2/reactor.go deleted file mode 100644 index 3b3a236e1..000000000 --- a/blockchain/v2/reactor.go +++ /dev/null @@ -1,564 +0,0 @@ -package v2 - -import ( - "errors" - "fmt" - "time" - - "github.com/tendermint/tendermint/behaviour" //nolint:misspell - bc "github.com/tendermint/tendermint/blockchain" - "github.com/tendermint/tendermint/libs/log" - tmsync "github.com/tendermint/tendermint/libs/sync" - "github.com/tendermint/tendermint/p2p" - bcproto "github.com/tendermint/tendermint/proto/tendermint/blockchain" - "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/types" -) - -const ( - // chBufferSize is the buffer size of all event channels. - chBufferSize int = 1000 -) - -type blockStore interface { - LoadBlock(height int64) *types.Block - SaveBlock(*types.Block, *types.PartSet, *types.Commit) - Base() int64 - Height() int64 -} - -// BlockchainReactor handles fast sync protocol. -type BlockchainReactor struct { - p2p.BaseReactor - - fastSync bool // if true, enable fast sync on start - stateSynced bool // set to true when SwitchToFastSync is called by state sync - scheduler *Routine - processor *Routine - logger log.Logger - - mtx tmsync.RWMutex - maxPeerHeight int64 - syncHeight int64 - events chan Event // non-nil during a fast sync - - reporter behaviour.Reporter - io iIO - store blockStore -} - -//nolint:unused,deadcode -type blockVerifier interface { - VerifyCommit(chainID string, blockID types.BlockID, height int64, commit *types.Commit) error -} - -type blockApplier interface { - ApplyBlock(state state.State, blockID types.BlockID, block *types.Block) (state.State, int64, error) -} - -// XXX: unify naming in this package around tmState -func newReactor(state state.State, store blockStore, reporter behaviour.Reporter, - blockApplier blockApplier, fastSync bool) *BlockchainReactor { - initHeight := state.LastBlockHeight + 1 - if initHeight == 1 { - initHeight = state.InitialHeight - } - scheduler := newScheduler(initHeight, time.Now()) - pContext := newProcessorContext(store, blockApplier, state) - // TODO: Fix naming to just newProcesssor - // newPcState requires a processorContext - processor := newPcState(pContext) - - return &BlockchainReactor{ - scheduler: newRoutine("scheduler", scheduler.handle, chBufferSize), - processor: newRoutine("processor", processor.handle, chBufferSize), - store: store, - reporter: reporter, - logger: log.NewNopLogger(), - fastSync: fastSync, - } -} - -// NewBlockchainReactor creates a new reactor instance. -func NewBlockchainReactor( - state state.State, - blockApplier blockApplier, - store blockStore, - fastSync bool) *BlockchainReactor { - reporter := behaviour.NewMockReporter() - return newReactor(state, store, reporter, blockApplier, fastSync) -} - -// SetSwitch implements Reactor interface. -func (r *BlockchainReactor) SetSwitch(sw *p2p.Switch) { - r.Switch = sw - if sw != nil { - r.io = newSwitchIo(sw) - } else { - r.io = nil - } -} - -func (r *BlockchainReactor) setMaxPeerHeight(height int64) { - r.mtx.Lock() - defer r.mtx.Unlock() - if height > r.maxPeerHeight { - r.maxPeerHeight = height - } -} - -func (r *BlockchainReactor) setSyncHeight(height int64) { - r.mtx.Lock() - defer r.mtx.Unlock() - r.syncHeight = height -} - -// SyncHeight returns the height to which the BlockchainReactor has synced. -func (r *BlockchainReactor) SyncHeight() int64 { - r.mtx.RLock() - defer r.mtx.RUnlock() - return r.syncHeight -} - -// SetLogger sets the logger of the reactor. -func (r *BlockchainReactor) SetLogger(logger log.Logger) { - r.logger = logger - r.scheduler.setLogger(logger) - r.processor.setLogger(logger) -} - -// Start implements cmn.Service interface -func (r *BlockchainReactor) Start() error { - r.reporter = behaviour.NewSwitchReporter(r.BaseReactor.Switch) - if r.fastSync { - err := r.startSync(nil) - if err != nil { - return fmt.Errorf("failed to start fast sync: %w", err) - } - } - return nil -} - -// startSync begins a fast sync, signaled by r.events being non-nil. If state is non-nil, -// the scheduler and processor is updated with this state on startup. -func (r *BlockchainReactor) startSync(state *state.State) error { - r.mtx.Lock() - defer r.mtx.Unlock() - if r.events != nil { - return errors.New("fast sync already in progress") - } - r.events = make(chan Event, chBufferSize) - go r.scheduler.start() - go r.processor.start() - if state != nil { - <-r.scheduler.ready() - <-r.processor.ready() - r.scheduler.send(bcResetState{state: *state}) - r.processor.send(bcResetState{state: *state}) - } - go r.demux(r.events) - return nil -} - -// endSync ends a fast sync -func (r *BlockchainReactor) endSync() { - r.mtx.Lock() - defer r.mtx.Unlock() - if r.events != nil { - close(r.events) - } - r.events = nil - r.scheduler.stop() - r.processor.stop() -} - -// SwitchToFastSync is called by the state sync reactor when switching to fast sync. -func (r *BlockchainReactor) SwitchToFastSync(state state.State) error { - r.stateSynced = true - state = state.Copy() - return r.startSync(&state) -} - -// reactor generated ticker events: -// ticker for cleaning peers -type rTryPrunePeer struct { - priorityHigh - time time.Time -} - -func (e rTryPrunePeer) String() string { - return fmt.Sprintf("rTryPrunePeer{%v}", e.time) -} - -// ticker event for scheduling block requests -type rTrySchedule struct { - priorityHigh - time time.Time -} - -func (e rTrySchedule) String() string { - return fmt.Sprintf("rTrySchedule{%v}", e.time) -} - -// ticker for block processing -type rProcessBlock struct { - priorityNormal -} - -func (e rProcessBlock) String() string { - return "rProcessBlock" -} - -// reactor generated events based on blockchain related messages from peers: -// blockResponse message received from a peer -type bcBlockResponse struct { - priorityNormal - time time.Time - peerID p2p.ID - size int64 - block *types.Block -} - -func (resp bcBlockResponse) String() string { - return fmt.Sprintf("bcBlockResponse{%d#%X (size: %d bytes) from %v at %v}", - resp.block.Height, resp.block.Hash(), resp.size, resp.peerID, resp.time) -} - -// blockNoResponse message received from a peer -type bcNoBlockResponse struct { - priorityNormal - time time.Time - peerID p2p.ID - height int64 -} - -func (resp bcNoBlockResponse) String() string { - return fmt.Sprintf("bcNoBlockResponse{%v has no block at height %d at %v}", - resp.peerID, resp.height, resp.time) -} - -// statusResponse message received from a peer -type bcStatusResponse struct { - priorityNormal - time time.Time - peerID p2p.ID - base int64 - height int64 -} - -func (resp bcStatusResponse) String() string { - return fmt.Sprintf("bcStatusResponse{%v is at height %d (base: %d) at %v}", - resp.peerID, resp.height, resp.base, resp.time) -} - -// new peer is connected -type bcAddNewPeer struct { - priorityNormal - peerID p2p.ID -} - -func (resp bcAddNewPeer) String() string { - return fmt.Sprintf("bcAddNewPeer{%v}", resp.peerID) -} - -// existing peer is removed -type bcRemovePeer struct { - priorityHigh - peerID p2p.ID - reason interface{} -} - -func (resp bcRemovePeer) String() string { - return fmt.Sprintf("bcRemovePeer{%v due to %v}", resp.peerID, resp.reason) -} - -// resets the scheduler and processor state, e.g. following a switch from state syncing -type bcResetState struct { - priorityHigh - state state.State -} - -func (e bcResetState) String() string { - return fmt.Sprintf("bcResetState{%v}", e.state) -} - -// Takes the channel as a parameter to avoid race conditions on r.events. -func (r *BlockchainReactor) demux(events <-chan Event) { - var lastRate = 0.0 - var lastHundred = time.Now() - - var ( - processBlockFreq = 20 * time.Millisecond - doProcessBlockCh = make(chan struct{}, 1) - doProcessBlockTk = time.NewTicker(processBlockFreq) - ) - defer doProcessBlockTk.Stop() - - var ( - prunePeerFreq = 1 * time.Second - doPrunePeerCh = make(chan struct{}, 1) - doPrunePeerTk = time.NewTicker(prunePeerFreq) - ) - defer doPrunePeerTk.Stop() - - var ( - scheduleFreq = 20 * time.Millisecond - doScheduleCh = make(chan struct{}, 1) - doScheduleTk = time.NewTicker(scheduleFreq) - ) - defer doScheduleTk.Stop() - - var ( - statusFreq = 10 * time.Second - doStatusCh = make(chan struct{}, 1) - doStatusTk = time.NewTicker(statusFreq) - ) - defer doStatusTk.Stop() - doStatusCh <- struct{}{} // immediately broadcast to get status of existing peers - - // XXX: Extract timers to make testing atemporal - for { - select { - // Pacers: send at most per frequency but don't saturate - case <-doProcessBlockTk.C: - select { - case doProcessBlockCh <- struct{}{}: - default: - } - case <-doPrunePeerTk.C: - select { - case doPrunePeerCh <- struct{}{}: - default: - } - case <-doScheduleTk.C: - select { - case doScheduleCh <- struct{}{}: - default: - } - case <-doStatusTk.C: - select { - case doStatusCh <- struct{}{}: - default: - } - - // Tickers: perform tasks periodically - case <-doScheduleCh: - r.scheduler.send(rTrySchedule{time: time.Now()}) - case <-doPrunePeerCh: - r.scheduler.send(rTryPrunePeer{time: time.Now()}) - case <-doProcessBlockCh: - r.processor.send(rProcessBlock{}) - case <-doStatusCh: - if err := r.io.broadcastStatusRequest(); err != nil { - r.logger.Error("Error broadcasting status request", "err", err) - } - - // Events from peers. Closing the channel signals event loop termination. - case event, ok := <-events: - if !ok { - r.logger.Info("Stopping event processing") - return - } - switch event := event.(type) { - case bcStatusResponse: - r.setMaxPeerHeight(event.height) - r.scheduler.send(event) - case bcAddNewPeer, bcRemovePeer, bcBlockResponse, bcNoBlockResponse: - r.scheduler.send(event) - default: - r.logger.Error("Received unexpected event", "event", fmt.Sprintf("%T", event)) - } - - // Incremental events from scheduler - case event := <-r.scheduler.next(): - switch event := event.(type) { - case scBlockReceived: - r.processor.send(event) - case scPeerError: - r.processor.send(event) - if err := r.reporter.Report(behaviour.BadMessage(event.peerID, "scPeerError")); err != nil { - r.logger.Error("Error reporting peer", "err", err) - } - case scBlockRequest: - if err := r.io.sendBlockRequest(event.peerID, event.height); err != nil { - r.logger.Error("Error sending block request", "err", err) - } - case scFinishedEv: - r.processor.send(event) - r.scheduler.stop() - case scSchedulerFail: - r.logger.Error("Scheduler failure", "err", event.reason.Error()) - case scPeersPruned: - // Remove peers from the processor. - for _, peerID := range event.peers { - r.processor.send(scPeerError{peerID: peerID, reason: errors.New("peer was pruned")}) - } - r.logger.Debug("Pruned peers", "count", len(event.peers)) - case noOpEvent: - default: - r.logger.Error("Received unexpected scheduler event", "event", fmt.Sprintf("%T", event)) - } - - // Incremental events from processor - case event := <-r.processor.next(): - switch event := event.(type) { - case pcBlockProcessed: - r.setSyncHeight(event.height) - if r.syncHeight%100 == 0 { - lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds()) - r.logger.Info("Fast Sync Rate", "height", r.syncHeight, - "max_peer_height", r.maxPeerHeight, "blocks/s", lastRate) - lastHundred = time.Now() - } - r.scheduler.send(event) - case pcBlockVerificationFailure: - r.scheduler.send(event) - case pcFinished: - r.logger.Info("Fast sync complete, switching to consensus") - if !r.io.trySwitchToConsensus(event.tmState, event.blocksSynced > 0 || r.stateSynced) { - r.logger.Error("Failed to switch to consensus reactor") - } - r.endSync() - return - case noOpEvent: - default: - r.logger.Error("Received unexpected processor event", "event", fmt.Sprintf("%T", event)) - } - - // Terminal event from scheduler - case err := <-r.scheduler.final(): - switch err { - case nil: - r.logger.Info("Scheduler stopped") - default: - r.logger.Error("Scheduler aborted with error", "err", err) - } - - // Terminal event from processor - case err := <-r.processor.final(): - switch err { - case nil: - r.logger.Info("Processor stopped") - default: - r.logger.Error("Processor aborted with error", "err", err) - } - } - } -} - -// Stop implements cmn.Service interface. -func (r *BlockchainReactor) Stop() error { - r.logger.Info("reactor stopping") - r.endSync() - r.logger.Info("reactor stopped") - return nil -} - -// Receive implements Reactor by handling different message types. -func (r *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { - msg, err := bc.DecodeMsg(msgBytes) - if err != nil { - r.logger.Error("error decoding message", - "src", src.ID(), "chId", chID, "msg", msg, "err", err) - _ = r.reporter.Report(behaviour.BadMessage(src.ID(), err.Error())) - return - } - - if err = bc.ValidateMsg(msg); err != nil { - r.logger.Error("peer sent us invalid msg", "peer", src, "msg", msg, "err", err) - _ = r.reporter.Report(behaviour.BadMessage(src.ID(), err.Error())) - return - } - - r.logger.Debug("Receive", "src", src.ID(), "chID", chID, "msg", msg) - - switch msg := msg.(type) { - case *bcproto.StatusRequest: - if err := r.io.sendStatusResponse(r.store.Base(), r.store.Height(), src.ID()); err != nil { - r.logger.Error("Could not send status message to peer", "src", src) - } - - case *bcproto.BlockRequest: - block := r.store.LoadBlock(msg.Height) - if block != nil { - if err = r.io.sendBlockToPeer(block, src.ID()); err != nil { - r.logger.Error("Could not send block message to peer: ", err) - } - } else { - r.logger.Info("peer asking for a block we don't have", "src", src, "height", msg.Height) - peerID := src.ID() - if err = r.io.sendBlockNotFound(msg.Height, peerID); err != nil { - r.logger.Error("Couldn't send block not found: ", err) - } - } - - case *bcproto.StatusResponse: - r.mtx.RLock() - if r.events != nil { - r.events <- bcStatusResponse{peerID: src.ID(), base: msg.Base, height: msg.Height} - } - r.mtx.RUnlock() - - case *bcproto.BlockResponse: - bi, err := types.BlockFromProto(msg.Block) - if err != nil { - r.logger.Error("error transitioning block from protobuf", "err", err) - return - } - r.mtx.RLock() - if r.events != nil { - r.events <- bcBlockResponse{ - peerID: src.ID(), - block: bi, - size: int64(len(msgBytes)), - time: time.Now(), - } - } - r.mtx.RUnlock() - - case *bcproto.NoBlockResponse: - r.mtx.RLock() - if r.events != nil { - r.events <- bcNoBlockResponse{peerID: src.ID(), height: msg.Height, time: time.Now()} - } - r.mtx.RUnlock() - } -} - -// AddPeer implements Reactor interface -func (r *BlockchainReactor) AddPeer(peer p2p.Peer) { - err := r.io.sendStatusResponse(r.store.Base(), r.store.Height(), peer.ID()) - if err != nil { - r.logger.Error("Could not send status message to peer new", "src", peer.ID, "height", r.SyncHeight()) - } - r.mtx.RLock() - defer r.mtx.RUnlock() - if r.events != nil { - r.events <- bcAddNewPeer{peerID: peer.ID()} - } -} - -// RemovePeer implements Reactor interface. -func (r *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) { - r.mtx.RLock() - defer r.mtx.RUnlock() - if r.events != nil { - r.events <- bcRemovePeer{ - peerID: peer.ID(), - reason: reason, - } - } -} - -// GetChannels implements Reactor -func (r *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor { - return []*p2p.ChannelDescriptor{ - { - ID: BlockchainChannel, - Priority: 5, - SendQueueCapacity: 2000, - RecvBufferCapacity: 50 * 4096, - RecvMessageCapacity: bc.MaxMsgSize, - }, - } -} diff --git a/blockchain/v2/reactor_test.go b/blockchain/v2/reactor_test.go deleted file mode 100644 index f5ca11421..000000000 --- a/blockchain/v2/reactor_test.go +++ /dev/null @@ -1,569 +0,0 @@ -package v2 - -import ( - "fmt" - "net" - "os" - "sort" - "sync" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - dbm "github.com/tendermint/tm-db" - - abci "github.com/tendermint/tendermint/abci/types" - "github.com/tendermint/tendermint/behaviour" //nolint:misspell - bc "github.com/tendermint/tendermint/blockchain" - cfg "github.com/tendermint/tendermint/config" - "github.com/tendermint/tendermint/libs/log" - "github.com/tendermint/tendermint/libs/service" - mpmocks "github.com/tendermint/tendermint/mempool/mocks" - "github.com/tendermint/tendermint/p2p" - "github.com/tendermint/tendermint/p2p/conn" - bcproto "github.com/tendermint/tendermint/proto/tendermint/blockchain" - "github.com/tendermint/tendermint/proxy" - sm "github.com/tendermint/tendermint/state" - sf "github.com/tendermint/tendermint/state/test/factory" - "github.com/tendermint/tendermint/store" - "github.com/tendermint/tendermint/types" - tmtime "github.com/tendermint/tendermint/types/time" -) - -type mockPeer struct { - service.Service - id p2p.ID -} - -func (mp mockPeer) FlushStop() {} -func (mp mockPeer) ID() p2p.ID { return mp.id } -func (mp mockPeer) RemoteIP() net.IP { return net.IP{} } -func (mp mockPeer) RemoteAddr() net.Addr { return &net.TCPAddr{IP: mp.RemoteIP(), Port: 8800} } - -func (mp mockPeer) IsOutbound() bool { return true } -func (mp mockPeer) IsPersistent() bool { return true } -func (mp mockPeer) CloseConn() error { return nil } - -func (mp mockPeer) NodeInfo() p2p.NodeInfo { - return p2p.DefaultNodeInfo{ - DefaultNodeID: "", - ListenAddr: "", - } -} -func (mp mockPeer) Status() conn.ConnectionStatus { return conn.ConnectionStatus{} } -func (mp mockPeer) SocketAddr() *p2p.NetAddress { return &p2p.NetAddress{} } - -func (mp mockPeer) Send(byte, []byte) bool { return true } -func (mp mockPeer) TrySend(byte, []byte) bool { return true } - -func (mp mockPeer) Set(string, interface{}) {} -func (mp mockPeer) Get(string) interface{} { return struct{}{} } - -// nolint:unused // ignore -type mockBlockStore struct { - blocks map[int64]*types.Block -} - -// nolint:unused // ignore -func (ml *mockBlockStore) Height() int64 { - return int64(len(ml.blocks)) -} - -// nolint:unused // ignore -func (ml *mockBlockStore) LoadBlock(height int64) *types.Block { - return ml.blocks[height] -} - -// nolint:unused // ignore -func (ml *mockBlockStore) SaveBlock(block *types.Block, part *types.PartSet, commit *types.Commit) { - ml.blocks[block.Height] = block -} - -type mockBlockApplier struct { -} - -// XXX: Add whitelist/blacklist? -func (mba *mockBlockApplier) ApplyBlock( - state sm.State, blockID types.BlockID, block *types.Block, -) (sm.State, int64, error) { - state.LastBlockHeight++ - return state, 0, nil -} - -type mockSwitchIo struct { - mtx sync.Mutex - switchedToConsensus bool - numStatusResponse int - numBlockResponse int - numNoBlockResponse int -} - -func (sio *mockSwitchIo) sendBlockRequest(peerID p2p.ID, height int64) error { - return nil -} - -func (sio *mockSwitchIo) sendStatusResponse(base, height int64, peerID p2p.ID) error { - sio.mtx.Lock() - defer sio.mtx.Unlock() - sio.numStatusResponse++ - return nil -} - -func (sio *mockSwitchIo) sendBlockToPeer(block *types.Block, peerID p2p.ID) error { - sio.mtx.Lock() - defer sio.mtx.Unlock() - sio.numBlockResponse++ - return nil -} - -func (sio *mockSwitchIo) sendBlockNotFound(height int64, peerID p2p.ID) error { - sio.mtx.Lock() - defer sio.mtx.Unlock() - sio.numNoBlockResponse++ - return nil -} - -func (sio *mockSwitchIo) trySwitchToConsensus(state sm.State, skipWAL bool) bool { - sio.mtx.Lock() - defer sio.mtx.Unlock() - sio.switchedToConsensus = true - return true -} - -func (sio *mockSwitchIo) broadcastStatusRequest() error { - return nil -} - -type testReactorParams struct { - logger log.Logger - genDoc *types.GenesisDoc - privVals []types.PrivValidator - startHeight int64 - mockA bool -} - -func newTestReactor(t *testing.T, p testReactorParams) *BlockchainReactor { - store, state, _ := newReactorStore(t, p.genDoc, p.privVals, p.startHeight) - reporter := behaviour.NewMockReporter() - - var appl blockApplier - - if p.mockA { - appl = &mockBlockApplier{} - } else { - mp := &mpmocks.Mempool{} - mp.On("Lock").Return() - mp.On("Unlock").Return() - mp.On("FlushAppConn", mock.Anything).Return(nil) - mp.On("Update", - mock.Anything, - mock.Anything, - mock.Anything, - mock.Anything, - mock.Anything, - mock.Anything).Return(nil) - - app := &testApp{} - cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) - err := proxyApp.Start() - if err != nil { - panic(fmt.Errorf("error start app: %w", err)) - } - db := dbm.NewMemDB() - stateStore := sm.NewStore(db) - appl = sm.NewBlockExecutor(stateStore, p.logger, proxyApp.Consensus(), mp, sm.EmptyEvidencePool{}) - if err = stateStore.Save(state); err != nil { - panic(err) - } - } - - r := newReactor(state, store, reporter, appl, true) - logger := log.TestingLogger() - r.SetLogger(logger.With("module", "blockchain")) - - return r -} - -// This test is left here and not deleted to retain the termination cases for -// future improvement in [#4482](https://github.com/tendermint/tendermint/issues/4482). -// func TestReactorTerminationScenarios(t *testing.T) { - -// config := cfg.ResetTestRoot("blockchain_reactor_v2_test") -// defer os.RemoveAll(config.RootDir) -// genDoc, privVals := randGenesisDoc(config.ChainID(), 1, false, 30) -// refStore, _, _ := newReactorStore(genDoc, privVals, 20) - -// params := testReactorParams{ -// logger: log.TestingLogger(), -// genDoc: genDoc, -// privVals: privVals, -// startHeight: 10, -// bufferSize: 100, -// mockA: true, -// } - -// type testEvent struct { -// evType string -// peer string -// height int64 -// } - -// tests := []struct { -// name string -// params testReactorParams -// msgs []testEvent -// }{ -// { -// name: "simple termination on max peer height - one peer", -// params: params, -// msgs: []testEvent{ -// {evType: "AddPeer", peer: "P1"}, -// {evType: "ReceiveS", peer: "P1", height: 13}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P1", height: 11}, -// {evType: "BlockReq"}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P1", height: 12}, -// {evType: "Process"}, -// {evType: "ReceiveB", peer: "P1", height: 13}, -// {evType: "Process"}, -// }, -// }, -// { -// name: "simple termination on max peer height - two peers", -// params: params, -// msgs: []testEvent{ -// {evType: "AddPeer", peer: "P1"}, -// {evType: "AddPeer", peer: "P2"}, -// {evType: "ReceiveS", peer: "P1", height: 13}, -// {evType: "ReceiveS", peer: "P2", height: 15}, -// {evType: "BlockReq"}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P1", height: 11}, -// {evType: "ReceiveB", peer: "P2", height: 12}, -// {evType: "Process"}, -// {evType: "BlockReq"}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P1", height: 13}, -// {evType: "Process"}, -// {evType: "ReceiveB", peer: "P2", height: 14}, -// {evType: "Process"}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P2", height: 15}, -// {evType: "Process"}, -// }, -// }, -// { -// name: "termination on max peer height - two peers, noBlock error", -// params: params, -// msgs: []testEvent{ -// {evType: "AddPeer", peer: "P1"}, -// {evType: "AddPeer", peer: "P2"}, -// {evType: "ReceiveS", peer: "P1", height: 13}, -// {evType: "ReceiveS", peer: "P2", height: 15}, -// {evType: "BlockReq"}, -// {evType: "BlockReq"}, -// {evType: "ReceiveNB", peer: "P1", height: 11}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P2", height: 12}, -// {evType: "ReceiveB", peer: "P2", height: 11}, -// {evType: "Process"}, -// {evType: "BlockReq"}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P2", height: 13}, -// {evType: "Process"}, -// {evType: "ReceiveB", peer: "P2", height: 14}, -// {evType: "Process"}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P2", height: 15}, -// {evType: "Process"}, -// }, -// }, -// { -// name: "termination on max peer height - two peers, remove one peer", -// params: params, -// msgs: []testEvent{ -// {evType: "AddPeer", peer: "P1"}, -// {evType: "AddPeer", peer: "P2"}, -// {evType: "ReceiveS", peer: "P1", height: 13}, -// {evType: "ReceiveS", peer: "P2", height: 15}, -// {evType: "BlockReq"}, -// {evType: "BlockReq"}, -// {evType: "RemovePeer", peer: "P1"}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P2", height: 12}, -// {evType: "ReceiveB", peer: "P2", height: 11}, -// {evType: "Process"}, -// {evType: "BlockReq"}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P2", height: 13}, -// {evType: "Process"}, -// {evType: "ReceiveB", peer: "P2", height: 14}, -// {evType: "Process"}, -// {evType: "BlockReq"}, -// {evType: "ReceiveB", peer: "P2", height: 15}, -// {evType: "Process"}, -// }, -// }, -// } - -// for _, tt := range tests { -// tt := tt -// t.Run(tt.name, func(t *testing.T) { -// reactor := newTestReactor(params) -// reactor.Start() -// reactor.reporter = behavior.NewMockReporter() -// mockSwitch := &mockSwitchIo{switchedToConsensus: false} -// reactor.io = mockSwitch -// // time for go routines to start -// time.Sleep(time.Millisecond) - -// for _, step := range tt.msgs { -// switch step.evType { -// case "AddPeer": -// reactor.scheduler.send(bcAddNewPeer{peerID: p2p.ID(step.peer)}) -// case "RemovePeer": -// reactor.scheduler.send(bcRemovePeer{peerID: p2p.ID(step.peer)}) -// case "ReceiveS": -// reactor.scheduler.send(bcStatusResponse{ -// peerID: p2p.ID(step.peer), -// height: step.height, -// time: time.Now(), -// }) -// case "ReceiveB": -// reactor.scheduler.send(bcBlockResponse{ -// peerID: p2p.ID(step.peer), -// block: refStore.LoadBlock(step.height), -// size: 10, -// time: time.Now(), -// }) -// case "ReceiveNB": -// reactor.scheduler.send(bcNoBlockResponse{ -// peerID: p2p.ID(step.peer), -// height: step.height, -// time: time.Now(), -// }) -// case "BlockReq": -// reactor.scheduler.send(rTrySchedule{time: time.Now()}) -// case "Process": -// reactor.processor.send(rProcessBlock{}) -// } -// // give time for messages to propagate between routines -// time.Sleep(time.Millisecond) -// } - -// // time for processor to finish and reactor to switch to consensus -// time.Sleep(20 * time.Millisecond) -// assert.True(t, mockSwitch.hasSwitchedToConsensus()) -// reactor.Stop() -// }) -// } -// } - -func TestReactorHelperMode(t *testing.T) { - var ( - channelID = byte(0x40) - ) - - config := cfg.ResetTestRoot("blockchain_reactor_v2_test") - defer os.RemoveAll(config.RootDir) - genDoc, privVals := randGenesisDoc(config.ChainID(), 1, false, 30) - - params := testReactorParams{ - logger: log.TestingLogger(), - genDoc: genDoc, - privVals: privVals, - startHeight: 20, - mockA: true, - } - - type testEvent struct { - peer string - event interface{} - } - - tests := []struct { - name string - params testReactorParams - msgs []testEvent - }{ - { - name: "status request", - params: params, - msgs: []testEvent{ - {"P1", bcproto.StatusRequest{}}, - {"P1", bcproto.BlockRequest{Height: 13}}, - {"P1", bcproto.BlockRequest{Height: 20}}, - {"P1", bcproto.BlockRequest{Height: 22}}, - }, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - reactor := newTestReactor(t, params) - mockSwitch := &mockSwitchIo{switchedToConsensus: false} - reactor.io = mockSwitch - err := reactor.Start() - require.NoError(t, err) - - for i := 0; i < len(tt.msgs); i++ { - step := tt.msgs[i] - switch ev := step.event.(type) { - case bcproto.StatusRequest: - old := mockSwitch.numStatusResponse - msg, err := bc.EncodeMsg(&ev) - assert.NoError(t, err) - reactor.Receive(channelID, mockPeer{id: p2p.ID(step.peer)}, msg) - assert.Equal(t, old+1, mockSwitch.numStatusResponse) - case bcproto.BlockRequest: - if ev.Height > params.startHeight { - old := mockSwitch.numNoBlockResponse - msg, err := bc.EncodeMsg(&ev) - assert.NoError(t, err) - reactor.Receive(channelID, mockPeer{id: p2p.ID(step.peer)}, msg) - assert.Equal(t, old+1, mockSwitch.numNoBlockResponse) - } else { - old := mockSwitch.numBlockResponse - msg, err := bc.EncodeMsg(&ev) - assert.NoError(t, err) - assert.NoError(t, err) - reactor.Receive(channelID, mockPeer{id: p2p.ID(step.peer)}, msg) - assert.Equal(t, old+1, mockSwitch.numBlockResponse) - } - } - } - err = reactor.Stop() - require.NoError(t, err) - }) - } -} - -func TestReactorSetSwitchNil(t *testing.T) { - config := cfg.ResetTestRoot("blockchain_reactor_v2_test") - defer os.RemoveAll(config.RootDir) - genDoc, privVals := randGenesisDoc(config.ChainID(), 1, false, 30) - - reactor := newTestReactor(t, testReactorParams{ - logger: log.TestingLogger(), - genDoc: genDoc, - privVals: privVals, - }) - reactor.SetSwitch(nil) - - assert.Nil(t, reactor.Switch) - assert.Nil(t, reactor.io) -} - -type testApp struct { - abci.BaseApplication -} - -func randGenesisDoc(chainID string, numValidators int, randPower bool, minPower int64) ( - *types.GenesisDoc, []types.PrivValidator) { - validators := make([]types.GenesisValidator, numValidators) - privValidators := make([]types.PrivValidator, numValidators) - for i := 0; i < numValidators; i++ { - val, privVal := types.RandValidator(randPower, minPower) - validators[i] = types.GenesisValidator{ - PubKey: val.PubKey, - Power: val.VotingPower, - } - privValidators[i] = privVal - } - sort.Sort(types.PrivValidatorsByAddress(privValidators)) - - return &types.GenesisDoc{ - GenesisTime: tmtime.Now(), - ChainID: chainID, - Validators: validators, - }, privValidators -} - -// Why are we importing the entire blockExecutor dependency graph here -// when we have the facilities to -func newReactorStore( - t *testing.T, - genDoc *types.GenesisDoc, - privVals []types.PrivValidator, - maxBlockHeight int64) (*store.BlockStore, sm.State, *sm.BlockExecutor) { - if len(privVals) != 1 { - panic("only support one validator") - } - app := &testApp{} - cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) - err := proxyApp.Start() - if err != nil { - panic(fmt.Errorf("error start app: %w", err)) - } - - stateDB := dbm.NewMemDB() - blockStore := store.NewBlockStore(dbm.NewMemDB()) - stateStore := sm.NewStore(stateDB) - state, err := stateStore.LoadFromDBOrGenesisDoc(genDoc) - if err != nil { - panic(fmt.Errorf("error constructing state from genesis file: %w", err)) - } - - mp := &mpmocks.Mempool{} - mp.On("Lock").Return() - mp.On("Unlock").Return() - mp.On("FlushAppConn", mock.Anything).Return(nil) - mp.On("Update", - mock.Anything, - mock.Anything, - mock.Anything, - mock.Anything, - mock.Anything, - mock.Anything).Return(nil) - - db := dbm.NewMemDB() - stateStore = sm.NewStore(db) - blockExec := sm.NewBlockExecutor(stateStore, log.TestingLogger(), proxyApp.Consensus(), - mp, sm.EmptyEvidencePool{}) - if err = stateStore.Save(state); err != nil { - panic(err) - } - - // add blocks in - for blockHeight := int64(1); blockHeight <= maxBlockHeight; blockHeight++ { - lastCommit := types.NewCommit(blockHeight-1, 0, types.BlockID{}, nil) - if blockHeight > 1 { - lastBlockMeta := blockStore.LoadBlockMeta(blockHeight - 1) - lastBlock := blockStore.LoadBlock(blockHeight - 1) - vote, err := types.MakeVote( - lastBlock.Header.Height, - lastBlockMeta.BlockID, - state.Validators, - privVals[0], - lastBlock.Header.ChainID, - time.Now(), - ) - if err != nil { - panic(err) - } - lastCommit = types.NewCommit(vote.Height, vote.Round, - lastBlockMeta.BlockID, []types.CommitSig{vote.CommitSig()}) - } - - thisBlock := sf.MakeBlock(state, blockHeight, lastCommit) - - thisParts, err := thisBlock.MakePartSet(types.BlockPartSizeBytes) - require.NoError(t, err) - - blockID := types.BlockID{Hash: thisBlock.Hash(), PartSetHeader: thisParts.Header()} - - state, _, err = blockExec.ApplyBlock(state, blockID, thisBlock) - if err != nil { - panic(fmt.Errorf("error apply block: %w", err)) - } - - blockStore.SaveBlock(thisBlock, thisParts, lastCommit) - } - return blockStore, state, blockExec -} diff --git a/blockchain/v2/routine.go b/blockchain/v2/routine.go deleted file mode 100644 index dad0e737f..000000000 --- a/blockchain/v2/routine.go +++ /dev/null @@ -1,166 +0,0 @@ -package v2 - -import ( - "fmt" - "strings" - "sync/atomic" - - "github.com/Workiva/go-datastructures/queue" - - "github.com/tendermint/tendermint/libs/log" -) - -type handleFunc = func(event Event) (Event, error) - -const historySize = 25 - -// Routine is a structure that models a finite state machine as serialized -// stream of events processed by a handle function. This Routine structure -// handles the concurrency and messaging guarantees. Events are sent via -// `send` are handled by the `handle` function to produce an iterator -// `next()`. Calling `stop()` on a routine will conclude processing of all -// sent events and produce `final()` event representing the terminal state. -type Routine struct { - name string - handle handleFunc - queue *queue.PriorityQueue - history []Event - out chan Event - fin chan error - rdy chan struct{} - running *uint32 - logger log.Logger - metrics *Metrics -} - -func newRoutine(name string, handleFunc handleFunc, bufferSize int) *Routine { - return &Routine{ - name: name, - handle: handleFunc, - queue: queue.NewPriorityQueue(bufferSize, true), - history: make([]Event, 0, historySize), - out: make(chan Event, bufferSize), - rdy: make(chan struct{}, 1), - fin: make(chan error, 1), - running: new(uint32), - logger: log.NewNopLogger(), - metrics: NopMetrics(), - } -} - -func (rt *Routine) setLogger(logger log.Logger) { - rt.logger = logger -} - -// nolint:unused -func (rt *Routine) setMetrics(metrics *Metrics) { - rt.metrics = metrics -} - -func (rt *Routine) start() { - rt.logger.Info("routine start", "msg", log.NewLazySprintf("%s: run", rt.name)) - running := atomic.CompareAndSwapUint32(rt.running, uint32(0), uint32(1)) - if !running { - panic(fmt.Sprintf("%s is already running", rt.name)) - } - close(rt.rdy) - defer func() { - if r := recover(); r != nil { - var ( - b strings.Builder - j int - ) - for i := len(rt.history) - 1; i >= 0; i-- { - fmt.Fprintf(&b, "%d: %+v\n", j, rt.history[i]) - j++ - } - panic(fmt.Sprintf("%v\nlast events:\n%v", r, b.String())) - } - stopped := atomic.CompareAndSwapUint32(rt.running, uint32(1), uint32(0)) - if !stopped { - panic(fmt.Sprintf("%s is failed to stop", rt.name)) - } - }() - - for { - events, err := rt.queue.Get(1) - if err == queue.ErrDisposed { - rt.terminate(nil) - return - } else if err != nil { - rt.terminate(err) - return - } - oEvent, err := rt.handle(events[0].(Event)) - rt.metrics.EventsHandled.With("routine", rt.name).Add(1) - if err != nil { - rt.terminate(err) - return - } - rt.metrics.EventsOut.With("routine", rt.name).Add(1) - rt.logger.Debug("routine start", "msg", log.NewLazySprintf("%s: produced %T %+v", rt.name, oEvent, oEvent)) - - // Skip rTrySchedule and rProcessBlock events as they clutter the history - // due to their frequency. - switch events[0].(type) { - case rTrySchedule: - case rProcessBlock: - default: - rt.history = append(rt.history, events[0].(Event)) - if len(rt.history) > historySize { - rt.history = rt.history[1:] - } - } - - rt.out <- oEvent - } -} - -// XXX: look into returning OpError in the net package -func (rt *Routine) send(event Event) bool { - rt.logger.Debug("routine send", "msg", log.NewLazySprintf("%s: received %T %+v", rt.name, event, event)) - if !rt.isRunning() { - return false - } - err := rt.queue.Put(event) - if err != nil { - rt.metrics.EventsShed.With("routine", rt.name).Add(1) - rt.logger.Error(fmt.Sprintf("%s: send failed, queue was full/stopped", rt.name)) - return false - } - - rt.metrics.EventsSent.With("routine", rt.name).Add(1) - return true -} - -func (rt *Routine) isRunning() bool { - return atomic.LoadUint32(rt.running) == 1 -} - -func (rt *Routine) next() chan Event { - return rt.out -} - -func (rt *Routine) ready() chan struct{} { - return rt.rdy -} - -func (rt *Routine) stop() { - if !rt.isRunning() { // XXX: this should check rt.queue.Disposed() - return - } - - rt.logger.Info("routine stop", "msg", log.NewLazySprintf("%s: stop", rt.name)) - rt.queue.Dispose() // this should block until all queue items are free? -} - -func (rt *Routine) final() chan error { - return rt.fin -} - -// XXX: Maybe get rid of this -func (rt *Routine) terminate(reason error) { - // We don't close the rt.out channel here, to avoid spinning on the closed channel - // in the event loop. - rt.fin <- reason -} diff --git a/blockchain/v2/routine_test.go b/blockchain/v2/routine_test.go deleted file mode 100644 index 8f92bee3e..000000000 --- a/blockchain/v2/routine_test.go +++ /dev/null @@ -1,163 +0,0 @@ -package v2 - -import ( - "fmt" - "testing" - "time" - - "github.com/stretchr/testify/assert" -) - -type eventA struct { - priorityNormal -} - -var errDone = fmt.Errorf("done") - -func simpleHandler(event Event) (Event, error) { - if _, ok := event.(eventA); ok { - return noOp, errDone - } - return noOp, nil -} - -func TestRoutineFinal(t *testing.T) { - var ( - bufferSize = 10 - routine = newRoutine("simpleRoutine", simpleHandler, bufferSize) - ) - - assert.False(t, routine.isRunning(), - "expected an initialized routine to not be running") - go routine.start() - <-routine.ready() - assert.True(t, routine.isRunning(), - "expected an started routine") - - assert.True(t, routine.send(eventA{}), - "expected sending to a ready routine to succeed") - - assert.Equal(t, errDone, <-routine.final(), - "expected the final event to be done") - - assert.False(t, routine.isRunning(), - "expected an completed routine to no longer be running") -} - -func TestRoutineStop(t *testing.T) { - var ( - bufferSize = 10 - routine = newRoutine("simpleRoutine", simpleHandler, bufferSize) - ) - - assert.False(t, routine.send(eventA{}), - "expected sending to an unstarted routine to fail") - - go routine.start() - <-routine.ready() - - assert.True(t, routine.send(eventA{}), - "expected sending to a running routine to succeed") - - routine.stop() - - assert.False(t, routine.send(eventA{}), - "expected sending to a stopped routine to fail") -} - -type finalCount struct { - count int -} - -func (f finalCount) Error() string { - return "end" -} - -func genStatefulHandler(maxCount int) handleFunc { - counter := 0 - return func(event Event) (Event, error) { - if _, ok := event.(eventA); ok { - counter++ - if counter >= maxCount { - return noOp, finalCount{counter} - } - - return eventA{}, nil - } - return noOp, nil - } -} - -func feedback(r *Routine) { - for event := range r.next() { - r.send(event) - } -} - -func TestStatefulRoutine(t *testing.T) { - var ( - count = 10 - handler = genStatefulHandler(count) - bufferSize = 20 - routine = newRoutine("statefulRoutine", handler, bufferSize) - ) - - go routine.start() - go feedback(routine) - <-routine.ready() - - assert.True(t, routine.send(eventA{}), - "expected sending to a started routine to succeed") - - final := <-routine.final() - if fnl, ok := final.(finalCount); ok { - assert.Equal(t, count, fnl.count, - "expected the routine to count to 10") - } else { - t.Fail() - } -} - -type lowPriorityEvent struct { - priorityLow -} - -type highPriorityEvent struct { - priorityHigh -} - -func handleWithPriority(event Event) (Event, error) { - switch event.(type) { - case lowPriorityEvent: - return noOp, nil - case highPriorityEvent: - return noOp, errDone - } - return noOp, nil -} - -func TestPriority(t *testing.T) { - var ( - bufferSize = 20 - routine = newRoutine("priorityRoutine", handleWithPriority, bufferSize) - ) - - go routine.start() - <-routine.ready() - go func() { - for { - routine.send(lowPriorityEvent{}) - time.Sleep(1 * time.Millisecond) - } - }() - time.Sleep(10 * time.Millisecond) - - assert.True(t, routine.isRunning(), - "expected an started routine") - assert.True(t, routine.send(highPriorityEvent{}), - "expected send to succeed even when saturated") - - assert.Equal(t, errDone, <-routine.final()) - assert.False(t, routine.isRunning(), - "expected an started routine") -} diff --git a/blockchain/v2/scheduler.go b/blockchain/v2/scheduler.go deleted file mode 100644 index 75fe9d46d..000000000 --- a/blockchain/v2/scheduler.go +++ /dev/null @@ -1,712 +0,0 @@ -package v2 - -import ( - "bytes" - "errors" - "fmt" - "math" - "sort" - "time" - - "github.com/tendermint/tendermint/p2p" - "github.com/tendermint/tendermint/types" -) - -// Events generated by the scheduler: -// all blocks have been processed -type scFinishedEv struct { - priorityNormal - reason string -} - -func (e scFinishedEv) String() string { - return fmt.Sprintf("scFinishedEv{%v}", e.reason) -} - -// send a blockRequest message -type scBlockRequest struct { - priorityNormal - peerID p2p.ID - height int64 -} - -func (e scBlockRequest) String() string { - return fmt.Sprintf("scBlockRequest{%d from %v}", e.height, e.peerID) -} - -// a block has been received and validated by the scheduler -type scBlockReceived struct { - priorityNormal - peerID p2p.ID - block *types.Block -} - -func (e scBlockReceived) String() string { - return fmt.Sprintf("scBlockReceived{%d#%X from %v}", e.block.Height, e.block.Hash(), e.peerID) -} - -// scheduler detected a peer error -type scPeerError struct { - priorityHigh - peerID p2p.ID - reason error -} - -func (e scPeerError) String() string { - return fmt.Sprintf("scPeerError{%v errored with %v}", e.peerID, e.reason) -} - -// scheduler removed a set of peers (timed out or slow peer) -type scPeersPruned struct { - priorityHigh - peers []p2p.ID -} - -func (e scPeersPruned) String() string { - return fmt.Sprintf("scPeersPruned{%v}", e.peers) -} - -// XXX: make this fatal? -// scheduler encountered a fatal error -type scSchedulerFail struct { - priorityHigh - reason error -} - -func (e scSchedulerFail) String() string { - return fmt.Sprintf("scSchedulerFail{%v}", e.reason) -} - -type blockState int - -const ( - blockStateUnknown blockState = iota + 1 // no known peer has this block - blockStateNew // indicates that a peer has reported having this block - blockStatePending // indicates that this block has been requested from a peer - blockStateReceived // indicates that this block has been received by a peer - blockStateProcessed // indicates that this block has been applied -) - -func (e blockState) String() string { - switch e { - case blockStateUnknown: - return "Unknown" - case blockStateNew: - return "New" - case blockStatePending: - return "Pending" - case blockStateReceived: - return "Received" - case blockStateProcessed: - return "Processed" - default: - return fmt.Sprintf("invalid blockState: %d", e) - } -} - -type peerState int - -const ( - peerStateNew = iota + 1 - peerStateReady - peerStateRemoved -) - -func (e peerState) String() string { - switch e { - case peerStateNew: - return "New" - case peerStateReady: - return "Ready" - case peerStateRemoved: - return "Removed" - default: - panic(fmt.Sprintf("unknown peerState: %d", e)) - } -} - -type scPeer struct { - peerID p2p.ID - - // initialized as New when peer is added, updated to Ready when statusUpdate is received, - // updated to Removed when peer is removed - state peerState - - base int64 // updated when statusResponse is received - height int64 // updated when statusResponse is received - lastTouched time.Time - lastRate int64 // last receive rate in bytes -} - -func (p scPeer) String() string { - return fmt.Sprintf("{state %v, base %d, height %d, lastTouched %v, lastRate %d, id %v}", - p.state, p.base, p.height, p.lastTouched, p.lastRate, p.peerID) -} - -func newScPeer(peerID p2p.ID) *scPeer { - return &scPeer{ - peerID: peerID, - state: peerStateNew, - base: -1, - height: -1, - lastTouched: time.Time{}, - } -} - -// The scheduler keep track of the state of each block and each peer. The -// scheduler will attempt to schedule new block requests with `trySchedule` -// events and remove slow peers with `tryPrune` events. -type scheduler struct { - initHeight int64 - - // next block that needs to be processed. All blocks with smaller height are - // in Processed state. - height int64 - - // lastAdvance tracks the last time a block execution happened. - // syncTimeout is the maximum time the scheduler waits to advance in the fast sync process before finishing. - // This covers the cases where there are no peers or all peers have a lower height. - lastAdvance time.Time - syncTimeout time.Duration - - // a map of peerID to scheduler specific peer struct `scPeer` used to keep - // track of peer specific state - peers map[p2p.ID]*scPeer - peerTimeout time.Duration // maximum response time from a peer otherwise prune - minRecvRate int64 // minimum receive rate from peer otherwise prune - - // the maximum number of blocks that should be New, Received or Pending at any point - // in time. This is used to enforce a limit on the blockStates map. - targetPending int - // a list of blocks to be scheduled (New), Pending or Received. Its length should be - // smaller than targetPending. - blockStates map[int64]blockState - - // a map of heights to the peer we are waiting a response from - pendingBlocks map[int64]p2p.ID - - // the time at which a block was put in blockStatePending - pendingTime map[int64]time.Time - - // a map of heights to the peers that put the block in blockStateReceived - receivedBlocks map[int64]p2p.ID -} - -func (sc scheduler) String() string { - return fmt.Sprintf("ih: %d, bst: %v, peers: %v, pblks: %v, ptm %v, rblks: %v", - sc.initHeight, sc.blockStates, sc.peers, sc.pendingBlocks, sc.pendingTime, sc.receivedBlocks) -} - -func newScheduler(initHeight int64, startTime time.Time) *scheduler { - sc := scheduler{ - initHeight: initHeight, - lastAdvance: startTime, - syncTimeout: 60 * time.Second, - height: initHeight, - blockStates: make(map[int64]blockState), - peers: make(map[p2p.ID]*scPeer), - pendingBlocks: make(map[int64]p2p.ID), - pendingTime: make(map[int64]time.Time), - receivedBlocks: make(map[int64]p2p.ID), - targetPending: 10, // TODO - pass as param - peerTimeout: 15 * time.Second, // TODO - pass as param - minRecvRate: 0, // int64(7680), TODO - pass as param - } - - return &sc -} - -func (sc *scheduler) ensurePeer(peerID p2p.ID) *scPeer { - if _, ok := sc.peers[peerID]; !ok { - sc.peers[peerID] = newScPeer(peerID) - } - return sc.peers[peerID] -} - -func (sc *scheduler) touchPeer(peerID p2p.ID, time time.Time) error { - peer, ok := sc.peers[peerID] - if !ok { - return fmt.Errorf("couldn't find peer %s", peerID) - } - - if peer.state != peerStateReady { - return fmt.Errorf("tried to touch peer in state %s, must be Ready", peer.state) - } - - peer.lastTouched = time - - return nil -} - -func (sc *scheduler) removePeer(peerID p2p.ID) { - peer, ok := sc.peers[peerID] - if !ok { - return - } - if peer.state == peerStateRemoved { - return - } - - for height, pendingPeerID := range sc.pendingBlocks { - if pendingPeerID == peerID { - sc.setStateAtHeight(height, blockStateNew) - delete(sc.pendingTime, height) - delete(sc.pendingBlocks, height) - } - } - - for height, rcvPeerID := range sc.receivedBlocks { - if rcvPeerID == peerID { - sc.setStateAtHeight(height, blockStateNew) - delete(sc.receivedBlocks, height) - } - } - - // remove the blocks from blockStates if the peer removal causes the max peer height to be lower. - peer.state = peerStateRemoved - maxPeerHeight := int64(0) - for _, otherPeer := range sc.peers { - if otherPeer.state != peerStateReady { - continue - } - if otherPeer.peerID != peer.peerID && otherPeer.height > maxPeerHeight { - maxPeerHeight = otherPeer.height - } - } - for h := range sc.blockStates { - if h > maxPeerHeight { - delete(sc.blockStates, h) - } - } -} - -// check if the blockPool is running low and add new blocks in New state to be requested. -// This function is called when there is an increase in the maximum peer height or when -// blocks are processed. -func (sc *scheduler) addNewBlocks() { - if len(sc.blockStates) >= sc.targetPending { - return - } - - for i := sc.height; i < int64(sc.targetPending)+sc.height; i++ { - if i > sc.maxHeight() { - break - } - if sc.getStateAtHeight(i) == blockStateUnknown { - sc.setStateAtHeight(i, blockStateNew) - } - } -} - -func (sc *scheduler) setPeerRange(peerID p2p.ID, base int64, height int64) error { - peer := sc.ensurePeer(peerID) - - if peer.state == peerStateRemoved { - return nil // noop - } - - if height < peer.height { - sc.removePeer(peerID) - return fmt.Errorf("cannot move peer height lower. from %d to %d", peer.height, height) - } - - if base > height { - sc.removePeer(peerID) - return fmt.Errorf("cannot set peer base higher than its height") - } - - peer.base = base - peer.height = height - peer.state = peerStateReady - - sc.addNewBlocks() - return nil -} - -func (sc *scheduler) getStateAtHeight(height int64) blockState { - if height < sc.height { - return blockStateProcessed - } else if state, ok := sc.blockStates[height]; ok { - return state - } else { - return blockStateUnknown - } -} - -func (sc *scheduler) getPeersWithHeight(height int64) []p2p.ID { - peers := make([]p2p.ID, 0) - for _, peer := range sc.peers { - if peer.state != peerStateReady { - continue - } - if peer.base <= height && peer.height >= height { - peers = append(peers, peer.peerID) - } - } - return peers -} - -func (sc *scheduler) prunablePeers(peerTimout time.Duration, minRecvRate int64, now time.Time) []p2p.ID { - prunable := make([]p2p.ID, 0) - for peerID, peer := range sc.peers { - if peer.state != peerStateReady { - continue - } - if now.Sub(peer.lastTouched) > peerTimout || peer.lastRate < minRecvRate { - prunable = append(prunable, peerID) - } - } - // Tests for handleTryPrunePeer() may fail without sort due to range non-determinism - sort.Sort(PeerByID(prunable)) - return prunable -} - -func (sc *scheduler) setStateAtHeight(height int64, state blockState) { - sc.blockStates[height] = state -} - -// CONTRACT: peer exists and in Ready state. -func (sc *scheduler) markReceived(peerID p2p.ID, height int64, size int64, now time.Time) error { - peer := sc.peers[peerID] - - if state := sc.getStateAtHeight(height); state != blockStatePending || sc.pendingBlocks[height] != peerID { - return fmt.Errorf("received block %d from peer %s without being requested", height, peerID) - } - - pendingTime, ok := sc.pendingTime[height] - if !ok || now.Sub(pendingTime) <= 0 { - return fmt.Errorf("clock error: block %d received at %s but requested at %s", - height, pendingTime, now) - } - - peer.lastRate = size / now.Sub(pendingTime).Nanoseconds() - - sc.setStateAtHeight(height, blockStateReceived) - delete(sc.pendingBlocks, height) - delete(sc.pendingTime, height) - - sc.receivedBlocks[height] = peerID - - return nil -} - -func (sc *scheduler) markPending(peerID p2p.ID, height int64, time time.Time) error { - state := sc.getStateAtHeight(height) - if state != blockStateNew { - return fmt.Errorf("block %d should be in blockStateNew but is %s", height, state) - } - - peer, ok := sc.peers[peerID] - if !ok { - return fmt.Errorf("cannot find peer %s", peerID) - } - - if peer.state != peerStateReady { - return fmt.Errorf("cannot schedule %d from %s in %s", height, peerID, peer.state) - } - - if height > peer.height { - return fmt.Errorf("cannot request height %d from peer %s that is at height %d", - height, peerID, peer.height) - } - - if height < peer.base { - return fmt.Errorf("cannot request height %d for peer %s with base %d", - height, peerID, peer.base) - } - - sc.setStateAtHeight(height, blockStatePending) - sc.pendingBlocks[height] = peerID - sc.pendingTime[height] = time - - return nil -} - -func (sc *scheduler) markProcessed(height int64) error { - // It is possible that a peer error or timeout is handled after the processor - // has processed the block but before the scheduler received this event, so - // when pcBlockProcessed event is received, the block had been requested - // again => don't check the block state. - sc.lastAdvance = time.Now() - sc.height = height + 1 - delete(sc.pendingBlocks, height) - delete(sc.pendingTime, height) - delete(sc.receivedBlocks, height) - delete(sc.blockStates, height) - sc.addNewBlocks() - return nil -} - -func (sc *scheduler) allBlocksProcessed() bool { - if len(sc.peers) == 0 { - return false - } - return sc.height >= sc.maxHeight() -} - -// returns max peer height or the last processed block, i.e. sc.height -func (sc *scheduler) maxHeight() int64 { - max := sc.height - 1 - for _, peer := range sc.peers { - if peer.state != peerStateReady { - continue - } - if max < peer.height { - max = peer.height - } - } - return max -} - -// lowest block in sc.blockStates with state == blockStateNew or -1 if no new blocks -func (sc *scheduler) nextHeightToSchedule() int64 { - var min int64 = math.MaxInt64 - for height, state := range sc.blockStates { - if state == blockStateNew && height < min { - min = height - } - } - if min == math.MaxInt64 { - min = -1 - } - return min -} - -func (sc *scheduler) pendingFrom(peerID p2p.ID) []int64 { - var heights []int64 - for height, pendingPeerID := range sc.pendingBlocks { - if pendingPeerID == peerID { - heights = append(heights, height) - } - } - return heights -} - -func (sc *scheduler) selectPeer(height int64) (p2p.ID, error) { - peers := sc.getPeersWithHeight(height) - if len(peers) == 0 { - return "", fmt.Errorf("cannot find peer for height %d", height) - } - - // create a map from number of pending requests to a list - // of peers having that number of pending requests. - pendingFrom := make(map[int][]p2p.ID) - for _, peerID := range peers { - numPending := len(sc.pendingFrom(peerID)) - pendingFrom[numPending] = append(pendingFrom[numPending], peerID) - } - - // find the set of peers with minimum number of pending requests. - var minPending int64 = math.MaxInt64 - for mp := range pendingFrom { - if int64(mp) < minPending { - minPending = int64(mp) - } - } - - sort.Sort(PeerByID(pendingFrom[int(minPending)])) - return pendingFrom[int(minPending)][0], nil -} - -// PeerByID is a list of peers sorted by peerID. -type PeerByID []p2p.ID - -func (peers PeerByID) Len() int { - return len(peers) -} -func (peers PeerByID) Less(i, j int) bool { - return bytes.Compare([]byte(peers[i]), []byte(peers[j])) == -1 -} - -func (peers PeerByID) Swap(i, j int) { - peers[i], peers[j] = peers[j], peers[i] -} - -// Handlers - -// This handler gets the block, performs some validation and then passes it on to the processor. -func (sc *scheduler) handleBlockResponse(event bcBlockResponse) (Event, error) { - err := sc.touchPeer(event.peerID, event.time) - if err != nil { - // peer does not exist OR not ready - return noOp, nil - } - - err = sc.markReceived(event.peerID, event.block.Height, event.size, event.time) - if err != nil { - sc.removePeer(event.peerID) - return scPeerError{peerID: event.peerID, reason: err}, nil - } - - return scBlockReceived{peerID: event.peerID, block: event.block}, nil -} - -func (sc *scheduler) handleNoBlockResponse(event bcNoBlockResponse) (Event, error) { - // No such peer or peer was removed. - peer, ok := sc.peers[event.peerID] - if !ok || peer.state == peerStateRemoved { - return noOp, nil - } - - // The peer may have been just removed due to errors, low speed or timeouts. - sc.removePeer(event.peerID) - - return scPeerError{peerID: event.peerID, - reason: fmt.Errorf("peer %v with base %d height %d claims no block for %d", - event.peerID, peer.base, peer.height, event.height)}, nil -} - -func (sc *scheduler) handleBlockProcessed(event pcBlockProcessed) (Event, error) { - if event.height != sc.height { - panic(fmt.Sprintf("processed height %d, but expected height %d", event.height, sc.height)) - } - - err := sc.markProcessed(event.height) - if err != nil { - return scSchedulerFail{reason: err}, nil - } - - if sc.allBlocksProcessed() { - return scFinishedEv{reason: "processed all blocks"}, nil - } - - return noOp, nil -} - -// Handles an error from the processor. The processor had already cleaned the blocks from -// the peers included in this event. Just attempt to remove the peers. -func (sc *scheduler) handleBlockProcessError(event pcBlockVerificationFailure) (Event, error) { - // The peers may have been just removed due to errors, low speed or timeouts. - sc.removePeer(event.firstPeerID) - if event.firstPeerID != event.secondPeerID { - sc.removePeer(event.secondPeerID) - } - - if sc.allBlocksProcessed() { - return scFinishedEv{reason: "error on last block"}, nil - } - - return noOp, nil -} - -func (sc *scheduler) handleAddNewPeer(event bcAddNewPeer) (Event, error) { - sc.ensurePeer(event.peerID) - return noOp, nil -} - -func (sc *scheduler) handleRemovePeer(event bcRemovePeer) (Event, error) { - sc.removePeer(event.peerID) - - if sc.allBlocksProcessed() { - return scFinishedEv{reason: "removed peer"}, nil - } - - // Return scPeerError so the peer (and all associated blocks) is removed from - // the processor. - return scPeerError{peerID: event.peerID, reason: errors.New("peer was stopped")}, nil -} - -func (sc *scheduler) handleTryPrunePeer(event rTryPrunePeer) (Event, error) { - // Check behavior of peer responsible to deliver block at sc.height. - timeHeightAsked, ok := sc.pendingTime[sc.height] - if ok && time.Since(timeHeightAsked) > sc.peerTimeout { - // A request was sent to a peer for block at sc.height but a response was not received - // from that peer within sc.peerTimeout. Remove the peer. This is to ensure that a peer - // will be timed out even if it sends blocks at higher heights but prevents progress by - // not sending the block at current height. - sc.removePeer(sc.pendingBlocks[sc.height]) - } - - prunablePeers := sc.prunablePeers(sc.peerTimeout, sc.minRecvRate, event.time) - if len(prunablePeers) == 0 { - return noOp, nil - } - for _, peerID := range prunablePeers { - sc.removePeer(peerID) - } - - // If all blocks are processed we should finish. - if sc.allBlocksProcessed() { - return scFinishedEv{reason: "after try prune"}, nil - } - - return scPeersPruned{peers: prunablePeers}, nil -} - -func (sc *scheduler) handleResetState(event bcResetState) (Event, error) { - initHeight := event.state.LastBlockHeight + 1 - if initHeight == 1 { - initHeight = event.state.InitialHeight - } - sc.initHeight = initHeight - sc.height = initHeight - sc.lastAdvance = time.Now() - sc.addNewBlocks() - return noOp, nil -} - -func (sc *scheduler) handleTrySchedule(event rTrySchedule) (Event, error) { - if time.Since(sc.lastAdvance) > sc.syncTimeout { - return scFinishedEv{reason: "timeout, no advance"}, nil - } - - nextHeight := sc.nextHeightToSchedule() - if nextHeight == -1 { - return noOp, nil - } - - bestPeerID, err := sc.selectPeer(nextHeight) - if err != nil { - return scSchedulerFail{reason: err}, nil - } - if err := sc.markPending(bestPeerID, nextHeight, event.time); err != nil { - return scSchedulerFail{reason: err}, nil // XXX: peerError might be more appropriate - } - return scBlockRequest{peerID: bestPeerID, height: nextHeight}, nil - -} - -func (sc *scheduler) handleStatusResponse(event bcStatusResponse) (Event, error) { - err := sc.setPeerRange(event.peerID, event.base, event.height) - if err != nil { - return scPeerError{peerID: event.peerID, reason: err}, nil - } - return noOp, nil -} - -func (sc *scheduler) handle(event Event) (Event, error) { - switch event := event.(type) { - case bcResetState: - nextEvent, err := sc.handleResetState(event) - return nextEvent, err - case bcStatusResponse: - nextEvent, err := sc.handleStatusResponse(event) - return nextEvent, err - case bcBlockResponse: - nextEvent, err := sc.handleBlockResponse(event) - return nextEvent, err - case bcNoBlockResponse: - nextEvent, err := sc.handleNoBlockResponse(event) - return nextEvent, err - case rTrySchedule: - nextEvent, err := sc.handleTrySchedule(event) - return nextEvent, err - case bcAddNewPeer: - nextEvent, err := sc.handleAddNewPeer(event) - return nextEvent, err - case bcRemovePeer: - nextEvent, err := sc.handleRemovePeer(event) - return nextEvent, err - case rTryPrunePeer: - nextEvent, err := sc.handleTryPrunePeer(event) - return nextEvent, err - case pcBlockProcessed: - nextEvent, err := sc.handleBlockProcessed(event) - return nextEvent, err - case pcBlockVerificationFailure: - nextEvent, err := sc.handleBlockProcessError(event) - return nextEvent, err - default: - return scSchedulerFail{reason: fmt.Errorf("unknown event %v", event)}, nil - } -} diff --git a/blockchain/v2/scheduler_test.go b/blockchain/v2/scheduler_test.go deleted file mode 100644 index 5d4ebd976..000000000 --- a/blockchain/v2/scheduler_test.go +++ /dev/null @@ -1,2249 +0,0 @@ -package v2 - -import ( - "fmt" - "math" - "sort" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/tendermint/tendermint/p2p" - "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/types" -) - -type scTestParams struct { - peers map[string]*scPeer - initHeight int64 - height int64 - allB []int64 - pending map[int64]p2p.ID - pendingTime map[int64]time.Time - received map[int64]p2p.ID - peerTimeout time.Duration - minRecvRate int64 - targetPending int - startTime time.Time - syncTimeout time.Duration -} - -func verifyScheduler(sc *scheduler) { - missing := 0 - if sc.maxHeight() >= sc.height { - missing = int(math.Min(float64(sc.targetPending), float64(sc.maxHeight()-sc.height+1))) - } - if len(sc.blockStates) != missing { - panic(fmt.Sprintf("scheduler block length %d different than target %d", len(sc.blockStates), missing)) - } -} - -func newTestScheduler(params scTestParams) *scheduler { - peers := make(map[p2p.ID]*scPeer) - var maxHeight int64 - - initHeight := params.initHeight - if initHeight == 0 { - initHeight = 1 - } - sc := newScheduler(initHeight, params.startTime) - if params.height != 0 { - sc.height = params.height - } - - for id, peer := range params.peers { - peer.peerID = p2p.ID(id) - peers[p2p.ID(id)] = peer - if maxHeight < peer.height { - maxHeight = peer.height - } - } - for _, h := range params.allB { - sc.blockStates[h] = blockStateNew - } - for h, pid := range params.pending { - sc.blockStates[h] = blockStatePending - sc.pendingBlocks[h] = pid - } - for h, tm := range params.pendingTime { - sc.pendingTime[h] = tm - } - for h, pid := range params.received { - sc.blockStates[h] = blockStateReceived - sc.receivedBlocks[h] = pid - } - - sc.peers = peers - sc.peerTimeout = params.peerTimeout - if params.syncTimeout == 0 { - sc.syncTimeout = 10 * time.Second - } else { - sc.syncTimeout = params.syncTimeout - } - - if params.targetPending == 0 { - sc.targetPending = 10 - } else { - sc.targetPending = params.targetPending - } - - sc.minRecvRate = params.minRecvRate - - verifyScheduler(sc) - - return sc -} - -func TestScInit(t *testing.T) { - var ( - initHeight int64 = 5 - sc = newScheduler(initHeight, time.Now()) - ) - assert.Equal(t, blockStateProcessed, sc.getStateAtHeight(initHeight-1)) - assert.Equal(t, blockStateUnknown, sc.getStateAtHeight(initHeight)) - assert.Equal(t, blockStateUnknown, sc.getStateAtHeight(initHeight+1)) -} - -func TestScMaxHeights(t *testing.T) { - - tests := []struct { - name string - sc scheduler - wantMax int64 - }{ - { - name: "no peers", - sc: scheduler{height: 11}, - wantMax: 10, - }, - { - name: "one ready peer", - sc: scheduler{ - height: 3, - peers: map[p2p.ID]*scPeer{"P1": {height: 6, state: peerStateReady}}, - }, - wantMax: 6, - }, - { - name: "ready and removed peers", - sc: scheduler{ - height: 1, - peers: map[p2p.ID]*scPeer{ - "P1": {height: 4, state: peerStateReady}, - "P2": {height: 10, state: peerStateRemoved}}, - }, - wantMax: 4, - }, - { - name: "removed peers", - sc: scheduler{ - height: 1, - peers: map[p2p.ID]*scPeer{ - "P1": {height: 4, state: peerStateRemoved}, - "P2": {height: 10, state: peerStateRemoved}}, - }, - wantMax: 0, - }, - { - name: "new peers", - sc: scheduler{ - height: 1, - peers: map[p2p.ID]*scPeer{ - "P1": {base: -1, height: -1, state: peerStateNew}, - "P2": {base: -1, height: -1, state: peerStateNew}}, - }, - wantMax: 0, - }, - { - name: "mixed peers", - sc: scheduler{ - height: 1, - peers: map[p2p.ID]*scPeer{ - "P1": {height: -1, state: peerStateNew}, - "P2": {height: 10, state: peerStateReady}, - "P3": {height: 20, state: peerStateRemoved}, - "P4": {height: 22, state: peerStateReady}, - }, - }, - wantMax: 22, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - // maxHeight() should not mutate the scheduler - wantSc := tt.sc - - resMax := tt.sc.maxHeight() - assert.Equal(t, tt.wantMax, resMax) - assert.Equal(t, wantSc, tt.sc) - }) - } -} - -func TestScEnsurePeer(t *testing.T) { - - type args struct { - peerID p2p.ID - } - tests := []struct { - name string - fields scTestParams - args args - wantFields scTestParams - }{ - { - name: "add first peer", - fields: scTestParams{}, - args: args{peerID: "P1"}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {base: -1, height: -1, state: peerStateNew}}}, - }, - { - name: "add second peer", - fields: scTestParams{peers: map[string]*scPeer{"P1": {base: -1, height: -1, state: peerStateNew}}}, - args: args{peerID: "P2"}, - wantFields: scTestParams{peers: map[string]*scPeer{ - "P1": {base: -1, height: -1, state: peerStateNew}, - "P2": {base: -1, height: -1, state: peerStateNew}}}, - }, - { - name: "add duplicate peer is fine", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: -1}}}, - args: args{peerID: "P1"}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {height: -1}}}, - }, - { - name: "add duplicate peer with existing peer in Ready state is noop", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {state: peerStateReady, height: 3}}, - allB: []int64{1, 2, 3}, - }, - args: args{peerID: "P1"}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {state: peerStateReady, height: 3}}, - allB: []int64{1, 2, 3}, - }, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - sc.ensurePeer(tt.args.peerID) - wantSc := newTestScheduler(tt.wantFields) - assert.Equal(t, wantSc, sc, "wanted peers %v, got %v", wantSc.peers, sc.peers) - }) - } -} - -func TestScTouchPeer(t *testing.T) { - now := time.Now() - - type args struct { - peerID p2p.ID - time time.Time - } - - tests := []struct { - name string - fields scTestParams - args args - wantFields scTestParams - wantErr bool - }{ - { - name: "attempt to touch non existing peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {state: peerStateReady, height: 5}}, - allB: []int64{1, 2, 3, 4, 5}, - }, - args: args{peerID: "P2", time: now}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {state: peerStateReady, height: 5}}, - allB: []int64{1, 2, 3, 4, 5}, - }, - wantErr: true, - }, - { - name: "attempt to touch peer in state New", - fields: scTestParams{peers: map[string]*scPeer{"P1": {}}}, - args: args{peerID: "P1", time: now}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {}}}, - wantErr: true, - }, - { - name: "attempt to touch peer in state Removed", - fields: scTestParams{peers: map[string]*scPeer{"P1": {state: peerStateRemoved}, "P2": {state: peerStateReady}}}, - args: args{peerID: "P1", time: now}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {state: peerStateRemoved}, "P2": {state: peerStateReady}}}, - wantErr: true, - }, - { - name: "touch peer in state Ready", - fields: scTestParams{peers: map[string]*scPeer{"P1": {state: peerStateReady, lastTouched: now}}}, - args: args{peerID: "P1", time: now.Add(3 * time.Second)}, - wantFields: scTestParams{peers: map[string]*scPeer{ - "P1": {state: peerStateReady, lastTouched: now.Add(3 * time.Second)}}}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - if err := sc.touchPeer(tt.args.peerID, tt.args.time); (err != nil) != tt.wantErr { - t.Errorf("touchPeer() wantErr %v, error = %v", tt.wantErr, err) - } - wantSc := newTestScheduler(tt.wantFields) - assert.Equal(t, wantSc, sc, "wanted peers %v, got %v", wantSc.peers, sc.peers) - }) - } -} - -func TestScPrunablePeers(t *testing.T) { - now := time.Now() - - type args struct { - threshold time.Duration - time time.Time - minSpeed int64 - } - - tests := []struct { - name string - fields scTestParams - args args - wantResult []p2p.ID - }{ - { - name: "no peers", - fields: scTestParams{peers: map[string]*scPeer{}}, - args: args{threshold: time.Second, time: now.Add(time.Second + time.Millisecond), minSpeed: 100}, - wantResult: []p2p.ID{}, - }, - { - name: "mixed peers", - fields: scTestParams{peers: map[string]*scPeer{ - // X - removed, active, fast - "P1": {state: peerStateRemoved, lastTouched: now.Add(time.Second), lastRate: 101}, - // X - ready, active, fast - "P2": {state: peerStateReady, lastTouched: now.Add(time.Second), lastRate: 101}, - // X - removed, active, equal - "P3": {state: peerStateRemoved, lastTouched: now.Add(time.Second), lastRate: 100}, - // V - ready, inactive, equal - "P4": {state: peerStateReady, lastTouched: now, lastRate: 100}, - // V - ready, inactive, slow - "P5": {state: peerStateReady, lastTouched: now, lastRate: 99}, - // V - ready, active, slow - "P6": {state: peerStateReady, lastTouched: now.Add(time.Second), lastRate: 90}, - }}, - args: args{threshold: time.Second, time: now.Add(time.Second + time.Millisecond), minSpeed: 100}, - wantResult: []p2p.ID{"P4", "P5", "P6"}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - // peersSlowerThan should not mutate the scheduler - wantSc := sc - res := sc.prunablePeers(tt.args.threshold, tt.args.minSpeed, tt.args.time) - assert.Equal(t, tt.wantResult, res) - assert.Equal(t, wantSc, sc) - }) - } -} - -func TestScRemovePeer(t *testing.T) { - - type args struct { - peerID p2p.ID - } - tests := []struct { - name string - fields scTestParams - args args - wantFields scTestParams - wantErr bool - }{ - { - name: "remove non existing peer", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: -1}}}, - args: args{peerID: "P2"}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {height: -1}}}, - }, - { - name: "remove single New peer", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: -1}}}, - args: args{peerID: "P1"}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {height: -1, state: peerStateRemoved}}}, - }, - { - name: "remove one of two New peers", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: -1}, "P2": {height: -1}}}, - args: args{peerID: "P1"}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {height: -1, state: peerStateRemoved}, "P2": {height: -1}}}, - }, - { - name: "remove one Ready peer, all peers removed", - fields: scTestParams{ - peers: map[string]*scPeer{ - "P1": {height: 10, state: peerStateRemoved}, - "P2": {height: 5, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5}, - }, - args: args{peerID: "P2"}, - wantFields: scTestParams{peers: map[string]*scPeer{ - "P1": {height: 10, state: peerStateRemoved}, - "P2": {height: 5, state: peerStateRemoved}}, - }, - }, - { - name: "attempt to remove already removed peer", - fields: scTestParams{ - height: 8, - peers: map[string]*scPeer{ - "P1": {height: 10, state: peerStateRemoved}, - "P2": {height: 11, state: peerStateReady}}, - allB: []int64{8, 9, 10, 11}, - }, - args: args{peerID: "P1"}, - wantFields: scTestParams{ - height: 8, - peers: map[string]*scPeer{ - "P1": {height: 10, state: peerStateRemoved}, - "P2": {height: 11, state: peerStateReady}}, - allB: []int64{8, 9, 10, 11}}, - }, - { - name: "remove Ready peer with blocks requested", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady}}, - allB: []int64{1, 2, 3}, - pending: map[int64]p2p.ID{1: "P1"}, - }, - args: args{peerID: "P1"}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateRemoved}}, - allB: []int64{}, - pending: map[int64]p2p.ID{}, - }, - }, - { - name: "remove Ready peer with blocks received", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady}}, - allB: []int64{1, 2, 3}, - received: map[int64]p2p.ID{1: "P1"}, - }, - args: args{peerID: "P1"}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateRemoved}}, - allB: []int64{}, - received: map[int64]p2p.ID{}, - }, - }, - { - name: "remove Ready peer with blocks received and requested (not yet received)", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - pending: map[int64]p2p.ID{1: "P1", 3: "P1"}, - received: map[int64]p2p.ID{2: "P1", 4: "P1"}, - }, - args: args{peerID: "P1"}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateRemoved}}, - allB: []int64{}, - pending: map[int64]p2p.ID{}, - received: map[int64]p2p.ID{}, - }, - }, - { - name: "remove Ready peer from multiple peers set, with blocks received and requested (not yet received)", - fields: scTestParams{ - peers: map[string]*scPeer{ - "P1": {height: 6, state: peerStateReady}, - "P2": {height: 6, state: peerStateReady}, - }, - allB: []int64{1, 2, 3, 4, 5, 6}, - pending: map[int64]p2p.ID{1: "P1", 3: "P2", 6: "P1"}, - received: map[int64]p2p.ID{2: "P1", 4: "P2", 5: "P2"}, - }, - args: args{peerID: "P1"}, - wantFields: scTestParams{ - peers: map[string]*scPeer{ - "P1": {height: 6, state: peerStateRemoved}, - "P2": {height: 6, state: peerStateReady}, - }, - allB: []int64{1, 2, 3, 4, 5, 6}, - pending: map[int64]p2p.ID{3: "P2"}, - received: map[int64]p2p.ID{4: "P2", 5: "P2"}, - }, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - sc.removePeer(tt.args.peerID) - wantSc := newTestScheduler(tt.wantFields) - assert.Equal(t, wantSc, sc, "wanted peers %v, got %v", wantSc.peers, sc.peers) - }) - } -} - -func TestScSetPeerRange(t *testing.T) { - - type args struct { - peerID p2p.ID - base int64 - height int64 - } - tests := []struct { - name string - fields scTestParams - args args - wantFields scTestParams - wantErr bool - }{ - { - name: "change height of non existing peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}}, - args: args{peerID: "P2", height: 4}, - wantFields: scTestParams{ - peers: map[string]*scPeer{ - "P1": {height: 2, state: peerStateReady}, - "P2": {height: 4, state: peerStateReady}, - }, - allB: []int64{1, 2, 3, 4}}, - }, - { - name: "increase height of removed peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateRemoved}}}, - args: args{peerID: "P1", height: 4}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {height: 2, state: peerStateRemoved}}}, - }, - { - name: "decrease height of single peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}}, - args: args{peerID: "P1", height: 2}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateRemoved}}, - allB: []int64{}}, - wantErr: true, - }, - { - name: "increase height of single peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}}, - args: args{peerID: "P1", height: 4}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}}, - }, - { - name: "noop height change of single peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}}, - args: args{peerID: "P1", height: 4}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}}, - }, - { - name: "add peer with huge height 10**10 ", - fields: scTestParams{ - peers: map[string]*scPeer{"P2": {height: -1, state: peerStateNew}}, - targetPending: 4, - }, - args: args{peerID: "P2", height: 10000000000}, - wantFields: scTestParams{ - targetPending: 4, - peers: map[string]*scPeer{"P2": {height: 10000000000, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}}, - }, - { - name: "add peer with base > height should error", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}}, - args: args{peerID: "P1", base: 6, height: 5}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateRemoved}}}, - wantErr: true, - }, - { - name: "add peer with base == height is fine", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateNew}}, - targetPending: 4, - }, - args: args{peerID: "P1", base: 6, height: 6}, - wantFields: scTestParams{ - targetPending: 4, - peers: map[string]*scPeer{"P1": {base: 6, height: 6, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - err := sc.setPeerRange(tt.args.peerID, tt.args.base, tt.args.height) - if (err != nil) != tt.wantErr { - t.Errorf("setPeerHeight() wantErr %v, error = %v", tt.wantErr, err) - } - wantSc := newTestScheduler(tt.wantFields) - assert.Equal(t, wantSc, sc, "wanted peers %v, got %v", wantSc.peers, sc.peers) - }) - } -} - -func TestScGetPeersWithHeight(t *testing.T) { - - type args struct { - height int64 - } - tests := []struct { - name string - fields scTestParams - args args - wantResult []p2p.ID - }{ - { - name: "no peers", - fields: scTestParams{peers: map[string]*scPeer{}}, - args: args{height: 10}, - wantResult: []p2p.ID{}, - }, - { - name: "only new peers", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: -1, state: peerStateNew}}}, - args: args{height: 10}, - wantResult: []p2p.ID{}, - }, - { - name: "only Removed peers", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: 4, state: peerStateRemoved}}}, - args: args{height: 2}, - wantResult: []p2p.ID{}, - }, - { - name: "one Ready shorter peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - }, - args: args{height: 5}, - wantResult: []p2p.ID{}, - }, - { - name: "one Ready equal peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - }, - args: args{height: 4}, - wantResult: []p2p.ID{"P1"}, - }, - { - name: "one Ready higher peer", - fields: scTestParams{ - targetPending: 4, - peers: map[string]*scPeer{"P1": {height: 20, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - }, - args: args{height: 4}, - wantResult: []p2p.ID{"P1"}, - }, - { - name: "one Ready higher peer at base", - fields: scTestParams{ - targetPending: 4, - peers: map[string]*scPeer{"P1": {base: 4, height: 20, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - }, - args: args{height: 4}, - wantResult: []p2p.ID{"P1"}, - }, - { - name: "one Ready higher peer with higher base", - fields: scTestParams{ - targetPending: 4, - peers: map[string]*scPeer{"P1": {base: 10, height: 20, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - }, - args: args{height: 4}, - wantResult: []p2p.ID{}, - }, - { - name: "multiple mixed peers", - fields: scTestParams{ - height: 8, - peers: map[string]*scPeer{ - "P1": {height: -1, state: peerStateNew}, - "P2": {height: 10, state: peerStateReady}, - "P3": {height: 5, state: peerStateReady}, - "P4": {height: 20, state: peerStateRemoved}, - "P5": {height: 11, state: peerStateReady}}, - allB: []int64{8, 9, 10, 11}, - }, - args: args{height: 8}, - wantResult: []p2p.ID{"P2", "P5"}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - // getPeersWithHeight should not mutate the scheduler - wantSc := sc - res := sc.getPeersWithHeight(tt.args.height) - sort.Sort(PeerByID(res)) - assert.Equal(t, tt.wantResult, res) - assert.Equal(t, wantSc, sc) - }) - } -} - -func TestScMarkPending(t *testing.T) { - now := time.Now() - - type args struct { - peerID p2p.ID - height int64 - tm time.Time - } - tests := []struct { - name string - fields scTestParams - args args - wantFields scTestParams - wantErr bool - }{ - { - name: "attempt mark pending an unknown block above height", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}}, - args: args{peerID: "P1", height: 3, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}}, - wantErr: true, - }, - { - name: "attempt mark pending an unknown block below base", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {base: 4, height: 6, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6}}, - args: args{peerID: "P1", height: 3, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {base: 4, height: 6, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6}}, - wantErr: true, - }, - { - name: "attempt mark pending from non existing peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}}, - args: args{peerID: "P2", height: 1, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}}, - wantErr: true, - }, - { - name: "mark pending from Removed peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateRemoved}}}, - args: args{peerID: "P1", height: 1, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateRemoved}}}, - wantErr: true, - }, - { - name: "mark pending from New peer", - fields: scTestParams{ - peers: map[string]*scPeer{ - "P1": {height: 4, state: peerStateReady}, - "P2": {height: 4, state: peerStateNew}, - }, - allB: []int64{1, 2, 3, 4}, - }, - args: args{peerID: "P2", height: 2, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{ - "P1": {height: 4, state: peerStateReady}, - "P2": {height: 4, state: peerStateNew}, - }, - allB: []int64{1, 2, 3, 4}, - }, - wantErr: true, - }, - { - name: "mark pending from short peer", - fields: scTestParams{ - peers: map[string]*scPeer{ - "P1": {height: 4, state: peerStateReady}, - "P2": {height: 2, state: peerStateReady}, - }, - allB: []int64{1, 2, 3, 4}, - }, - args: args{peerID: "P2", height: 3, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{ - "P1": {height: 4, state: peerStateReady}, - "P2": {height: 2, state: peerStateReady}, - }, - allB: []int64{1, 2, 3, 4}, - }, - wantErr: true, - }, - { - name: "mark pending all good", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}, - pending: map[int64]p2p.ID{1: "P1"}, - pendingTime: map[int64]time.Time{1: now}, - }, - args: args{peerID: "P1", height: 2, tm: now.Add(time.Millisecond)}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}, - pending: map[int64]p2p.ID{1: "P1", 2: "P1"}, - pendingTime: map[int64]time.Time{1: now, 2: now.Add(time.Millisecond)}, - }, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - if err := sc.markPending(tt.args.peerID, tt.args.height, tt.args.tm); (err != nil) != tt.wantErr { - t.Errorf("markPending() wantErr %v, error = %v", tt.wantErr, err) - } - wantSc := newTestScheduler(tt.wantFields) - assert.Equal(t, wantSc, sc) - }) - } -} - -func TestScMarkReceived(t *testing.T) { - now := time.Now() - - type args struct { - peerID p2p.ID - height int64 - size int64 - tm time.Time - } - tests := []struct { - name string - fields scTestParams - args args - wantFields scTestParams - wantErr bool - }{ - { - name: "received from non existing peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}}, - args: args{peerID: "P2", height: 1, size: 1000, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}}, - wantErr: true, - }, - { - name: "received from removed peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateRemoved}}}, - args: args{peerID: "P1", height: 1, size: 1000, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateRemoved}}}, - wantErr: true, - }, - { - name: "received from unsolicited peer", - fields: scTestParams{ - peers: map[string]*scPeer{ - "P1": {height: 4, state: peerStateReady}, - "P2": {height: 4, state: peerStateReady}, - }, - allB: []int64{1, 2, 3, 4}, - pending: map[int64]p2p.ID{1: "P1", 2: "P2", 3: "P2", 4: "P1"}, - }, - args: args{peerID: "P1", height: 2, size: 1000, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{ - "P1": {height: 4, state: peerStateReady}, - "P2": {height: 4, state: peerStateReady}, - }, - allB: []int64{1, 2, 3, 4}, - pending: map[int64]p2p.ID{1: "P1", 2: "P2", 3: "P2", 4: "P1"}, - }, - wantErr: true, - }, - { - name: "received but blockRequest not sent", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - pending: map[int64]p2p.ID{}, - }, - args: args{peerID: "P1", height: 2, size: 1000, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - pending: map[int64]p2p.ID{}, - }, - wantErr: true, - }, - { - name: "received with bad timestamp", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}, - pending: map[int64]p2p.ID{1: "P1", 2: "P1"}, - pendingTime: map[int64]time.Time{1: now, 2: now.Add(time.Second)}, - }, - args: args{peerID: "P1", height: 2, size: 1000, tm: now}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}, - pending: map[int64]p2p.ID{1: "P1", 2: "P1"}, - pendingTime: map[int64]time.Time{1: now, 2: now.Add(time.Second)}, - }, - wantErr: true, - }, - { - name: "received all good", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}, - pending: map[int64]p2p.ID{1: "P1", 2: "P1"}, - pendingTime: map[int64]time.Time{1: now, 2: now}, - }, - args: args{peerID: "P1", height: 2, size: 1000, tm: now.Add(time.Millisecond)}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}, - pending: map[int64]p2p.ID{1: "P1"}, - pendingTime: map[int64]time.Time{1: now}, - received: map[int64]p2p.ID{2: "P1"}, - }, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - if err := sc.markReceived( - tt.args.peerID, - tt.args.height, - tt.args.size, - now.Add(time.Second)); (err != nil) != tt.wantErr { - t.Errorf("markReceived() wantErr %v, error = %v", tt.wantErr, err) - } - wantSc := newTestScheduler(tt.wantFields) - assert.Equal(t, wantSc, sc) - }) - } -} - -func TestScMarkProcessed(t *testing.T) { - now := time.Now() - - type args struct { - height int64 - } - tests := []struct { - name string - fields scTestParams - args args - wantFields scTestParams - wantErr bool - }{ - { - name: "processed an unreceived block", - fields: scTestParams{ - height: 2, - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{2}, - pending: map[int64]p2p.ID{2: "P1"}, - pendingTime: map[int64]time.Time{2: now}, - targetPending: 1, - }, - args: args{height: 2}, - wantFields: scTestParams{ - height: 3, - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{3}, - targetPending: 1, - }, - }, - { - name: "mark processed success", - fields: scTestParams{ - height: 1, - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}, - pending: map[int64]p2p.ID{2: "P1"}, - pendingTime: map[int64]time.Time{2: now}, - received: map[int64]p2p.ID{1: "P1"}}, - args: args{height: 1}, - wantFields: scTestParams{ - height: 2, - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{2}, - pending: map[int64]p2p.ID{2: "P1"}, - pendingTime: map[int64]time.Time{2: now}}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - oldBlockState := sc.getStateAtHeight(tt.args.height) - if err := sc.markProcessed(tt.args.height); (err != nil) != tt.wantErr { - t.Errorf("markProcessed() wantErr %v, error = %v", tt.wantErr, err) - } - if tt.wantErr { - assert.Equal(t, oldBlockState, sc.getStateAtHeight(tt.args.height)) - } else { - assert.Equal(t, blockStateProcessed, sc.getStateAtHeight(tt.args.height)) - } - wantSc := newTestScheduler(tt.wantFields) - checkSameScheduler(t, wantSc, sc) - }) - } -} - -func TestScResetState(t *testing.T) { - tests := []struct { - name string - fields scTestParams - state state.State - wantFields scTestParams - }{ - { - name: "updates height and initHeight", - fields: scTestParams{ - height: 0, - initHeight: 0, - }, - state: state.State{LastBlockHeight: 7}, - wantFields: scTestParams{ - height: 8, - initHeight: 8, - }, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - e, err := sc.handleResetState(bcResetState{state: tt.state}) - require.NoError(t, err) - assert.Equal(t, e, noOp) - wantSc := newTestScheduler(tt.wantFields) - checkSameScheduler(t, wantSc, sc) - }) - } -} - -func TestScAllBlocksProcessed(t *testing.T) { - now := time.Now() - - tests := []struct { - name string - fields scTestParams - wantResult bool - }{ - { - name: "no blocks, no peers", - fields: scTestParams{}, - wantResult: false, - }, - { - name: "only New blocks", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - }, - wantResult: false, - }, - { - name: "only Pending blocks", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - pending: map[int64]p2p.ID{1: "P1", 2: "P1", 3: "P1", 4: "P1"}, - pendingTime: map[int64]time.Time{1: now, 2: now, 3: now, 4: now}, - }, - wantResult: false, - }, - { - name: "only Received blocks", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - received: map[int64]p2p.ID{1: "P1", 2: "P1", 3: "P1", 4: "P1"}, - }, - wantResult: false, - }, - { - name: "only Processed blocks plus highest is received", - fields: scTestParams{ - height: 4, - peers: map[string]*scPeer{ - "P1": {height: 4, state: peerStateReady}}, - allB: []int64{4}, - received: map[int64]p2p.ID{4: "P1"}, - }, - wantResult: true, - }, - { - name: "mixed block states", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - pending: map[int64]p2p.ID{2: "P1", 4: "P1"}, - pendingTime: map[int64]time.Time{2: now, 4: now}, - }, - wantResult: false, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - // allBlocksProcessed() should not mutate the scheduler - wantSc := sc - res := sc.allBlocksProcessed() - assert.Equal(t, tt.wantResult, res) - checkSameScheduler(t, wantSc, sc) - }) - } -} - -func TestScNextHeightToSchedule(t *testing.T) { - now := time.Now() - - tests := []struct { - name string - fields scTestParams - wantHeight int64 - }{ - { - name: "no blocks", - fields: scTestParams{initHeight: 11, height: 11}, - wantHeight: -1, - }, - { - name: "only New blocks", - fields: scTestParams{ - initHeight: 3, - peers: map[string]*scPeer{"P1": {height: 6, state: peerStateReady}}, - allB: []int64{3, 4, 5, 6}, - }, - wantHeight: 3, - }, - { - name: "only Pending blocks", - fields: scTestParams{ - initHeight: 1, - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - pending: map[int64]p2p.ID{1: "P1", 2: "P1", 3: "P1", 4: "P1"}, - pendingTime: map[int64]time.Time{1: now, 2: now, 3: now, 4: now}, - }, - wantHeight: -1, - }, - { - name: "only Received blocks", - fields: scTestParams{ - initHeight: 1, - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - received: map[int64]p2p.ID{1: "P1", 2: "P1", 3: "P1", 4: "P1"}, - }, - wantHeight: -1, - }, - { - name: "only Processed blocks", - fields: scTestParams{ - initHeight: 1, - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - }, - wantHeight: 1, - }, - { - name: "mixed block states", - fields: scTestParams{ - initHeight: 1, - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - pending: map[int64]p2p.ID{2: "P1"}, - pendingTime: map[int64]time.Time{2: now}, - }, - wantHeight: 1, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - // nextHeightToSchedule() should not mutate the scheduler - wantSc := sc - - resMin := sc.nextHeightToSchedule() - assert.Equal(t, tt.wantHeight, resMin) - checkSameScheduler(t, wantSc, sc) - }) - } -} - -func TestScSelectPeer(t *testing.T) { - - type args struct { - height int64 - } - tests := []struct { - name string - fields scTestParams - args args - wantResult p2p.ID - wantError bool - }{ - { - name: "no peers", - fields: scTestParams{peers: map[string]*scPeer{}}, - args: args{height: 10}, - wantResult: "", - wantError: true, - }, - { - name: "only new peers", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: -1, state: peerStateNew}}}, - args: args{height: 10}, - wantResult: "", - wantError: true, - }, - { - name: "only Removed peers", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: 4, state: peerStateRemoved}}}, - args: args{height: 2}, - wantResult: "", - wantError: true, - }, - { - name: "one Ready shorter peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - }, - args: args{height: 5}, - wantResult: "", - wantError: true, - }, - { - name: "one Ready equal peer", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}, - }, - args: args{height: 4}, - wantResult: "P1", - }, - { - name: "one Ready higher peer", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: 6, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6}, - }, - args: args{height: 4}, - wantResult: "P1", - }, - { - name: "one Ready higher peer with higher base", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {base: 4, height: 6, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6}, - }, - args: args{height: 3}, - wantResult: "", - wantError: true, - }, - { - name: "many Ready higher peers with different number of pending requests", - fields: scTestParams{ - height: 4, - peers: map[string]*scPeer{ - "P1": {height: 8, state: peerStateReady}, - "P2": {height: 9, state: peerStateReady}}, - allB: []int64{4, 5, 6, 7, 8, 9}, - pending: map[int64]p2p.ID{ - 4: "P1", 6: "P1", - 5: "P2", - }, - }, - args: args{height: 4}, - wantResult: "P2", - }, - { - name: "many Ready higher peers with same number of pending requests", - fields: scTestParams{ - peers: map[string]*scPeer{ - "P2": {height: 20, state: peerStateReady}, - "P1": {height: 15, state: peerStateReady}, - "P3": {height: 15, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - pending: map[int64]p2p.ID{ - 1: "P1", 2: "P1", - 3: "P3", 4: "P3", - 5: "P2", 6: "P2", - }, - }, - args: args{height: 7}, - wantResult: "P1", - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - // selectPeer should not mutate the scheduler - wantSc := sc - res, err := sc.selectPeer(tt.args.height) - assert.Equal(t, tt.wantResult, res) - assert.Equal(t, tt.wantError, err != nil) - checkSameScheduler(t, wantSc, sc) - }) - } -} - -// makeScBlock makes an empty block. -func makeScBlock(height int64) *types.Block { - return &types.Block{Header: types.Header{Height: height}} -} - -// used in place of assert.Equal(t, want, actual) to avoid failures due to -// scheduler.lastAdvanced timestamp inequalities. -func checkSameScheduler(t *testing.T, want *scheduler, actual *scheduler) { - assert.Equal(t, want.initHeight, actual.initHeight) - assert.Equal(t, want.height, actual.height) - assert.Equal(t, want.peers, actual.peers) - assert.Equal(t, want.blockStates, actual.blockStates) - assert.Equal(t, want.pendingBlocks, actual.pendingBlocks) - assert.Equal(t, want.pendingTime, actual.pendingTime) - assert.Equal(t, want.blockStates, actual.blockStates) - assert.Equal(t, want.receivedBlocks, actual.receivedBlocks) - assert.Equal(t, want.blockStates, actual.blockStates) -} - -// checkScResults checks scheduler handler test results -func checkScResults(t *testing.T, wantErr bool, err error, wantEvent Event, event Event) { - if (err != nil) != wantErr { - t.Errorf("error = %v, wantErr %v", err, wantErr) - return - } - switch wantEvent := wantEvent.(type) { - case scPeerError: - assert.Equal(t, wantEvent.peerID, event.(scPeerError).peerID) - assert.Equal(t, wantEvent.reason != nil, event.(scPeerError).reason != nil) - case scBlockReceived: - assert.Equal(t, wantEvent.peerID, event.(scBlockReceived).peerID) - assert.Equal(t, wantEvent.block, event.(scBlockReceived).block) - case scSchedulerFail: - assert.Equal(t, wantEvent.reason != nil, event.(scSchedulerFail).reason != nil) - } -} - -func TestScHandleBlockResponse(t *testing.T) { - now := time.Now() - block6FromP1 := bcBlockResponse{ - time: now.Add(time.Millisecond), - peerID: p2p.ID("P1"), - size: 100, - block: makeScBlock(6), - } - - type args struct { - event bcBlockResponse - } - - tests := []struct { - name string - fields scTestParams - args args - wantEvent Event - wantErr bool - }{ - { - name: "empty scheduler", - fields: scTestParams{}, - args: args{event: block6FromP1}, - wantEvent: noOpEvent{}, - }, - { - name: "block from removed peer", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: 8, state: peerStateRemoved}}}, - args: args{event: block6FromP1}, - wantEvent: noOpEvent{}, - }, - { - name: "block we haven't asked for", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6, 7, 8}}, - args: args{event: block6FromP1}, - wantEvent: scPeerError{peerID: "P1", reason: fmt.Errorf("some error")}, - }, - { - name: "block from wrong peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P2": {height: 8, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6, 7, 8}, - pending: map[int64]p2p.ID{6: "P2"}, - pendingTime: map[int64]time.Time{6: now}, - }, - args: args{event: block6FromP1}, - wantEvent: noOpEvent{}, - }, - { - name: "block with bad timestamp", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6, 7, 8}, - pending: map[int64]p2p.ID{6: "P1"}, - pendingTime: map[int64]time.Time{6: now.Add(time.Second)}, - }, - args: args{event: block6FromP1}, - wantEvent: scPeerError{peerID: "P1", reason: fmt.Errorf("some error")}, - }, - { - name: "good block, accept", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6, 7, 8}, - pending: map[int64]p2p.ID{6: "P1"}, - pendingTime: map[int64]time.Time{6: now}, - }, - args: args{event: block6FromP1}, - wantEvent: scBlockReceived{peerID: "P1", block: makeScBlock(6)}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - event, err := sc.handleBlockResponse(tt.args.event) - checkScResults(t, tt.wantErr, err, tt.wantEvent, event) - }) - } -} - -func TestScHandleNoBlockResponse(t *testing.T) { - now := time.Now() - noBlock6FromP1 := bcNoBlockResponse{ - time: now.Add(time.Millisecond), - peerID: p2p.ID("P1"), - height: 6, - } - - tests := []struct { - name string - fields scTestParams - wantEvent Event - wantFields scTestParams - wantErr bool - }{ - { - name: "empty scheduler", - fields: scTestParams{}, - wantEvent: noOpEvent{}, - wantFields: scTestParams{}, - }, - { - name: "noBlock from removed peer", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: 8, state: peerStateRemoved}}}, - wantEvent: noOpEvent{}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {height: 8, state: peerStateRemoved}}}, - }, - { - name: "for block we haven't asked for", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6, 7, 8}}, - wantEvent: scPeerError{peerID: "P1", reason: fmt.Errorf("some error")}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {height: 8, state: peerStateRemoved}}}, - }, - { - name: "noBlock from peer we don't have", - fields: scTestParams{ - peers: map[string]*scPeer{"P2": {height: 8, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6, 7, 8}, - pending: map[int64]p2p.ID{6: "P2"}, - pendingTime: map[int64]time.Time{6: now}, - }, - wantEvent: noOpEvent{}, - wantFields: scTestParams{ - peers: map[string]*scPeer{"P2": {height: 8, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6, 7, 8}, - pending: map[int64]p2p.ID{6: "P2"}, - pendingTime: map[int64]time.Time{6: now}, - }, - }, - { - name: "noBlock from existing peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6, 7, 8}, - pending: map[int64]p2p.ID{6: "P1"}, - pendingTime: map[int64]time.Time{6: now}, - }, - wantEvent: scPeerError{peerID: "P1", reason: fmt.Errorf("some error")}, - wantFields: scTestParams{peers: map[string]*scPeer{"P1": {height: 8, state: peerStateRemoved}}}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - event, err := sc.handleNoBlockResponse(noBlock6FromP1) - checkScResults(t, tt.wantErr, err, tt.wantEvent, event) - wantSc := newTestScheduler(tt.wantFields) - assert.Equal(t, wantSc, sc) - }) - } -} - -func TestScHandleBlockProcessed(t *testing.T) { - now := time.Now() - processed6FromP1 := pcBlockProcessed{ - peerID: p2p.ID("P1"), - height: 6, - } - - type args struct { - event pcBlockProcessed - } - - tests := []struct { - name string - fields scTestParams - args args - wantEvent Event - wantErr bool - }{ - { - name: "empty scheduler", - fields: scTestParams{height: 6}, - args: args{event: processed6FromP1}, - wantEvent: noOpEvent{}, - }, - { - name: "processed block we don't have", - fields: scTestParams{ - initHeight: 6, - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}}, - allB: []int64{6, 7, 8}, - pending: map[int64]p2p.ID{6: "P1"}, - pendingTime: map[int64]time.Time{6: now}, - }, - args: args{event: processed6FromP1}, - wantEvent: noOpEvent{}, - }, - { - name: "processed block ok, we processed all blocks", - fields: scTestParams{ - initHeight: 6, - peers: map[string]*scPeer{"P1": {height: 7, state: peerStateReady}}, - allB: []int64{6, 7}, - received: map[int64]p2p.ID{6: "P1", 7: "P1"}, - }, - args: args{event: processed6FromP1}, - wantEvent: scFinishedEv{}, - }, - { - name: "processed block ok, we still have blocks to process", - fields: scTestParams{ - initHeight: 6, - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}}, - allB: []int64{6, 7, 8}, - pending: map[int64]p2p.ID{7: "P1", 8: "P1"}, - received: map[int64]p2p.ID{6: "P1"}, - }, - args: args{event: processed6FromP1}, - wantEvent: noOpEvent{}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - event, err := sc.handleBlockProcessed(tt.args.event) - checkScResults(t, tt.wantErr, err, tt.wantEvent, event) - }) - } -} - -func TestScHandleBlockVerificationFailure(t *testing.T) { - now := time.Now() - - type args struct { - event pcBlockVerificationFailure - } - - tests := []struct { - name string - fields scTestParams - args args - wantEvent Event - wantErr bool - }{ - { - name: "empty scheduler", - fields: scTestParams{}, - args: args{event: pcBlockVerificationFailure{height: 10, firstPeerID: "P1", secondPeerID: "P1"}}, - wantEvent: noOpEvent{}, - }, - { - name: "failed block we don't have, single peer is still removed", - fields: scTestParams{ - initHeight: 6, - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}}, - allB: []int64{6, 7, 8}, - pending: map[int64]p2p.ID{6: "P1"}, - pendingTime: map[int64]time.Time{6: now}, - }, - args: args{event: pcBlockVerificationFailure{height: 10, firstPeerID: "P1", secondPeerID: "P1"}}, - wantEvent: scFinishedEv{}, - }, - { - name: "failed block we don't have, one of two peers are removed", - fields: scTestParams{ - initHeight: 6, - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}, "P2": {height: 8, state: peerStateReady}}, - allB: []int64{6, 7, 8}, - pending: map[int64]p2p.ID{6: "P1"}, - pendingTime: map[int64]time.Time{6: now}, - }, - args: args{event: pcBlockVerificationFailure{height: 10, firstPeerID: "P1", secondPeerID: "P1"}}, - wantEvent: noOpEvent{}, - }, - { - name: "failed block, all blocks are processed after removal", - fields: scTestParams{ - initHeight: 6, - peers: map[string]*scPeer{"P1": {height: 7, state: peerStateReady}}, - allB: []int64{6, 7}, - received: map[int64]p2p.ID{6: "P1", 7: "P1"}, - }, - args: args{event: pcBlockVerificationFailure{height: 7, firstPeerID: "P1", secondPeerID: "P1"}}, - wantEvent: scFinishedEv{}, - }, - { - name: "failed block, we still have blocks to process", - fields: scTestParams{ - initHeight: 5, - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}, "P2": {height: 8, state: peerStateReady}}, - allB: []int64{5, 6, 7, 8}, - pending: map[int64]p2p.ID{7: "P1", 8: "P1"}, - received: map[int64]p2p.ID{5: "P1", 6: "P1"}, - }, - args: args{event: pcBlockVerificationFailure{height: 5, firstPeerID: "P1", secondPeerID: "P1"}}, - wantEvent: noOpEvent{}, - }, - { - name: "failed block, H+1 and H+2 delivered by different peers, we still have blocks to process", - fields: scTestParams{ - initHeight: 5, - peers: map[string]*scPeer{ - "P1": {height: 8, state: peerStateReady}, - "P2": {height: 8, state: peerStateReady}, - "P3": {height: 8, state: peerStateReady}, - }, - allB: []int64{5, 6, 7, 8}, - pending: map[int64]p2p.ID{7: "P1", 8: "P1"}, - received: map[int64]p2p.ID{5: "P1", 6: "P1"}, - }, - args: args{event: pcBlockVerificationFailure{height: 5, firstPeerID: "P1", secondPeerID: "P2"}}, - wantEvent: noOpEvent{}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - event, err := sc.handleBlockProcessError(tt.args.event) - checkScResults(t, tt.wantErr, err, tt.wantEvent, event) - }) - } -} - -func TestScHandleAddNewPeer(t *testing.T) { - addP1 := bcAddNewPeer{ - peerID: p2p.ID("P1"), - } - type args struct { - event bcAddNewPeer - } - - tests := []struct { - name string - fields scTestParams - args args - wantEvent Event - wantErr bool - }{ - { - name: "add P1 to empty scheduler", - fields: scTestParams{}, - args: args{event: addP1}, - wantEvent: noOpEvent{}, - }, - { - name: "add duplicate peer", - fields: scTestParams{ - initHeight: 6, - peers: map[string]*scPeer{"P1": {height: 8, state: peerStateReady}}, - allB: []int64{6, 7, 8}, - }, - args: args{event: addP1}, - wantEvent: noOpEvent{}, - }, - { - name: "add P1 to non empty scheduler", - fields: scTestParams{ - initHeight: 6, - peers: map[string]*scPeer{"P2": {height: 8, state: peerStateReady}}, - allB: []int64{6, 7, 8}, - }, - args: args{event: addP1}, - wantEvent: noOpEvent{}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - event, err := sc.handleAddNewPeer(tt.args.event) - checkScResults(t, tt.wantErr, err, tt.wantEvent, event) - }) - } -} - -func TestScHandleTryPrunePeer(t *testing.T) { - now := time.Now() - - pruneEv := rTryPrunePeer{ - time: now.Add(time.Second + time.Millisecond), - } - type args struct { - event rTryPrunePeer - } - - tests := []struct { - name string - fields scTestParams - args args - wantEvent Event - wantErr bool - }{ - { - name: "no peers", - fields: scTestParams{}, - args: args{event: pruneEv}, - wantEvent: noOpEvent{}, - }, - { - name: "no prunable peers", - fields: scTestParams{ - minRecvRate: 100, - peers: map[string]*scPeer{ - // X - removed, active, fast - "P1": {state: peerStateRemoved, lastTouched: now.Add(time.Second), lastRate: 101}, - // X - ready, active, fast - "P2": {state: peerStateReady, lastTouched: now.Add(time.Second), lastRate: 101}, - // X - removed, active, equal - "P3": {state: peerStateRemoved, lastTouched: now.Add(time.Second), lastRate: 100}}, - peerTimeout: time.Second, - }, - args: args{event: pruneEv}, - wantEvent: noOpEvent{}, - }, - { - name: "mixed peers", - fields: scTestParams{ - minRecvRate: 100, - peers: map[string]*scPeer{ - // X - removed, active, fast - "P1": {state: peerStateRemoved, lastTouched: now.Add(time.Second), lastRate: 101, height: 5}, - // X - ready, active, fast - "P2": {state: peerStateReady, lastTouched: now.Add(time.Second), lastRate: 101, height: 5}, - // X - removed, active, equal - "P3": {state: peerStateRemoved, lastTouched: now.Add(time.Second), lastRate: 100, height: 5}, - // V - ready, inactive, equal - "P4": {state: peerStateReady, lastTouched: now, lastRate: 100, height: 7}, - // V - ready, inactive, slow - "P5": {state: peerStateReady, lastTouched: now, lastRate: 99, height: 7}, - // V - ready, active, slow - "P6": {state: peerStateReady, lastTouched: now.Add(time.Second), lastRate: 90, height: 7}, - }, - allB: []int64{1, 2, 3, 4, 5, 6, 7}, - peerTimeout: time.Second}, - args: args{event: pruneEv}, - wantEvent: scPeersPruned{peers: []p2p.ID{"P4", "P5", "P6"}}, - }, - { - name: "mixed peers, finish after pruning", - fields: scTestParams{ - minRecvRate: 100, - height: 6, - peers: map[string]*scPeer{ - // X - removed, active, fast - "P1": {state: peerStateRemoved, lastTouched: now.Add(time.Second), lastRate: 101, height: 5}, - // X - ready, active, fast - "P2": {state: peerStateReady, lastTouched: now.Add(time.Second), lastRate: 101, height: 5}, - // X - removed, active, equal - "P3": {state: peerStateRemoved, lastTouched: now.Add(time.Second), lastRate: 100, height: 5}, - // V - ready, inactive, equal - "P4": {state: peerStateReady, lastTouched: now, lastRate: 100, height: 7}, - // V - ready, inactive, slow - "P5": {state: peerStateReady, lastTouched: now, lastRate: 99, height: 7}, - // V - ready, active, slow - "P6": {state: peerStateReady, lastTouched: now.Add(time.Second), lastRate: 90, height: 7}, - }, - allB: []int64{6, 7}, - peerTimeout: time.Second}, - args: args{event: pruneEv}, - wantEvent: scFinishedEv{}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - event, err := sc.handleTryPrunePeer(tt.args.event) - checkScResults(t, tt.wantErr, err, tt.wantEvent, event) - }) - } -} - -func TestScHandleTrySchedule(t *testing.T) { - now := time.Now() - tryEv := rTrySchedule{ - time: now.Add(time.Second + time.Millisecond), - } - - type args struct { - event rTrySchedule - } - tests := []struct { - name string - fields scTestParams - args args - wantEvent Event - wantErr bool - }{ - { - name: "no peers", - fields: scTestParams{startTime: now, peers: map[string]*scPeer{}}, - args: args{event: tryEv}, - wantEvent: noOpEvent{}, - }, - { - name: "only new peers", - fields: scTestParams{startTime: now, peers: map[string]*scPeer{"P1": {height: -1, state: peerStateNew}}}, - args: args{event: tryEv}, - wantEvent: noOpEvent{}, - }, - { - name: "only Removed peers", - fields: scTestParams{startTime: now, peers: map[string]*scPeer{"P1": {height: 4, state: peerStateRemoved}}}, - args: args{event: tryEv}, - wantEvent: noOpEvent{}, - }, - { - name: "one Ready shorter peer", - fields: scTestParams{ - startTime: now, - height: 6, - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}}, - args: args{event: tryEv}, - wantEvent: noOpEvent{}, - }, - { - name: "one Ready equal peer", - fields: scTestParams{ - startTime: now, - peers: map[string]*scPeer{"P1": {height: 4, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4}}, - args: args{event: tryEv}, - wantEvent: scBlockRequest{peerID: "P1", height: 1}, - }, - { - name: "many Ready higher peers with different number of pending requests", - fields: scTestParams{ - startTime: now, - peers: map[string]*scPeer{ - "P1": {height: 4, state: peerStateReady}, - "P2": {height: 5, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5}, - pending: map[int64]p2p.ID{ - 1: "P1", 2: "P1", - 3: "P2", - }, - }, - args: args{event: tryEv}, - wantEvent: scBlockRequest{peerID: "P2", height: 4}, - }, - - { - name: "many Ready higher peers with same number of pending requests", - fields: scTestParams{ - startTime: now, - peers: map[string]*scPeer{ - "P2": {height: 8, state: peerStateReady}, - "P1": {height: 8, state: peerStateReady}, - "P3": {height: 8, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6, 7, 8}, - pending: map[int64]p2p.ID{ - 1: "P1", 2: "P1", - 3: "P3", 4: "P3", - 5: "P2", 6: "P2", - }, - }, - args: args{event: tryEv}, - wantEvent: scBlockRequest{peerID: "P1", height: 7}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - event, err := sc.handleTrySchedule(tt.args.event) - checkScResults(t, tt.wantErr, err, tt.wantEvent, event) - }) - } -} - -func TestScHandleStatusResponse(t *testing.T) { - now := time.Now() - statusRespP1Ev := bcStatusResponse{ - time: now.Add(time.Second + time.Millisecond), - peerID: "P1", - height: 6, - } - - type args struct { - event bcStatusResponse - } - tests := []struct { - name string - fields scTestParams - args args - wantEvent Event - wantErr bool - }{ - { - name: "change height of non existing peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P2": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}, - }, - args: args{event: statusRespP1Ev}, - wantEvent: noOpEvent{}, - }, - - { - name: "increase height of removed peer", - fields: scTestParams{peers: map[string]*scPeer{"P1": {height: 2, state: peerStateRemoved}}}, - args: args{event: statusRespP1Ev}, - wantEvent: noOpEvent{}, - }, - - { - name: "decrease height of single peer", - fields: scTestParams{ - height: 5, - peers: map[string]*scPeer{"P1": {height: 10, state: peerStateReady}}, - allB: []int64{5, 6, 7, 8, 9, 10}, - }, - args: args{event: statusRespP1Ev}, - wantEvent: scPeerError{peerID: "P1", reason: fmt.Errorf("some error")}, - }, - - { - name: "increase height of single peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 2, state: peerStateReady}}, - allB: []int64{1, 2}}, - args: args{event: statusRespP1Ev}, - wantEvent: noOpEvent{}, - }, - { - name: "noop height change of single peer", - fields: scTestParams{ - peers: map[string]*scPeer{"P1": {height: 6, state: peerStateReady}}, - allB: []int64{1, 2, 3, 4, 5, 6}}, - args: args{event: statusRespP1Ev}, - wantEvent: noOpEvent{}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - sc := newTestScheduler(tt.fields) - event, err := sc.handleStatusResponse(tt.args.event) - checkScResults(t, tt.wantErr, err, tt.wantEvent, event) - }) - } -} - -func TestScHandle(t *testing.T) { - now := time.Now() - - type unknownEv struct { - priorityNormal - } - - t0 := time.Now() - tick := make([]time.Time, 100) - for i := range tick { - tick[i] = t0.Add(time.Duration(i) * time.Millisecond) - } - - type args struct { - event Event - } - type scStep struct { - currentSc *scTestParams - args args - wantEvent Event - wantErr bool - wantSc *scTestParams - } - tests := []struct { - name string - steps []scStep - }{ - { - name: "unknown event", - steps: []scStep{ - { // add P1 - currentSc: &scTestParams{}, - args: args{event: unknownEv{}}, - wantEvent: scSchedulerFail{reason: fmt.Errorf("some error")}, - wantSc: &scTestParams{}, - }, - }, - }, - { - name: "single peer, sync 3 blocks", - steps: []scStep{ - { // add P1 - currentSc: &scTestParams{startTime: now, peers: map[string]*scPeer{}, height: 1}, - args: args{event: bcAddNewPeer{peerID: "P1"}}, - wantEvent: noOpEvent{}, - wantSc: &scTestParams{startTime: now, peers: map[string]*scPeer{ - "P1": {base: -1, height: -1, state: peerStateNew}}, height: 1}, - }, - { // set height of P1 - args: args{event: bcStatusResponse{peerID: "P1", time: tick[0], height: 3}}, - wantEvent: noOpEvent{}, - wantSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady}}, - allB: []int64{1, 2, 3}, - height: 1, - }, - }, - { // schedule block 1 - args: args{event: rTrySchedule{time: tick[1]}}, - wantEvent: scBlockRequest{peerID: "P1", height: 1}, - wantSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady}}, - allB: []int64{1, 2, 3}, - pending: map[int64]p2p.ID{1: "P1"}, - pendingTime: map[int64]time.Time{1: tick[1]}, - height: 1, - }, - }, - { // schedule block 2 - args: args{event: rTrySchedule{time: tick[2]}}, - wantEvent: scBlockRequest{peerID: "P1", height: 2}, - wantSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady}}, - allB: []int64{1, 2, 3}, - pending: map[int64]p2p.ID{1: "P1", 2: "P1"}, - pendingTime: map[int64]time.Time{1: tick[1], 2: tick[2]}, - height: 1, - }, - }, - { // schedule block 3 - args: args{event: rTrySchedule{time: tick[3]}}, - wantEvent: scBlockRequest{peerID: "P1", height: 3}, - wantSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady}}, - allB: []int64{1, 2, 3}, - pending: map[int64]p2p.ID{1: "P1", 2: "P1", 3: "P1"}, - pendingTime: map[int64]time.Time{1: tick[1], 2: tick[2], 3: tick[3]}, - height: 1, - }, - }, - { // block response 1 - args: args{event: bcBlockResponse{peerID: "P1", time: tick[4], size: 100, block: makeScBlock(1)}}, - wantEvent: scBlockReceived{peerID: "P1", block: makeScBlock(1)}, - wantSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady, lastTouched: tick[4]}}, - allB: []int64{1, 2, 3}, - pending: map[int64]p2p.ID{2: "P1", 3: "P1"}, - pendingTime: map[int64]time.Time{2: tick[2], 3: tick[3]}, - received: map[int64]p2p.ID{1: "P1"}, - height: 1, - }, - }, - { // block response 2 - args: args{event: bcBlockResponse{peerID: "P1", time: tick[5], size: 100, block: makeScBlock(2)}}, - wantEvent: scBlockReceived{peerID: "P1", block: makeScBlock(2)}, - wantSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady, lastTouched: tick[5]}}, - allB: []int64{1, 2, 3}, - pending: map[int64]p2p.ID{3: "P1"}, - pendingTime: map[int64]time.Time{3: tick[3]}, - received: map[int64]p2p.ID{1: "P1", 2: "P1"}, - height: 1, - }, - }, - { // block response 3 - args: args{event: bcBlockResponse{peerID: "P1", time: tick[6], size: 100, block: makeScBlock(3)}}, - wantEvent: scBlockReceived{peerID: "P1", block: makeScBlock(3)}, - wantSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady, lastTouched: tick[6]}}, - allB: []int64{1, 2, 3}, - received: map[int64]p2p.ID{1: "P1", 2: "P1", 3: "P1"}, - height: 1, - }, - }, - { // processed block 1 - args: args{event: pcBlockProcessed{peerID: p2p.ID("P1"), height: 1}}, - wantEvent: noOpEvent{}, - wantSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady, lastTouched: tick[6]}}, - allB: []int64{2, 3}, - received: map[int64]p2p.ID{2: "P1", 3: "P1"}, - height: 2, - }, - }, - { // processed block 2 - args: args{event: pcBlockProcessed{peerID: p2p.ID("P1"), height: 2}}, - wantEvent: scFinishedEv{}, - wantSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{"P1": {height: 3, state: peerStateReady, lastTouched: tick[6]}}, - allB: []int64{3}, - received: map[int64]p2p.ID{3: "P1"}, - height: 3, - }, - }, - }, - }, - { - name: "block verification failure", - steps: []scStep{ - { // failure processing block 1 - currentSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{ - "P1": {height: 4, state: peerStateReady, lastTouched: tick[6]}, - "P2": {height: 3, state: peerStateReady, lastTouched: tick[6]}}, - allB: []int64{1, 2, 3, 4}, - received: map[int64]p2p.ID{1: "P1", 2: "P1", 3: "P1"}, - height: 1, - }, - args: args{event: pcBlockVerificationFailure{height: 1, firstPeerID: "P1", secondPeerID: "P1"}}, - wantEvent: noOpEvent{}, - wantSc: &scTestParams{ - startTime: now, - peers: map[string]*scPeer{ - "P1": {height: 4, state: peerStateRemoved, lastTouched: tick[6]}, - "P2": {height: 3, state: peerStateReady, lastTouched: tick[6]}}, - allB: []int64{1, 2, 3}, - received: map[int64]p2p.ID{}, - height: 1, - }, - }, - }, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - var sc *scheduler - for i, step := range tt.steps { - // First step must always initialize the currentState as state. - if step.currentSc != nil { - sc = newTestScheduler(*step.currentSc) - } - if sc == nil { - panic("Bad (initial?) step") - } - - nextEvent, err := sc.handle(step.args.event) - wantSc := newTestScheduler(*step.wantSc) - - t.Logf("step %d(%v): %s", i, step.args.event, sc) - checkSameScheduler(t, wantSc, sc) - - checkScResults(t, step.wantErr, err, step.wantEvent, nextEvent) - - // Next step may use the wantedState as their currentState. - sc = newTestScheduler(*step.wantSc) - } - }) - } -} diff --git a/blockchain/v2/types.go b/blockchain/v2/types.go deleted file mode 100644 index 7a73728e4..000000000 --- a/blockchain/v2/types.go +++ /dev/null @@ -1,65 +0,0 @@ -package v2 - -import ( - "github.com/Workiva/go-datastructures/queue" -) - -// Event is the type that can be added to the priority queue. -type Event queue.Item - -type priority interface { - Compare(other queue.Item) int - Priority() int -} - -type priorityLow struct{} -type priorityNormal struct{} -type priorityHigh struct{} - -func (p priorityLow) Priority() int { - return 1 -} - -func (p priorityNormal) Priority() int { - return 2 -} - -func (p priorityHigh) Priority() int { - return 3 -} - -func (p priorityLow) Compare(other queue.Item) int { - op := other.(priority) - if p.Priority() > op.Priority() { - return 1 - } else if p.Priority() == op.Priority() { - return 0 - } - return -1 -} - -func (p priorityNormal) Compare(other queue.Item) int { - op := other.(priority) - if p.Priority() > op.Priority() { - return 1 - } else if p.Priority() == op.Priority() { - return 0 - } - return -1 -} - -func (p priorityHigh) Compare(other queue.Item) int { - op := other.(priority) - if p.Priority() > op.Priority() { - return 1 - } else if p.Priority() == op.Priority() { - return 0 - } - return -1 -} - -type noOpEvent struct { - priorityLow -} - -var noOp = noOpEvent{} diff --git a/cmd/tendermint/commands/debug/dump.go b/cmd/tendermint/commands/debug/dump.go index 678f70791..67025ea5f 100644 --- a/cmd/tendermint/commands/debug/dump.go +++ b/cmd/tendermint/commands/debug/dump.go @@ -3,7 +3,6 @@ package debug import ( "errors" "fmt" - "io/ioutil" "os" "path/filepath" "time" @@ -82,7 +81,7 @@ func dumpCmdHandler(_ *cobra.Command, args []string) error { func dumpDebugData(outDir string, conf *cfg.Config, rpc *rpchttp.HTTP) { start := time.Now().UTC() - tmpDir, err := ioutil.TempDir(outDir, "tendermint_debug_tmp") + tmpDir, err := os.MkdirTemp(outDir, "tendermint_debug_tmp") if err != nil { logger.Error("failed to create temporary directory", "dir", tmpDir, "error", err) return diff --git a/cmd/tendermint/commands/debug/io.go b/cmd/tendermint/commands/debug/io.go index dcfff50c8..bf904cf5c 100644 --- a/cmd/tendermint/commands/debug/io.go +++ b/cmd/tendermint/commands/debug/io.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "os" "path" "path/filepath" @@ -111,5 +110,5 @@ func writeStateJSONToFile(state interface{}, dir, filename string) error { return fmt.Errorf("failed to encode state dump: %w", err) } - return ioutil.WriteFile(path.Join(dir, filename), stateJSON, os.ModePerm) + return os.WriteFile(path.Join(dir, filename), stateJSON, os.ModePerm) } diff --git a/cmd/tendermint/commands/debug/kill.go b/cmd/tendermint/commands/debug/kill.go index a2c7a5fe1..6801746b9 100644 --- a/cmd/tendermint/commands/debug/kill.go +++ b/cmd/tendermint/commands/debug/kill.go @@ -3,7 +3,6 @@ package debug import ( "errors" "fmt" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -56,7 +55,7 @@ func killCmdHandler(cmd *cobra.Command, args []string) error { // Create a temporary directory which will contain all the state dumps and // relevant files and directories that will be compressed into a file. - tmpDir, err := ioutil.TempDir(os.TempDir(), "tendermint_debug_tmp") + tmpDir, err := os.MkdirTemp(os.TempDir(), "tendermint_debug_tmp") if err != nil { return fmt.Errorf("failed to create temporary directory: %w", err) } diff --git a/cmd/tendermint/commands/debug/util.go b/cmd/tendermint/commands/debug/util.go index 226bfadc7..4c1fa2e75 100644 --- a/cmd/tendermint/commands/debug/util.go +++ b/cmd/tendermint/commands/debug/util.go @@ -3,7 +3,7 @@ package debug import ( "context" "fmt" - "io/ioutil" + "io" "net/http" "os" "path" @@ -73,10 +73,10 @@ func dumpProfile(dir, addr, profile string, debug int) error { } defer resp.Body.Close() - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) if err != nil { return fmt.Errorf("failed to read %s profile response body: %w", profile, err) } - return ioutil.WriteFile(path.Join(dir, fmt.Sprintf("%s.out", profile)), body, os.ModePerm) + return os.WriteFile(path.Join(dir, fmt.Sprintf("%s.out", profile)), body, os.ModePerm) } diff --git a/cmd/tendermint/commands/reset.go b/cmd/tendermint/commands/reset.go index dd022060b..86beb6a56 100644 --- a/cmd/tendermint/commands/reset.go +++ b/cmd/tendermint/commands/reset.go @@ -29,7 +29,7 @@ var ResetStateCmd = &cobra.Command{ Short: "Remove all the data and WAL", PreRun: deprecateSnakeCase, RunE: func(cmd *cobra.Command, args []string) (err error) { - config, err = ParseConfig() + config, err = ParseConfig(cmd) if err != nil { return err } @@ -54,7 +54,7 @@ var ResetPrivValidatorCmd = &cobra.Command{ // XXX: this is totally unsafe. // it's only suitable for testnets. func resetAllCmd(cmd *cobra.Command, args []string) (err error) { - config, err = ParseConfig() + config, err = ParseConfig(cmd) if err != nil { return err } @@ -71,7 +71,7 @@ func resetAllCmd(cmd *cobra.Command, args []string) (err error) { // XXX: this is totally unsafe. // it's only suitable for testnets. func resetPrivValidator(cmd *cobra.Command, args []string) (err error) { - config, err = ParseConfig() + config, err = ParseConfig(cmd) if err != nil { return err } diff --git a/cmd/tendermint/commands/root.go b/cmd/tendermint/commands/root.go index 2478f95a5..46c9ac7c7 100644 --- a/cmd/tendermint/commands/root.go +++ b/cmd/tendermint/commands/root.go @@ -29,12 +29,25 @@ func registerFlagsRootCmd(cmd *cobra.Command) { // ParseConfig retrieves the default environment configuration, // sets up the Tendermint root and ensures that the root exists -func ParseConfig() (*cfg.Config, error) { +func ParseConfig(cmd *cobra.Command) (*cfg.Config, error) { conf := cfg.DefaultConfig() err := viper.Unmarshal(conf) if err != nil { return nil, err } + + var home string + if os.Getenv("TMHOME") != "" { + home = os.Getenv("TMHOME") + } else { + home, err = cmd.Flags().GetString(cli.HomeFlag) + if err != nil { + return nil, err + } + } + + conf.RootDir = home + conf.SetRoot(conf.RootDir) cfg.EnsureRoot(conf.RootDir) if err := conf.ValidateBasic(); err != nil { @@ -52,7 +65,7 @@ var RootCmd = &cobra.Command{ return nil } - config, err = ParseConfig() + config, err = ParseConfig(cmd) if err != nil { return err } diff --git a/cmd/tendermint/commands/root_test.go b/cmd/tendermint/commands/root_test.go index d1e5964b2..ed9d4c73f 100644 --- a/cmd/tendermint/commands/root_test.go +++ b/cmd/tendermint/commands/root_test.go @@ -2,7 +2,6 @@ package commands import ( "fmt" - "io/ioutil" "os" "path/filepath" "strconv" @@ -168,5 +167,5 @@ func WriteConfigVals(dir string, vals map[string]string) error { data += fmt.Sprintf("%s = \"%s\"\n", k, v) } cfile := filepath.Join(dir, "config.toml") - return ioutil.WriteFile(cfile, []byte(data), 0600) + return os.WriteFile(cfile, []byte(data), 0600) } diff --git a/cmd/tendermint/commands/run_node.go b/cmd/tendermint/commands/run_node.go index fd4408c5a..d78eab2c8 100644 --- a/cmd/tendermint/commands/run_node.go +++ b/cmd/tendermint/commands/run_node.go @@ -46,7 +46,7 @@ func AddNodeFlags(cmd *cobra.Command) { "proxy_app", config.ProxyApp, "proxy app address, or one of: 'kvstore',"+ - " 'persistent_kvstore', 'counter', 'e2e' or 'noop' for local testing.") + " 'persistent_kvstore' or 'noop' for local testing.") cmd.Flags().String("abci", config.ABCI, "specify abci transport (socket | grpc)") // rpc flags @@ -63,6 +63,7 @@ func AddNodeFlags(cmd *cobra.Command) { "p2p.laddr", config.P2P.ListenAddress, "node listen address. (0.0.0.0:0 means any interface, any port)") + cmd.Flags().String("p2p.external-address", config.P2P.ExternalAddress, "ip:port address to advertise to peers for them to dial") cmd.Flags().String("p2p.seeds", config.P2P.Seeds, "comma-delimited ID@host:port seed nodes") cmd.Flags().String("p2p.persistent_peers", config.P2P.PersistentPeers, "comma-delimited ID@host:port persistent peers") cmd.Flags().String("p2p.unconditional_peer_ids", diff --git a/config/config.go b/config/config.go index cdd384dd0..9c8c3c236 100644 --- a/config/config.go +++ b/config/config.go @@ -897,10 +897,8 @@ func (cfg *FastSyncConfig) ValidateBasic() error { switch cfg.Version { case "v0": return nil - case "v1": - return nil - case "v2": - return nil + case "v1", "v2": + return fmt.Errorf("fast sync version %s has been deprecated. Please use v0 instead", cfg.Version) default: return fmt.Errorf("unknown fastsync version %s", cfg.Version) } diff --git a/config/config_test.go b/config/config_test.go index 6a46933bc..cdcddf427 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -134,7 +134,7 @@ func TestFastSyncConfigValidateBasic(t *testing.T) { // tamper with version cfg.Version = "v1" - assert.NoError(t, cfg.ValidateBasic()) + assert.Error(t, cfg.ValidateBasic()) cfg.Version = "invalid" assert.Error(t, cfg.ValidateBasic()) diff --git a/config/toml.go b/config/toml.go index c57b4db84..a0112351d 100644 --- a/config/toml.go +++ b/config/toml.go @@ -3,7 +3,7 @@ package config import ( "bytes" "fmt" - "io/ioutil" + "os" "path/filepath" "strings" "text/template" @@ -434,9 +434,11 @@ chunk_fetchers = "{{ .StateSync.ChunkFetchers }}" [fastsync] # Fast Sync version to use: -# 1) "v0" (default) - the legacy fast sync implementation -# 2) "v1" - refactor of v0 version for better testability -# 2) "v2" - complete redesign of v0, optimized for testability & readability +# +# In v0.37, v1 and v2 of the fast sync protocol were deprecated. +# Please use v0 instead. +# +# 1) "v0" - the default fast sync implementation version = "{{ .FastSync.Version }}" ####################################################### @@ -533,7 +535,7 @@ func ResetTestRoot(testName string) *Config { func ResetTestRootWithChainID(testName string, chainID string) *Config { // create a unique, concurrency-safe test directory under os.TempDir() - rootDir, err := ioutil.TempDir("", fmt.Sprintf("%s-%s_", chainID, testName)) + rootDir, err := os.MkdirTemp("", fmt.Sprintf("%s-%s_", chainID, testName)) if err != nil { panic(err) } diff --git a/config/toml_test.go b/config/toml_test.go index f19710687..48b7a0dc2 100644 --- a/config/toml_test.go +++ b/config/toml_test.go @@ -1,7 +1,6 @@ package config import ( - "io/ioutil" "os" "path/filepath" "strings" @@ -23,7 +22,7 @@ func TestEnsureRoot(t *testing.T) { require := require.New(t) // setup temp dir for test - tmpDir, err := ioutil.TempDir("", "config-test") + tmpDir, err := os.MkdirTemp("", "config-test") require.Nil(err) defer os.RemoveAll(tmpDir) @@ -31,7 +30,7 @@ func TestEnsureRoot(t *testing.T) { EnsureRoot(tmpDir) // make sure config is set properly - data, err := ioutil.ReadFile(filepath.Join(tmpDir, defaultConfigFilePath)) + data, err := os.ReadFile(filepath.Join(tmpDir, defaultConfigFilePath)) require.Nil(err) if !checkConfig(string(data)) { @@ -52,7 +51,7 @@ func TestEnsureTestRoot(t *testing.T) { rootDir := cfg.RootDir // make sure config is set properly - data, err := ioutil.ReadFile(filepath.Join(rootDir, defaultConfigFilePath)) + data, err := os.ReadFile(filepath.Join(rootDir, defaultConfigFilePath)) require.Nil(err) if !checkConfig(string(data)) { diff --git a/consensus/byzantine_test.go b/consensus/byzantine_test.go index 594c7d9a2..fa5e90e65 100644 --- a/consensus/byzantine_test.go +++ b/consensus/byzantine_test.go @@ -42,7 +42,7 @@ func TestByzantinePrevoteEquivocation(t *testing.T) { const prevoteHeight = int64(2) testName := "consensus_byzantine_test" tickerFunc := newMockTickerFunc(true) - appFunc := newCounter + appFunc := newKVStore genDoc, privVals := randGenesisDoc(nValidators, false, 30) css := make([]*State, nValidators) @@ -304,7 +304,7 @@ func TestByzantinePrevoteEquivocation(t *testing.T) { func TestByzantineConflictingProposalsWithPartition(t *testing.T) { N := 4 logger := consensusLogger().With("test", "byzantine") - app := newCounter + app := newKVStore css, cleanup := randConsensusNet(N, "consensus_byzantine_test", newMockTickerFunc(false), app) defer cleanup() @@ -448,8 +448,8 @@ func TestByzantineConflictingProposalsWithPartition(t *testing.T) { case <-done: case <-tick.C: for i, reactor := range reactors { - t.Log(fmt.Sprintf("Consensus Reactor %v", i)) - t.Log(fmt.Sprintf("%v", reactor)) + t.Logf("Consensus Reactor %v", i) + t.Logf("%v", reactor) } t.Fatalf("Timed out waiting for all validators to commit first block") } diff --git a/consensus/common_test.go b/consensus/common_test.go index dc8b6c28b..1bcfae08d 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "fmt" - "io/ioutil" "os" "path/filepath" "sort" @@ -21,7 +20,6 @@ import ( dbm "github.com/tendermint/tm-db" abcicli "github.com/tendermint/tendermint/abci/client" - "github.com/tendermint/tendermint/abci/example/counter" "github.com/tendermint/tendermint/abci/example/kvstore" abci "github.com/tendermint/tendermint/abci/types" cfg "github.com/tendermint/tendermint/config" @@ -463,7 +461,7 @@ func loadPrivValidator(config *cfg.Config) *privval.FilePV { } func randState(nValidators int) (*State, []*validatorStub) { - return randStateWithApp(nValidators, counter.NewApplication(true)) + return randStateWithApp(nValidators, kvstore.NewApplication()) } func randStateWithApp(nValidators int, app abci.Application) (*State, []*validatorStub) { @@ -804,11 +802,11 @@ func randConsensusNetWithPeers( if i < nValidators { privVal = privVals[i] } else { - tempKeyFile, err := ioutil.TempFile("", "priv_validator_key_") + tempKeyFile, err := os.CreateTemp("", "priv_validator_key_") if err != nil { panic(err) } - tempStateFile, err := ioutil.TempFile("", "priv_validator_state_") + tempStateFile, err := os.CreateTemp("", "priv_validator_state_") if err != nil { panic(err) } @@ -923,20 +921,18 @@ func (m *mockTicker) Chan() <-chan timeoutInfo { func (*mockTicker) SetLogger(log.Logger) {} -//------------------------------------ - -func newCounter() abci.Application { - return counter.NewApplication(true) -} - func newPersistentKVStore() abci.Application { - dir, err := ioutil.TempDir("", "persistent-kvstore") + dir, err := os.MkdirTemp("", "persistent-kvstore") if err != nil { panic(err) } return kvstore.NewPersistentKVStoreApplication(dir) } +func newKVStore() abci.Application { + return kvstore.NewApplication() +} + func newPersistentKVStoreWithPath(dbDir string) abci.Application { return kvstore.NewPersistentKVStoreApplication(dbDir) } diff --git a/consensus/invalid_test.go b/consensus/invalid_test.go index 907693c57..f96018157 100644 --- a/consensus/invalid_test.go +++ b/consensus/invalid_test.go @@ -18,7 +18,7 @@ import ( // Ensure a testnet makes blocks func TestReactorInvalidPrecommit(t *testing.T) { N := 4 - css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newCounter) + css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newKVStore) defer cleanup() for i := 0; i < 4; i++ { diff --git a/consensus/metrics.gen.go b/consensus/metrics.gen.go new file mode 100644 index 000000000..70be488ef --- /dev/null +++ b/consensus/metrics.gen.go @@ -0,0 +1,195 @@ +// Code generated by metricsgen. DO NOT EDIT. + +package consensus + +import ( + "github.com/go-kit/kit/metrics/discard" + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + labels := []string{} + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } + return &Metrics{ + Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "height", + Help: "Height of the chain.", + }, labels).With(labelsAndValues...), + ValidatorLastSignedHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validator_last_signed_height", + Help: "Last height signed by this validator if the node is a validator.", + }, append(labels, "validator_address")).With(labelsAndValues...), + Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "rounds", + Help: "Number of rounds.", + }, labels).With(labelsAndValues...), + RoundDurationSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "round_duration_seconds", + Help: "Histogram of round duration.", + + Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8), + }, labels).With(labelsAndValues...), + Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validators", + Help: "Number of validators.", + }, labels).With(labelsAndValues...), + ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validators_power", + Help: "Total power of all validators.", + }, labels).With(labelsAndValues...), + ValidatorPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validator_power", + Help: "Power of a validator.", + }, append(labels, "validator_address")).With(labelsAndValues...), + ValidatorMissedBlocks: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "validator_missed_blocks", + Help: "Amount of blocks missed per validator.", + }, append(labels, "validator_address")).With(labelsAndValues...), + MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "missing_validators", + Help: "Number of validators who did not sign.", + }, labels).With(labelsAndValues...), + MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "missing_validators_power", + Help: "Total power of the missing validators.", + }, labels).With(labelsAndValues...), + ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "byzantine_validators", + Help: "Number of validators who tried to double sign.", + }, labels).With(labelsAndValues...), + ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "byzantine_validators_power", + Help: "Total power of the byzantine validators.", + }, labels).With(labelsAndValues...), + BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_interval_seconds", + Help: "Time between this and the last block.", + }, labels).With(labelsAndValues...), + NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "num_txs", + Help: "Number of transactions.", + }, labels).With(labelsAndValues...), + BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_size_bytes", + Help: "Size of the block.", + }, labels).With(labelsAndValues...), + TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "total_txs", + Help: "Total number of transactions.", + }, labels).With(labelsAndValues...), + CommittedHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "latest_block_height", + Help: "The latest block height.", + }, labels).With(labelsAndValues...), + FastSyncing: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "fast_syncing", + Help: "Whether or not a node is fast syncing. 1 if yes, 0 if no.", + }, labels).With(labelsAndValues...), + StateSyncing: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "state_syncing", + Help: "Whether or not a node is state syncing. 1 if yes, 0 if no.", + }, labels).With(labelsAndValues...), + BlockParts: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_parts", + Help: "Number of block parts transmitted by each peer.", + }, append(labels, "peer_id")).With(labelsAndValues...), + StepDurationSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "step_duration_seconds", + Help: "Histogram of durations for each step in the consensus protocol.", + + Buckets: stdprometheus.ExponentialBucketsRange(0.1, 100, 8), + }, append(labels, "step")).With(labelsAndValues...), + BlockGossipPartsReceived: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_gossip_parts_received", + Help: "Number of block parts received by the node, separated by whether the part was relevant to the block the node is trying to gather or not.", + }, append(labels, "matches_current")).With(labelsAndValues...), + QuorumPrevoteDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "quorum_prevote_delay", + Help: "Interval in seconds between the proposal timestamp and the timestamp of the earliest prevote that achieved a quorum.", + }, append(labels, "proposer_address")).With(labelsAndValues...), + FullPrevoteDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "full_prevote_delay", + Help: "Interval in seconds between the proposal timestamp and the timestamp of the latest prevote in a round where all validators voted.", + }, append(labels, "proposer_address")).With(labelsAndValues...), + } +} + +func NopMetrics() *Metrics { + return &Metrics{ + Height: discard.NewGauge(), + ValidatorLastSignedHeight: discard.NewGauge(), + Rounds: discard.NewGauge(), + RoundDurationSeconds: discard.NewHistogram(), + Validators: discard.NewGauge(), + ValidatorsPower: discard.NewGauge(), + ValidatorPower: discard.NewGauge(), + ValidatorMissedBlocks: discard.NewGauge(), + MissingValidators: discard.NewGauge(), + MissingValidatorsPower: discard.NewGauge(), + ByzantineValidators: discard.NewGauge(), + ByzantineValidatorsPower: discard.NewGauge(), + BlockIntervalSeconds: discard.NewHistogram(), + NumTxs: discard.NewGauge(), + BlockSizeBytes: discard.NewGauge(), + TotalTxs: discard.NewGauge(), + CommittedHeight: discard.NewGauge(), + FastSyncing: discard.NewGauge(), + StateSyncing: discard.NewGauge(), + BlockParts: discard.NewCounter(), + StepDurationSeconds: discard.NewHistogram(), + BlockGossipPartsReceived: discard.NewCounter(), + QuorumPrevoteDelay: discard.NewGauge(), + FullPrevoteDelay: discard.NewGauge(), + } +} diff --git a/consensus/metrics.go b/consensus/metrics.go index 527d53744..de0da45de 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -1,11 +1,12 @@ package consensus import ( - "github.com/go-kit/kit/metrics" - "github.com/go-kit/kit/metrics/discard" + "strings" + "time" - prometheus "github.com/go-kit/kit/metrics/prometheus" - stdprometheus "github.com/prometheus/client_golang/prometheus" + "github.com/go-kit/kit/metrics" + cstypes "github.com/tendermint/tendermint/consensus/types" + "github.com/tendermint/tendermint/types" ) const ( @@ -14,25 +15,30 @@ const ( MetricsSubsystem = "consensus" ) +//go:generate go run ../scripts/metricsgen -struct=Metrics + // Metrics contains metrics exposed by this package. type Metrics struct { // Height of the chain. Height metrics.Gauge - // ValidatorLastSignedHeight of a validator. - ValidatorLastSignedHeight metrics.Gauge + // Last height signed by this validator if the node is a validator. + ValidatorLastSignedHeight metrics.Gauge `metrics_labels:"validator_address"` // Number of rounds. Rounds metrics.Gauge + // Histogram of round duration. + RoundDurationSeconds metrics.Histogram `metrics_buckettype:"exprange" metrics_bucketsizes:"0.1, 100, 8"` + // Number of validators. Validators metrics.Gauge // Total power of all validators. ValidatorsPower metrics.Gauge // Power of a validator. - ValidatorPower metrics.Gauge - // Amount of blocks missed by a validator. - ValidatorMissedBlocks metrics.Gauge + ValidatorPower metrics.Gauge `metrics_labels:"validator_address"` + // Amount of blocks missed per validator. + ValidatorMissedBlocks metrics.Gauge `metrics_labels:"validator_address"` // Number of validators who did not sign. MissingValidators metrics.Gauge // Total power of the missing validators. @@ -52,14 +58,22 @@ type Metrics struct { // Total number of transactions. TotalTxs metrics.Gauge // The latest block height. - CommittedHeight metrics.Gauge + CommittedHeight metrics.Gauge `metrics_name:"latest_block_height"` // Whether or not a node is fast syncing. 1 if yes, 0 if no. FastSyncing metrics.Gauge // Whether or not a node is state syncing. 1 if yes, 0 if no. StateSyncing metrics.Gauge - // Number of blockparts transmitted by peer. - BlockParts metrics.Counter + // Number of block parts transmitted by each peer. + BlockParts metrics.Counter `metrics_labels:"peer_id"` + + // Histogram of durations for each step in the consensus protocol. + StepDurationSeconds metrics.Histogram `metrics_labels:"step" metrics_buckettype:"exprange" metrics_bucketsizes:"0.1, 100, 8"` + stepStart time.Time + + // Number of block parts received by the node, separated by whether the part + // was relevant to the block the node is trying to gather or not. + BlockGossipPartsReceived metrics.Counter `metrics_labels:"matches_current"` // QuroumPrevoteMessageDelay is the interval in seconds between the proposal // timestamp and the timestamp of the earliest prevote that achieved a quorum @@ -70,183 +84,35 @@ type Metrics struct { // be above 2/3 of the total voting power of the network defines the endpoint // the endpoint of the interval. Subtract the proposal timestamp from this endpoint // to obtain the quorum delay. - QuorumPrevoteMessageDelay metrics.Gauge + //metrics:Interval in seconds between the proposal timestamp and the timestamp of the earliest prevote that achieved a quorum. + QuorumPrevoteDelay metrics.Gauge `metrics_labels:"proposer_address"` - // FullPrevoteMessageDelay is the interval in seconds between the proposal + // FullPrevoteDelay is the interval in seconds between the proposal // timestamp and the timestamp of the latest prevote in a round where 100% // of the voting power on the network issued prevotes. - FullPrevoteMessageDelay metrics.Gauge + //metrics:Interval in seconds between the proposal timestamp and the timestamp of the latest prevote in a round where all validators voted. + FullPrevoteDelay metrics.Gauge `metrics_labels:"proposer_address"` } -// PrometheusMetrics returns Metrics build using Prometheus client library. -// Optionally, labels can be provided along with their values ("foo", -// "fooValue"). -func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { - labels := []string{} - for i := 0; i < len(labelsAndValues); i += 2 { - labels = append(labels, labelsAndValues[i]) - } - return &Metrics{ - Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "height", - Help: "Height of the chain.", - }, labels).With(labelsAndValues...), - Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "rounds", - Help: "Number of rounds.", - }, labels).With(labelsAndValues...), - - Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "validators", - Help: "Number of validators.", - }, labels).With(labelsAndValues...), - ValidatorLastSignedHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "validator_last_signed_height", - Help: "Last signed height for a validator", - }, append(labels, "validator_address")).With(labelsAndValues...), - ValidatorMissedBlocks: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "validator_missed_blocks", - Help: "Total missed blocks for a validator", - }, append(labels, "validator_address")).With(labelsAndValues...), - ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "validators_power", - Help: "Total power of all validators.", - }, labels).With(labelsAndValues...), - ValidatorPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "validator_power", - Help: "Power of a validator", - }, append(labels, "validator_address")).With(labelsAndValues...), - MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "missing_validators", - Help: "Number of validators who did not sign.", - }, labels).With(labelsAndValues...), - MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "missing_validators_power", - Help: "Total power of the missing validators.", - }, labels).With(labelsAndValues...), - ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "byzantine_validators", - Help: "Number of validators who tried to double sign.", - }, labels).With(labelsAndValues...), - ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "byzantine_validators_power", - Help: "Total power of the byzantine validators.", - }, labels).With(labelsAndValues...), - BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "block_interval_seconds", - Help: "Time between this and the last block.", - }, labels).With(labelsAndValues...), - NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "num_txs", - Help: "Number of transactions.", - }, labels).With(labelsAndValues...), - BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "block_size_bytes", - Help: "Size of the block.", - }, labels).With(labelsAndValues...), - TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "total_txs", - Help: "Total number of transactions.", - }, labels).With(labelsAndValues...), - CommittedHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "latest_block_height", - Help: "The latest block height.", - }, labels).With(labelsAndValues...), - FastSyncing: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "fast_syncing", - Help: "Whether or not a node is fast syncing. 1 if yes, 0 if no.", - }, labels).With(labelsAndValues...), - StateSyncing: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "state_syncing", - Help: "Whether or not a node is state syncing. 1 if yes, 0 if no.", - }, labels).With(labelsAndValues...), - BlockParts: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "block_parts", - Help: "Number of blockparts transmitted by peer.", - }, append(labels, "peer_id")).With(labelsAndValues...), - QuorumPrevoteMessageDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "quorum_prevote_message_delay", - Help: "Difference in seconds between the proposal timestamp and the timestamp " + - "of the latest prevote that achieved a quorum in the prevote step.", - }, labels).With(labelsAndValues...), - FullPrevoteMessageDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "full_prevote_message_delay", - Help: "Difference in seconds between the proposal timestamp and the timestamp " + - "of the latest prevote that achieved 100% of the voting power in the prevote step.", - }, labels).With(labelsAndValues...), - } +// RecordConsMetrics uses for recording the block related metrics during fast-sync. +func (m *Metrics) RecordConsMetrics(block *types.Block) { + m.NumTxs.Set(float64(len(block.Data.Txs))) + m.TotalTxs.Add(float64(len(block.Data.Txs))) + m.BlockSizeBytes.Set(float64(block.Size())) + m.CommittedHeight.Set(float64(block.Height)) } -// NopMetrics returns no-op Metrics. -func NopMetrics() *Metrics { - return &Metrics{ - Height: discard.NewGauge(), - - ValidatorLastSignedHeight: discard.NewGauge(), - - Rounds: discard.NewGauge(), - - Validators: discard.NewGauge(), - ValidatorsPower: discard.NewGauge(), - ValidatorPower: discard.NewGauge(), - ValidatorMissedBlocks: discard.NewGauge(), - MissingValidators: discard.NewGauge(), - MissingValidatorsPower: discard.NewGauge(), - ByzantineValidators: discard.NewGauge(), - ByzantineValidatorsPower: discard.NewGauge(), - - BlockIntervalSeconds: discard.NewHistogram(), - - NumTxs: discard.NewGauge(), - BlockSizeBytes: discard.NewGauge(), - TotalTxs: discard.NewGauge(), - CommittedHeight: discard.NewGauge(), - FastSyncing: discard.NewGauge(), - StateSyncing: discard.NewGauge(), - BlockParts: discard.NewCounter(), - QuorumPrevoteMessageDelay: discard.NewGauge(), - FullPrevoteMessageDelay: discard.NewGauge(), - } +func (m *Metrics) MarkRound(r int32, st time.Time) { + m.Rounds.Set(float64(r)) + roundTime := time.Since(st).Seconds() + m.RoundDurationSeconds.Observe(roundTime) +} + +func (m *Metrics) MarkStep(s cstypes.RoundStepType) { + if !m.stepStart.IsZero() { + stepTime := time.Since(m.stepStart).Seconds() + stepName := strings.TrimPrefix(s.String(), "RoundStep") + m.StepDurationSeconds.With("step", stepName).Observe(stepTime) + } + m.stepStart = time.Now() } diff --git a/consensus/reactor_test.go b/consensus/reactor_test.go index 5d68cd9b7..1997bcbfa 100644 --- a/consensus/reactor_test.go +++ b/consensus/reactor_test.go @@ -112,7 +112,7 @@ func stopConsensusNet(logger log.Logger, reactors []*Reactor, eventBuses []*type // Ensure a testnet makes blocks func TestReactorBasic(t *testing.T) { N := 4 - css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newCounter) + css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newKVStore) defer cleanup() reactors, blocksSubs, eventBuses := startConsensusNet(t, css, N) defer stopConsensusNet(log.TestingLogger(), reactors, eventBuses) @@ -127,11 +127,11 @@ func TestReactorWithEvidence(t *testing.T) { nValidators := 4 testName := "consensus_reactor_test" tickerFunc := newMockTickerFunc(true) - appFunc := newCounter + appFunc := newKVStore // heed the advice from https://www.sandimetz.com/blog/2016/1/20/the-wrong-abstraction // to unroll unwieldy abstractions. Here we duplicate the code from: - // css := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newCounter) + // css := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newKVStore) genDoc, privVals := randGenesisDoc(nValidators, false, 30) css := make([]*State, nValidators) @@ -233,7 +233,7 @@ func TestReactorWithEvidence(t *testing.T) { // Ensure a testnet makes blocks when there are txs func TestReactorCreatesBlockWhenEmptyBlocksFalse(t *testing.T) { N := 4 - css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newCounter, + css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newKVStore, func(c *cfg.Config) { c.Consensus.CreateEmptyBlocks = false }) @@ -254,7 +254,7 @@ func TestReactorCreatesBlockWhenEmptyBlocksFalse(t *testing.T) { func TestReactorReceiveDoesNotPanicIfAddPeerHasntBeenCalledYet(t *testing.T) { N := 1 - css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newCounter) + css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newKVStore) defer cleanup() reactors, _, eventBuses := startConsensusNet(t, css, N) defer stopConsensusNet(log.TestingLogger(), reactors, eventBuses) @@ -277,7 +277,7 @@ func TestReactorReceiveDoesNotPanicIfAddPeerHasntBeenCalledYet(t *testing.T) { func TestReactorReceivePanicsIfInitPeerHasntBeenCalledYet(t *testing.T) { N := 1 - css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newCounter) + css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newKVStore) defer cleanup() reactors, _, eventBuses := startConsensusNet(t, css, N) defer stopConsensusNet(log.TestingLogger(), reactors, eventBuses) @@ -300,7 +300,7 @@ func TestReactorReceivePanicsIfInitPeerHasntBeenCalledYet(t *testing.T) { // Test we record stats about votes and block parts from other peers. func TestReactorRecordsVotesAndBlockParts(t *testing.T) { N := 4 - css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newCounter) + css, cleanup := randConsensusNet(N, "consensus_reactor_test", newMockTickerFunc(true), newKVStore) defer cleanup() reactors, blocksSubs, eventBuses := startConsensusNet(t, css, N) defer stopConsensusNet(log.TestingLogger(), reactors, eventBuses) @@ -521,7 +521,7 @@ func TestReactorValidatorSetChanges(t *testing.T) { // Check we can make blocks with skip_timeout_commit=false func TestReactorWithTimeoutCommit(t *testing.T) { N := 4 - css, cleanup := randConsensusNet(N, "consensus_reactor_with_timeout_commit_test", newMockTickerFunc(false), newCounter) + css, cleanup := randConsensusNet(N, "consensus_reactor_with_timeout_commit_test", newMockTickerFunc(false), newKVStore) defer cleanup() // override default SkipTimeoutCommit == true for tests for i := 0; i < N; i++ { diff --git a/consensus/replay_file.go b/consensus/replay_file.go index 4bf7466ab..9ec5403cc 100644 --- a/consensus/replay_file.go +++ b/consensus/replay_file.go @@ -309,7 +309,7 @@ func newConsensusStateForReplay(config cfg.BaseConfig, csConfig *cfg.ConsensusCo // Create proxyAppConn connection (consensus, mempool, query) clientCreator := proxy.DefaultClientCreator(config.ProxyApp, config.ABCI, config.DBDir()) - proxyApp := proxy.NewAppConns(clientCreator) + proxyApp := proxy.NewAppConns(clientCreator, proxy.NopMetrics()) err = proxyApp.Start() if err != nil { tmos.Exit(fmt.Sprintf("Error starting proxy app conns: %v", err)) diff --git a/consensus/replay_stubs.go b/consensus/replay_stubs.go index 9b4b3b062..aa74f9420 100644 --- a/consensus/replay_stubs.go +++ b/consensus/replay_stubs.go @@ -66,7 +66,7 @@ func newMockProxyApp(appHash []byte, abciResponses *tmstate.ABCIResponses) proxy if err != nil { panic(err) } - return proxy.NewAppConnConsensus(cli) + return proxy.NewAppConnConsensus(cli, proxy.NopMetrics()) } type mockProxyApp struct { diff --git a/consensus/replay_test.go b/consensus/replay_test.go index 61bc5f438..d32c61dc2 100644 --- a/consensus/replay_test.go +++ b/consensus/replay_test.go @@ -5,7 +5,6 @@ import ( "context" "fmt" "io" - "io/ioutil" "os" "path/filepath" "runtime" @@ -79,7 +78,7 @@ func startNewStateAndWaitForBlock(t *testing.T, consensusReplayConfig *cfg.Confi ) cs.SetLogger(logger) - bytes, _ := ioutil.ReadFile(cs.config.WalFile()) + bytes, _ := os.ReadFile(cs.config.WalFile()) t.Logf("====== WAL: \n\r%X\n", bytes) err := cs.Start() @@ -643,7 +642,7 @@ func TestMockProxyApp(t *testing.T) { } func tempWALWithData(data []byte) string { - walFile, err := ioutil.TempFile("", "wal") + walFile, err := os.CreateTemp("", "wal") if err != nil { panic(fmt.Sprintf("failed to create temp WAL file: %v", err)) } @@ -719,7 +718,7 @@ func testHandshakeReplay(t *testing.T, config *cfg.Config, nBlocks int, mode uin if nBlocks > 0 { // run nBlocks against a new client to build up the app state. // use a throwaway tendermint state - proxyApp := proxy.NewAppConns(clientCreator2) + proxyApp := proxy.NewAppConns(clientCreator2, proxy.NopMetrics()) stateDB1 := dbm.NewMemDB() stateStore := sm.NewStore(stateDB1) err := stateStore.Save(genesisState) @@ -739,7 +738,7 @@ func testHandshakeReplay(t *testing.T, config *cfg.Config, nBlocks int, mode uin // now start the app using the handshake - it should sync genDoc, _ := sm.MakeGenesisDocFromFile(config.GenesisFile()) handshaker := NewHandshaker(stateStore, state, store, genDoc) - proxyApp := proxy.NewAppConns(clientCreator2) + proxyApp := proxy.NewAppConns(clientCreator2, proxy.NopMetrics()) if err := proxyApp.Start(); err != nil { t.Fatalf("Error starting proxy app connections: %v", err) } @@ -849,7 +848,7 @@ func buildTMStateFromChain( clientCreator := proxy.NewLocalClientCreator( kvstore.NewPersistentKVStoreApplication( filepath.Join(config.DBDir(), fmt.Sprintf("replay_test_%d_%d_t", nBlocks, mode)))) - proxyApp := proxy.NewAppConns(clientCreator) + proxyApp := proxy.NewAppConns(clientCreator, proxy.NopMetrics()) if err := proxyApp.Start(); err != nil { panic(err) } @@ -917,7 +916,7 @@ func TestHandshakePanicsIfAppReturnsWrongAppHash(t *testing.T) { { app := &badApp{numBlocks: 3, allHashesAreWrong: true} clientCreator := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(clientCreator) + proxyApp := proxy.NewAppConns(clientCreator, proxy.NopMetrics()) err := proxyApp.Start() require.NoError(t, err) t.Cleanup(func() { @@ -941,7 +940,7 @@ func TestHandshakePanicsIfAppReturnsWrongAppHash(t *testing.T) { { app := &badApp{numBlocks: 3, onlyLastHashIsWrong: true} clientCreator := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(clientCreator) + proxyApp := proxy.NewAppConns(clientCreator, proxy.NopMetrics()) err := proxyApp.Start() require.NoError(t, err) t.Cleanup(func() { @@ -1025,7 +1024,7 @@ func makeBlockchainFromWAL(wal WAL) ([]*types.Block, []*types.Commit, error) { // if its not the first one, we have a full block if thisBlockParts != nil { var pbb = new(tmproto.Block) - bz, err := ioutil.ReadAll(thisBlockParts.GetReader()) + bz, err := io.ReadAll(thisBlockParts.GetReader()) if err != nil { panic(err) } @@ -1064,7 +1063,7 @@ func makeBlockchainFromWAL(wal WAL) ([]*types.Block, []*types.Commit, error) { } } // grab the last block too - bz, err := ioutil.ReadAll(thisBlockParts.GetReader()) + bz, err := io.ReadAll(thisBlockParts.GetReader()) if err != nil { panic(err) } @@ -1207,7 +1206,7 @@ func TestHandshakeUpdatesValidators(t *testing.T) { // now start the app using the handshake - it should sync genDoc, _ := sm.MakeGenesisDocFromFile(config.GenesisFile()) handshaker := NewHandshaker(stateStore, state, store, genDoc) - proxyApp := proxy.NewAppConns(clientCreator) + proxyApp := proxy.NewAppConns(clientCreator, proxy.NopMetrics()) if err := proxyApp.Start(); err != nil { t.Fatalf("Error starting proxy app connections: %v", err) } diff --git a/consensus/state.go b/consensus/state.go index 29fb5abca..ba0e5bf5e 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -4,7 +4,7 @@ import ( "bytes" "errors" "fmt" - "io/ioutil" + "io" "os" "runtime/debug" "sort" @@ -523,6 +523,14 @@ func (cs *State) updateHeight(height int64) { } func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) { + if !cs.replayMode { + if round != cs.Round || round == 0 && step == cstypes.RoundStepNewRound { + cs.metrics.MarkRound(cs.Round, cs.StartTime) + } + if cs.Step != step { + cs.metrics.MarkStep(cs.Step) + } + } cs.Round = round cs.Step = step } @@ -1021,9 +1029,6 @@ func (cs *State) enterNewRound(height int64, round int32) { if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil { cs.Logger.Error("failed publishing new round", "err", err) } - - cs.metrics.Rounds.Set(float64(round)) - // Wait for txs to be available in the mempool // before we enterPropose in round 0. If the last block changed the app hash, // we may need an empty "proof" block, and enterPropose immediately. @@ -1890,11 +1895,13 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (add // Blocks might be reused, so round mismatch is OK if cs.Height != height { cs.Logger.Debug("received block part from wrong height", "height", height, "round", round) + cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1) return false, nil } // We're not expecting a block part. if cs.ProposalBlockParts == nil { + cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1) // NOTE: this can happen when we've gone to a higher round and // then receive parts from the previous round - not necessarily a bad peer. cs.Logger.Debug( @@ -1909,15 +1916,21 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (add added, err = cs.ProposalBlockParts.AddPart(part) if err != nil { + if errors.Is(err, types.ErrPartSetInvalidProof) || errors.Is(err, types.ErrPartSetUnexpectedIndex) { + cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1) + } return added, err } + + cs.metrics.BlockGossipPartsReceived.With("matches_current", "true").Add(1) + if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes { return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)", cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes, ) } if added && cs.ProposalBlockParts.IsComplete() { - bz, err := ioutil.ReadAll(cs.ProposalBlockParts.GetReader()) + bz, err := io.ReadAll(cs.ProposalBlockParts.GetReader()) if err != nil { return added, err } @@ -2346,12 +2359,12 @@ func (cs *State) calculatePrevoteMessageDelayMetrics() { _, val := cs.Validators.GetByAddress(v.ValidatorAddress) votingPowerSeen += val.VotingPower if votingPowerSeen >= cs.Validators.TotalVotingPower()*2/3+1 { - cs.metrics.QuorumPrevoteMessageDelay.Set(v.Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) + cs.metrics.QuorumPrevoteDelay.With("proposer_address", cs.Validators.GetProposer().Address.String()).Set(v.Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) break } } if ps.HasAll() { - cs.metrics.FullPrevoteMessageDelay.Set(pl[len(pl)-1].Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) + cs.metrics.FullPrevoteDelay.With("proposer_address", cs.Validators.GetProposer().Address.String()).Set(pl[len(pl)-1].Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) } } diff --git a/consensus/state_test.go b/consensus/state_test.go index fa3004855..3f795d708 100644 --- a/consensus/state_test.go +++ b/consensus/state_test.go @@ -11,7 +11,7 @@ import ( "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" - "github.com/tendermint/tendermint/abci/example/counter" + "github.com/tendermint/tendermint/abci/example/kvstore" abci "github.com/tendermint/tendermint/abci/types" abcimocks "github.com/tendermint/tendermint/abci/types/mocks" cstypes "github.com/tendermint/tendermint/consensus/types" @@ -665,7 +665,7 @@ func TestStateLockPOLRelock(t *testing.T) { signAddVotes(cs1, tmproto.PrecommitType, nil, types.PartSetHeader{}, vs2, vs3, vs4) // before we timeout to the new round set the new proposal - cs2 := newState(cs1.state, vs2, counter.NewApplication(true)) + cs2 := newState(cs1.state, vs2, kvstore.NewApplication()) prop, propBlock := decideProposal(t, cs2, vs2, vs2.Height, vs2.Round+1) if prop == nil || propBlock == nil { t.Fatal("Failed to create proposal block with vs2") @@ -853,7 +853,7 @@ func TestStateLockPOLUnlockOnUnknownBlock(t *testing.T) { signAddVotes(cs1, tmproto.PrecommitType, nil, types.PartSetHeader{}, vs2, vs3, vs4) // before we timeout to the new round set the new proposal - cs2 := newState(cs1.state, vs2, counter.NewApplication(true)) + cs2 := newState(cs1.state, vs2, kvstore.NewApplication()) prop, propBlock := decideProposal(t, cs2, vs2, vs2.Height, vs2.Round+1) if prop == nil || propBlock == nil { t.Fatal("Failed to create proposal block with vs2") @@ -899,7 +899,7 @@ func TestStateLockPOLUnlockOnUnknownBlock(t *testing.T) { signAddVotes(cs1, tmproto.PrecommitType, nil, types.PartSetHeader{}, vs2, vs3, vs4) // before we timeout to the new round set the new proposal - cs3 := newState(cs1.state, vs3, counter.NewApplication(true)) + cs3 := newState(cs1.state, vs3, kvstore.NewApplication()) prop, propBlock = decideProposal(t, cs3, vs3, vs3.Height, vs3.Round+1) if prop == nil || propBlock == nil { t.Fatal("Failed to create proposal block with vs2") diff --git a/consensus/wal_generator.go b/consensus/wal_generator.go index 1c449717b..5a4670456 100644 --- a/consensus/wal_generator.go +++ b/consensus/wal_generator.go @@ -59,7 +59,7 @@ func WALGenerateNBlocks(t *testing.T, wr io.Writer, numBlocks int) (err error) { blockStore := store.NewBlockStore(blockStoreDB) - proxyApp := proxy.NewAppConns(proxy.NewLocalClientCreator(app)) + proxyApp := proxy.NewAppConns(proxy.NewLocalClientCreator(app), proxy.NopMetrics()) proxyApp.SetLogger(logger.With("module", "proxy")) if err := proxyApp.Start(); err != nil { return fmt.Errorf("failed to start proxy app connections: %w", err) diff --git a/consensus/wal_test.go b/consensus/wal_test.go index 4ee813609..12b775b41 100644 --- a/consensus/wal_test.go +++ b/consensus/wal_test.go @@ -3,7 +3,6 @@ package consensus import ( "bytes" "crypto/rand" - "io/ioutil" "os" "path/filepath" @@ -27,7 +26,7 @@ const ( ) func TestWALTruncate(t *testing.T) { - walDir, err := ioutil.TempDir("", "wal") + walDir, err := os.MkdirTemp("", "wal") require.NoError(t, err) defer os.RemoveAll(walDir) @@ -109,7 +108,7 @@ func TestWALEncoderDecoder(t *testing.T) { } func TestWALWrite(t *testing.T) { - walDir, err := ioutil.TempDir("", "wal") + walDir, err := os.MkdirTemp("", "wal") require.NoError(t, err) defer os.RemoveAll(walDir) walFile := filepath.Join(walDir, "wal") @@ -177,7 +176,7 @@ func TestWALSearchForEndHeight(t *testing.T) { } func TestWALPeriodicSync(t *testing.T) { - walDir, err := ioutil.TempDir("", "wal") + walDir, err := os.MkdirTemp("", "wal") require.NoError(t, err) defer os.RemoveAll(walDir) diff --git a/crypto/armor/armor.go b/crypto/armor/armor.go index bfc2193e9..3f346ff75 100644 --- a/crypto/armor/armor.go +++ b/crypto/armor/armor.go @@ -3,7 +3,7 @@ package armor import ( "bytes" "fmt" - "io/ioutil" + "io" "golang.org/x/crypto/openpgp/armor" // nolint: staticcheck ) @@ -31,7 +31,7 @@ func DecodeArmor(armorStr string) (blockType string, headers map[string]string, if err != nil { return "", nil, nil, err } - data, err = ioutil.ReadAll(block.Body) + data, err = io.ReadAll(block.Body) if err != nil { return "", nil, nil, err } diff --git a/docs/app-dev/abci-cli.md b/docs/app-dev/abci-cli.md index 1645c4f8c..913adbbc4 100644 --- a/docs/app-dev/abci-cli.md +++ b/docs/app-dev/abci-cli.md @@ -31,7 +31,6 @@ Available Commands: check_tx Validate a tx commit Commit the application state and return the Merkle root hash console Start an interactive abci console for multiple commands - counter ABCI demo example deliver_tx Deliver a new tx to the application kvstore ABCI demo example echo Have the application echo a message @@ -214,140 +213,9 @@ we do `deliver_tx "abc=efg"` it will store `(abc, efg)`. Similarly, you could put the commands in a file and run `abci-cli --verbose batch < myfile`. -## Counter - Another Example - -Now that we've got the hang of it, let's try another application, the -"counter" app. - -Like the kvstore app, its code can be found -[here](https://github.com/tendermint/tendermint/blob/v0.34.x/abci/cmd/abci-cli/abci-cli.go) -and looks like: - -```go -func cmdCounter(cmd *cobra.Command, args []string) error { - - app := counter.NewCounterApplication(flagSerial) - - logger := log.NewTMLogger(log.NewSyncWriter(os.Stdout)) - - // Start the listener - srv, err := server.NewServer(flagAddrC, flagAbci, app) - if err != nil { - return err - } - srv.SetLogger(logger.With("module", "abci-server")) - if err := srv.Start(); err != nil { - return err - } - - // Stop upon receiving SIGTERM or CTRL-C. - tmos.TrapSignal(logger, func() { - // Cleanup - srv.Stop() - }) - - // Run forever. - select {} -} -``` - -The counter app doesn't use a Merkle tree, it just counts how many times -we've sent a transaction, asked for a hash, or committed the state. The -result of `commit` is just the number of transactions sent. - -This application has two modes: `serial=off` and `serial=on`. - -When `serial=on`, transactions must be a big-endian encoded incrementing -integer, starting at 0. - -If `serial=off`, there are no restrictions on transactions. - -We can toggle the value of `serial` using the `set_option` ABCI message. - -When `serial=on`, some transactions are invalid. In a live blockchain, -transactions collect in memory before they are committed into blocks. To -avoid wasting resources on invalid transactions, ABCI provides the -`check_tx` message, which application developers can use to accept or -reject transactions, before they are stored in memory or gossipped to -other peers. - -In this instance of the counter app, `check_tx` only allows transactions -whose integer is greater than the last committed one. - -Let's kill the console and the kvstore application, and start the -counter app: - -```sh -abci-cli counter -``` - -In another window, start the `abci-cli console`: - -```sh -> set_option serial on --> code: OK --> log: OK (SetOption doesn't return anything.) - -> check_tx 0x00 --> code: OK - -> check_tx 0xff --> code: OK - -> deliver_tx 0x00 --> code: OK - -> check_tx 0x00 --> code: BadNonce --> log: Invalid nonce. Expected >= 1, got 0 - -> deliver_tx 0x01 --> code: OK - -> deliver_tx 0x04 --> code: BadNonce --> log: Invalid nonce. Expected 2, got 4 - -> info --> code: OK --> data: {"hashes":0,"txs":2} --> data.hex: 0x7B22686173686573223A302C22747873223A327D -``` - -This is a very simple application, but between `counter` and `kvstore`, -its easy to see how you can build out arbitrary application states on -top of the ABCI. [Hyperledger's -Burrow](https://github.com/hyperledger/burrow) also runs atop ABCI, -bringing with it Ethereum-like accounts, the Ethereum virtual-machine, -Monax's permissioning scheme, and native contracts extensions. - -But the ultimate flexibility comes from being able to write the -application easily in any language. - -We have implemented the counter in a number of languages [see the -example directory](https://github.com/tendermint/tendermint/tree/master/abci/example). - -To run the Node.js version, fist download & install [the Javascript ABCI server](https://github.com/tendermint/js-abci): - -```sh -git clone https://github.com/tendermint/js-abci.git -cd js-abci -npm install abci -``` - -Now you can start the app: - -```sh -node example/counter.js -``` - -(you'll have to kill the other counter application process). In another -window, run the console and those previous ABCI commands. You should get -the same results as for the Go version. - ## Bounties -Want to write the counter app in your favorite language?! We'd be happy +Want to write an app in your favorite language?! We'd be happy to add you to our [ecosystem](https://github.com/tendermint/awesome#ecosystem)! See [funding](https://github.com/interchainio/funding) opportunities from the [Interchain Foundation](https://interchain.io/) for implementations in new languages and more. diff --git a/docs/app-dev/getting-started.md b/docs/app-dev/getting-started.md index 9ba6bab26..af1aff5ff 100644 --- a/docs/app-dev/getting-started.md +++ b/docs/app-dev/getting-started.md @@ -37,8 +37,8 @@ cd $GOPATH/src/github.com/tendermint/tendermint make install_abci ``` -Now you should have the `abci-cli` installed; you'll see a couple of -commands (`counter` and `kvstore`) that are example applications written +Now you should have the `abci-cli` installed; you'll notice the `kvstore` +command, an example application written in Go. See below for an application written in JavaScript. Now, let's run some apps! @@ -165,92 +165,6 @@ curl -s 'localhost:26657/abci_query?data="name"' Try some other transactions and queries to make sure everything is working! -## Counter - Another Example - -Now that we've got the hang of it, let's try another application, the -`counter` app. - -The counter app doesn't use a Merkle tree, it just counts how many times -we've sent a transaction, or committed the state. - -This application has two modes: `serial=off` and `serial=on`. - -When `serial=on`, transactions must be a big-endian encoded incrementing -integer, starting at 0. - -If `serial=off`, there are no restrictions on transactions. - -In a live blockchain, transactions collect in memory before they are -committed into blocks. To avoid wasting resources on invalid -transactions, ABCI provides the `CheckTx` message, which application -developers can use to accept or reject transactions, before they are -stored in memory or gossipped to other peers. - -In this instance of the counter app, with `serial=on`, `CheckTx` only -allows transactions whose integer is greater than the last committed -one. - -Let's kill the previous instance of `tendermint` and the `kvstore` -application, and start the counter app. We can enable `serial=on` with a -flag: - -```sh -abci-cli counter --serial -``` - -In another window, reset then start Tendermint: - -```sh -tendermint unsafe_reset_all -tendermint node -``` - -Once again, you can see the blocks streaming by. Let's send some -transactions. Since we have set `serial=on`, the first transaction must -be the number `0`: - -```sh -curl localhost:26657/broadcast_tx_commit?tx=0x00 -``` - -Note the empty (hence successful) response. The next transaction must be -the number `1`. If instead, we try to send a `5`, we get an error: - -```json -> curl localhost:26657/broadcast_tx_commit?tx=0x05 -{ - "jsonrpc": "2.0", - "id": "", - "result": { - "check_tx": {}, - "deliver_tx": { - "code": 2, - "log": "Invalid nonce. Expected 1, got 5" - }, - "hash": "33B93DFF98749B0D6996A70F64071347060DC19C", - "height": 34 - } -} -``` - -But if we send a `1`, it works again: - -```json -> curl localhost:26657/broadcast_tx_commit?tx=0x01 -{ - "jsonrpc": "2.0", - "id": "", - "result": { - "check_tx": {}, - "deliver_tx": {}, - "hash": "F17854A977F6FA7EEA1BD758E296710B86F72F3D", - "height": 60 - } -} -``` - -For more details on the `broadcast_tx` API, see [the guide on using -Tendermint](../tendermint-core/using-tendermint.md). ## CounterJS - Example in Another Language diff --git a/docs/architecture/README.md b/docs/architecture/README.md new file mode 100644 index 000000000..e75896f38 --- /dev/null +++ b/docs/architecture/README.md @@ -0,0 +1,120 @@ +--- +order: 1 +parent: + order: false +--- + +# Architecture Decision Records (ADR) + +This is a location to record all high-level architecture decisions in the tendermint project. + +You can read more about the ADR concept in this [blog post](https://product.reverb.com/documenting-architecture-decisions-the-reverb-way-a3563bb24bd0#.78xhdix6t). + +An ADR should provide: + +- Context on the relevant goals and the current state +- Proposed changes to achieve the goals +- Summary of pros and cons +- References +- Changelog + +Note the distinction between an ADR and a spec. The ADR provides the context, intuition, reasoning, and +justification for a change in architecture, or for the architecture of something +new. The spec is much more compressed and streamlined summary of everything as +it stands today. + +If recorded decisions turned out to be lacking, convene a discussion, record the new decisions here, and then modify the code to match. + +Note the context/background should be written in the present tense. + +## Table of Contents + +### Implemented + +- [ADR-001: Logging](./adr-001-logging.md) +- [ADR-002: Event-Subscription](./adr-002-event-subscription.md) +- [ADR-003: ABCI-APP-RPC](./adr-003-abci-app-rpc.md) +- [ADR-004: Historical-Validators](./adr-004-historical-validators.md) +- [ADR-005: Consensus-Params](./adr-005-consensus-params.md) +- [ADR-008: Priv-Validator](./adr-008-priv-validator.md) +- [ADR-009: ABCI-Design](./adr-009-ABCI-design.md) +- [ADR-010: Crypto-Changes](./adr-010-crypto-changes.md) +- [ADR-011: Monitoring](./adr-011-monitoring.md) +- [ADR-014: Secp-Malleability](./adr-014-secp-malleability.md) +- [ADR-015: Crypto-Encoding](./adr-015-crypto-encoding.md) +- [ADR-016: Protocol-Versions](./adr-016-protocol-versions.md) +- [ADR-017: Chain-Versions](./adr-017-chain-versions.md) +- [ADR-018: ABCI-Validators](./adr-018-ABCI-Validators.md) +- [ADR-019: Multisigs](./adr-019-multisigs.md) +- [ADR-020: Block-Size](./adr-020-block-size.md) +- [ADR-021: ABCI-Events](./adr-021-abci-events.md) +- [ADR-025: Commit](./adr-025-commit.md) +- [ADR-026: General-Merkle-Proof](./adr-026-general-merkle-proof.md) +- [ADR-033: Pubsub](./adr-033-pubsub.md) +- [ADR-034: Priv-Validator-File-Structure](./adr-034-priv-validator-file-structure.md) +- [ADR-043: Blockchain-RiRi-Org](./adr-043-blockchain-riri-org.md) +- [ADR-044: Lite-Client-With-Weak-Subjectivity](./adr-044-lite-client-with-weak-subjectivity.md) +- [ADR-046: Light-Client-Implementation](./adr-046-light-client-implementation.md) +- [ADR-047: Handling-Evidence-From-Light-Client](./adr-047-handling-evidence-from-light-client.md) +- [ADR-051: Double-Signing-Risk-Reduction](./adr-051-double-signing-risk-reduction.md) +- [ADR-052: Tendermint-Mode](./adr-052-tendermint-mode.md) +- [ADR-053: State-Sync-Prototype](./adr-053-state-sync-prototype.md) +- [ADR-054: Crypto-Encoding-2](./adr-054-crypto-encoding-2.md) +- [ADR-055: Protobuf-Design](./adr-055-protobuf-design.md) +- [ADR-056: Light-Client-Amnesia-Attacks](./adr-056-light-client-amnesia-attacks.md) +- [ADR-059: Evidence-Composition-and-Lifecycle](./adr-059-evidence-composition-and-lifecycle.md) +- [ADR-062: P2P-Architecture](./adr-062-p2p-architecture.md) +- [ADR-063: Privval-gRPC](./adr-063-privval-grpc.md) +- [ADR-066: E2E-Testing](./adr-066-e2e-testing.md) +- [ADR-072: Restore Requests for Comments](./adr-072-request-for-comments.md) +- [ADR-077: Block Retention](./adr-077-block-retention.md) +- [ADR-078: Non-zero Genesis](./adr-078-nonzero-genesis.md) +- [ADR-079: ED25519 Verification](./adr-079-ed25519-verification.md) +- [ADR-080: Reverse Sync](./adr-080-reverse-sync.md) + +### Accepted + +- [ADR-006: Trust-Metric](./adr-006-trust-metric.md) +- [ADR-024: Sign-Bytes](./adr-024-sign-bytes.md) +- [ADR-035: Documentation](./adr-035-documentation.md) +- [ADR-039: Peer-Behaviour](./adr-039-peer-behaviour.md) +- [ADR-060: Go-API-Stability](./adr-060-go-api-stability.md) +- [ADR-061: P2P-Refactor-Scope](./adr-061-p2p-refactor-scope.md) +- [ADR-065: Custom Event Indexing](./adr-065-custom-event-indexing.md) +- [ADR-068: Reverse-Sync](./adr-068-reverse-sync.md) +- [ADR-067: Mempool Refactor](./adr-067-mempool-refactor.md) +- [ADR-075: RPC Event Subscription Interface](./adr-075-rpc-subscription.md) +- [ADR-076: Combine Spec and Tendermint Repositories](./adr-076-combine-spec-repo.md) +- [ADR-081: Protocol Buffers Management](./adr-081-protobuf-mgmt.md) + +### Deprecated + +None + +### Rejected + +- [ADR-023: ABCI-Propose-tx](./adr-023-ABCI-propose-tx.md) +- [ADR-029: Check-Tx-Consensus](./adr-029-check-tx-consensus.md) +- [ADR-058: Event-Hashing](./adr-058-event-hashing.md) + +### Proposed + +- [ADR-007: Trust-Metric-Usage](./adr-007-trust-metric-usage.md) +- [ADR-012: Peer-Transport](./adr-012-peer-transport.md) +- [ADR-013: Symmetric-Crypto](./adr-013-symmetric-crypto.md) +- [ADR-022: ABCI-Errors](./adr-022-abci-errors.md) +- [ADR-030: Consensus-Refactor](./adr-030-consensus-refactor.md) +- [ADR-036: Empty Blocks via ABCI](./adr-036-empty-blocks-abci.md) +- [ADR-037: Deliver-Block](./adr-037-deliver-block.md) +- [ADR-038: Non-Zero-Start-Height](./adr-038-non-zero-start-height.md) +- [ADR-040: Blockchain Reactor Refactor](./adr-040-blockchain-reactor-refactor.md) +- [ADR-041: Proposer-Selection-via-ABCI](./adr-041-proposer-selection-via-abci.md) +- [ADR-042: State Sync Design](./adr-042-state-sync.md) +- [ADR-045: ABCI-Evidence](./adr-045-abci-evidence.md) +- [ADR-050: Improved Trusted Peering](./adr-050-improved-trusted-peering.md) +- [ADR-057: RPC](./adr-057-RPC.md) +- [ADR-064: Batch Verification](./adr-064-batch-verification.md) +- [ADR-069: Node Initialization](./adr-069-flexible-node-initialization.md) +- [ADR-071: Proposer-Based Timestamps](./adr-071-proposer-based-timestamps.md) +- [ADR-073: Adopt LibP2P](./adr-073-libp2p.md) +- [ADR-074: Migrate Timeout Parameters to Consensus Parameters](./adr-074-timeout-params.md) diff --git a/docs/architecture/adr-001-logging.md b/docs/architecture/adr-001-logging.md new file mode 100644 index 000000000..b5df8bf7f --- /dev/null +++ b/docs/architecture/adr-001-logging.md @@ -0,0 +1,216 @@ +# ADR 1: Logging + +## Context + +Current logging system in Tendermint is very static and not flexible enough. + +Issues: [358](https://github.com/tendermint/tendermint/issues/358), [375](https://github.com/tendermint/tendermint/issues/375). + +What we want from the new system: + +- per package dynamic log levels +- dynamic logger setting (logger tied to the processing struct) +- conventions +- be more visually appealing + +"dynamic" here means the ability to set smth in runtime. + +## Decision + +### 1) An interface + +First, we will need an interface for all of our libraries (`tmlibs`, Tendermint, etc.). My personal preference is go-kit `Logger` interface (see Appendix A.), but that is too much a bigger change. Plus we will still need levels. + +```go +# log.go +type Logger interface { + Debug(msg string, keyvals ...interface{}) error + Info(msg string, keyvals ...interface{}) error + Error(msg string, keyvals ...interface{}) error + + With(keyvals ...interface{}) Logger +} +``` + +On a side note: difference between `Info` and `Notice` is subtle. We probably +could do without `Notice`. Don't think we need `Panic` or `Fatal` as a part of +the interface. These funcs could be implemented as helpers. In fact, we already +have some in `tmlibs/common`. + +- `Debug` - extended output for devs +- `Info` - all that is useful for a user +- `Error` - errors + +`Notice` should become `Info`, `Warn` either `Error` or `Debug` depending on the message, `Crit` -> `Error`. + +This interface should go into `tmlibs/log`. All libraries which are part of the core (tendermint/tendermint) should obey it. + +### 2) Logger with our current formatting + +On top of this interface, we will need to implement a stdout logger, which will be used when Tendermint is configured to output logs to STDOUT. + +Many people say that they like the current output, so let's stick with it. + +``` +NOTE[2017-04-25|14:45:08] ABCI Replay Blocks module=consensus appHeight=0 storeHeight=0 stateHeight=0 +``` + +Couple of minor changes: + +``` +I[2017-04-25|14:45:08.322] ABCI Replay Blocks module=consensus appHeight=0 storeHeight=0 stateHeight=0 +``` + +Notice the level is encoded using only one char plus milliseconds. + +Note: there are many other formats out there like [logfmt](https://brandur.org/logfmt). + +This logger could be implemented using any logger - [logrus](https://github.com/sirupsen/logrus), [go-kit/log](https://github.com/go-kit/kit/tree/master/log), [zap](https://github.com/uber-go/zap), log15 so far as it + +a) supports coloring output
+b) is moderately fast (buffering)
+c) conforms to the new interface or adapter could be written for it
+d) is somewhat configurable
+ +go-kit is my favorite so far. Check out how easy it is to color errors in red https://github.com/go-kit/kit/blob/master/log/term/example_test.go#L12. Although, coloring could only be applied to the whole string :( + +``` +go-kit +: flexible, modular +go-kit “-”: logfmt format https://brandur.org/logfmt + +logrus +: popular, feature rich (hooks), API and output is more like what we want +logrus -: not so flexible +``` + +```go +# tm_logger.go +// NewTmLogger returns a logger that encodes keyvals to the Writer in +// tm format. +func NewTmLogger(w io.Writer) Logger { + return &tmLogger{kitlog.NewLogfmtLogger(w)} +} + +func (l tmLogger) SetLevel(level string() { + switch (level) { + case "debug": + l.sourceLogger = level.NewFilter(l.sourceLogger, level.AllowDebug()) + } +} + +func (l tmLogger) Info(msg string, keyvals ...interface{}) error { + l.sourceLogger.Log("msg", msg, keyvals...) +} + +# log.go +func With(logger Logger, keyvals ...interface{}) Logger { + kitlog.With(logger.sourceLogger, keyvals...) +} +``` + +Usage: + +```go +logger := log.NewTmLogger(os.Stdout) +logger.SetLevel(config.GetString("log_level")) +node.SetLogger(log.With(logger, "node", Name)) +``` + +**Other log formatters** + +In the future, we may want other formatters like JSONFormatter. + +``` +{ "level": "notice", "time": "2017-04-25 14:45:08.562471297 -0400 EDT", "module": "consensus", "msg": "ABCI Replay Blocks", "appHeight": 0, "storeHeight": 0, "stateHeight": 0 } +``` + +### 3) Dynamic logger setting + +https://dave.cheney.net/2017/01/23/the-package-level-logger-anti-pattern + +This is the hardest part and where the most work will be done. logger should be tied to the processing struct, or the context if it adds some fields to the logger. + +```go +type BaseService struct { + log log15.Logger + name string + started uint32 // atomic + stopped uint32 // atomic +... +} +``` + +BaseService already contains `log` field, so most of the structs embedding it should be fine. We should rename it to `logger`. + +The only thing missing is the ability to set logger: + +``` +func (bs *BaseService) SetLogger(l log.Logger) { + bs.logger = l +} +``` + +### 4) Conventions + +Important keyvals should go first. Example: + +``` +correct +I[2017-04-25|14:45:08.322] ABCI Replay Blocks module=consensus instance=1 appHeight=0 storeHeight=0 stateHeight=0 +``` + +not + +``` +wrong +I[2017-04-25|14:45:08.322] ABCI Replay Blocks module=consensus appHeight=0 storeHeight=0 stateHeight=0 instance=1 +``` + +for that in most cases you'll need to add `instance` field to a logger upon creating, not when u log a particular message: + +```go +colorFn := func(keyvals ...interface{}) term.FgBgColor { + for i := 1; i < len(keyvals); i += 2 { + if keyvals[i] == "instance" && keyvals[i+1] == "1" { + return term.FgBgColor{Fg: term.Blue} + } else if keyvals[i] == "instance" && keyvals[i+1] == "1" { + return term.FgBgColor{Fg: term.Red} + } + } + return term.FgBgColor{} + } +logger := term.NewLogger(os.Stdout, log.NewTmLogger, colorFn) + +c1 := NewConsensusReactor(...) +c1.SetLogger(log.With(logger, "instance", 1)) + +c2 := NewConsensusReactor(...) +c2.SetLogger(log.With(logger, "instance", 2)) +``` + +## Status + +Implemented + +## Consequences + +### Positive + +Dynamic logger, which could be turned off for some modules at runtime. Public interface for other projects using Tendermint libraries. + +### Negative + +We may loose the ability to color keys in keyvalue pairs. go-kit allow you to easily change foreground / background colors of the whole string, but not its parts. + +### Neutral + +## Appendix A. + +I really like a minimalistic approach go-kit took with his logger https://github.com/go-kit/kit/tree/master/log: + +``` +type Logger interface { + Log(keyvals ...interface{}) error +} +``` + +See [The Hunt for a Logger Interface](https://web.archive.org/web/20210902161539/https://go-talks.appspot.com/github.com/ChrisHines/talks/structured-logging/structured-logging.slide#1). The advantage is greater composability (check out how go-kit defines colored logging or log-leveled logging on top of this interface https://github.com/go-kit/kit/tree/master/log). diff --git a/docs/architecture/adr-002-event-subscription.md b/docs/architecture/adr-002-event-subscription.md new file mode 100644 index 000000000..4cc2affbc --- /dev/null +++ b/docs/architecture/adr-002-event-subscription.md @@ -0,0 +1,88 @@ +# ADR 2: Event Subscription + +## Context + +In the light client (or any other client), the user may want to **subscribe to +a subset of transactions** (rather than all of them) using `/subscribe?event=X`. For +example, I want to subscribe for all transactions associated with a particular +account. Same for fetching. The user may want to **fetch transactions based on +some filter** (rather than fetching all the blocks). For example, I want to get +all transactions for a particular account in the last two weeks (`tx's block time >= '2017-06-05'`). + +Now you can't even subscribe to "all txs" in Tendermint. + +The goal is a simple and easy to use API for doing that. + +![Tx Send Flow Diagram](img/tags1.png) + +## Decision + +ABCI app return tags with a `DeliverTx` response inside the `data` field (_for +now, later we may create a separate field_). Tags is a list of key-value pairs, +protobuf encoded. + +Example data: + +```json +{ + "abci.account.name": "Igor", + "abci.account.address": "0xdeadbeef", + "tx.gas": 7 +} +``` + +### Subscribing for transactions events + +If the user wants to receive only a subset of transactions, ABCI-app must +return a list of tags with a `DeliverTx` response. These tags will be parsed and +matched with the current queries (subscribers). If the query matches the tags, +subscriber will get the transaction event. + +``` +/subscribe?query="tm.event = Tx AND tx.hash = AB0023433CF0334223212243BDD AND abci.account.invoice.number = 22" +``` + +A new package must be developed to replace the current `events` package. It +will allow clients to subscribe to a different types of events in the future: + +``` +/subscribe?query="abci.account.invoice.number = 22" +/subscribe?query="abci.account.invoice.owner CONTAINS Igor" +``` + +### Fetching transactions + +This is a bit tricky because a) we want to support a number of indexers, all of +which have a different API b) we don't know whenever tags will be sufficient +for the most apps (I guess we'll see). + +``` +/txs/search?query="tx.hash = AB0023433CF0334223212243BDD AND abci.account.owner CONTAINS Igor" +/txs/search?query="abci.account.owner = Igor" +``` + +For historic queries we will need a indexing storage (Postgres, SQLite, ...). + +### Issues + +- https://github.com/tendermint/tendermint/issues/376 +- https://github.com/tendermint/tendermint/issues/287 +- https://github.com/tendermint/tendermint/issues/525 (related) + +## Status + +Implemented + +## Consequences + +### Positive + +- same format for event notifications and search APIs +- powerful enough query + +### Negative + +- performance of the `match` function (where we have too many queries / subscribers) +- there is an issue where there are too many txs in the DB + +### Neutral diff --git a/docs/architecture/adr-003-abci-app-rpc.md b/docs/architecture/adr-003-abci-app-rpc.md new file mode 100644 index 000000000..3bc46498c --- /dev/null +++ b/docs/architecture/adr-003-abci-app-rpc.md @@ -0,0 +1,34 @@ +# ADR 3: Must an ABCI-app have an RPC server? + +## Context + +ABCI-server could expose its own RPC-server and act as a proxy to Tendermint. + +The idea was for the Tendermint RPC to just be a transparent proxy to the app. +Clients need to talk to Tendermint for proofs, unless we burden all app devs +with exposing Tendermint proof stuff. Also seems less complex to lock down one +server than two, but granted it makes querying a bit more kludgy since it needs +to be passed as a `Query`. Also, **having a very standard rpc interface means +the light-client can work with all apps and handle proofs**. The only +app-specific logic is decoding the binary data to a more readable form (eg. +json). This is a huge advantage for code-reuse and standardization. + +## Decision + +We dont expose an RPC server on any of our ABCI-apps. + +## Status + +Implemented + +## Consequences + +### Positive + +- Unified interface for all apps + +### Negative + +- `Query` interface + +### Neutral diff --git a/docs/architecture/adr-004-historical-validators.md b/docs/architecture/adr-004-historical-validators.md new file mode 100644 index 000000000..7341c4735 --- /dev/null +++ b/docs/architecture/adr-004-historical-validators.md @@ -0,0 +1,38 @@ +# ADR 004: Historical Validators + +## Context + +Right now, we can query the present validator set, but there is no history. +If you were offline for a long time, there is no way to reconstruct past validators. This is needed for the light client and we agreed needs enhancement of the API. + +## Decision + +For every block, store a new structure that contains either the latest validator set, +or the height of the last block for which the validator set changed. Note this is not +the height of the block which returned the validator set change itself, but the next block, +ie. the first block it comes into effect for. + +Storing the validators will be handled by the `state` package. + +At some point in the future, we may consider more efficient storage in the case where the validators +are updated frequently - for instance by only saving the diffs, rather than the whole set. + +An alternative approach suggested keeping the validator set, or diffs of it, in a merkle IAVL tree. +While it might afford cheaper proofs that a validator set has not changed, it would be more complex, +and likely less efficient. + +## Status + +Implemented + +## Consequences + +### Positive + +- Can query old validator sets, with proof. + +### Negative + +- Writes an extra structure to disk with every block. + +### Neutral diff --git a/docs/architecture/adr-005-consensus-params.md b/docs/architecture/adr-005-consensus-params.md new file mode 100644 index 000000000..550e9fc0c --- /dev/null +++ b/docs/architecture/adr-005-consensus-params.md @@ -0,0 +1,85 @@ +# ADR 005: Consensus Params + +## Context + +Consensus critical parameters controlling blockchain capacity have until now been hard coded, loaded from a local config, or neglected. +Since they may be need to be different in different networks, and potentially to evolve over time within +networks, we seek to initialize them in a genesis file, and expose them through the ABCI. + +While we have some specific parameters now, like maximum block and transaction size, we expect to have more in the future, +such as a period over which evidence is valid, or the frequency of checkpoints. + +## Decision + +### ConsensusParams + +No consensus critical parameters should ever be found in the `config.toml`. + +A new `ConsensusParams` is optionally included in the `genesis.json` file, +and loaded into the `State`. Any items not included are set to their default value. +A value of 0 is undefined (see ABCI, below). A value of -1 is used to indicate the parameter does not apply. +The parameters are used to determine the validity of a block (and tx) via the union of all relevant parameters. + +``` +type ConsensusParams struct { + BlockSize + TxSize + BlockGossip +} + +type BlockSize struct { + MaxBytes int + MaxTxs int + MaxGas int +} + +type TxSize struct { + MaxBytes int + MaxGas int +} + +type BlockGossip struct { + BlockPartSizeBytes int +} +``` + +The `ConsensusParams` can evolve over time by adding new structs that cover different aspects of the consensus rules. + +The `BlockPartSizeBytes` and the `BlockSize.MaxBytes` are enforced to be greater than 0. +The former because we need a part size, the latter so that we always have at least some sanity check over the size of blocks. + +### ABCI + +#### InitChain + +InitChain currently takes the initial validator set. It should be extended to also take parts of the ConsensusParams. +There is some case to be made for it to take the entire Genesis, except there may be things in the genesis, +like the BlockPartSize, that the app shouldn't really know about. + +#### EndBlock + +The EndBlock response includes a `ConsensusParams`, which includes BlockSize and TxSize, but not BlockGossip. +Other param struct can be added to `ConsensusParams` in the future. +The `0` value is used to denote no change. +Any other value will update that parameter in the `State.ConsensusParams`, to be applied for the next block. +Tendermint should have hard-coded upper limits as sanity checks. + +## Status + +Implemented + +## Consequences + +### Positive + +- Alternative capacity limits and consensus parameters can be specified without re-compiling the software. +- They can also change over time under the control of the application + +### Negative + +- More exposed parameters is more complexity +- Different rules at different heights in the blockchain complicates fast sync + +### Neutral + +- The TxSize, which checks validity, may be in conflict with the config's `max_block_size_tx`, which determines proposal sizes diff --git a/docs/architecture/adr-006-trust-metric.md b/docs/architecture/adr-006-trust-metric.md new file mode 100644 index 000000000..608978207 --- /dev/null +++ b/docs/architecture/adr-006-trust-metric.md @@ -0,0 +1,229 @@ +# ADR 006: Trust Metric Design + +## Context + +The proposed trust metric will allow Tendermint to maintain local trust rankings for peers it has directly interacted with, which can then be used to implement soft security controls. The calculations were obtained from the [TrustGuard](https://dl.acm.org/citation.cfm?id=1060808) project. + +### Background + +The Tendermint Core project developers would like to improve Tendermint security and reliability by keeping track of the level of trustworthiness peers have demonstrated within the peer-to-peer network. This way, undesirable outcomes from peers will not immediately result in them being dropped from the network (potentially causing drastic changes to take place). Instead, peers behavior can be monitored with appropriate metrics and be removed from the network once Tendermint Core is certain the peer is a threat. For example, when the PEXReactor makes a request for peers network addresses from a already known peer, and the returned network addresses are unreachable, this untrustworthy behavior should be tracked. Returning a few bad network addresses probably shouldn’t cause a peer to be dropped, while excessive amounts of this behavior does qualify the peer being dropped. + +Trust metrics can be circumvented by malicious nodes through the use of strategic oscillation techniques, which adapts the malicious node’s behavior pattern in order to maximize its goals. For instance, if the malicious node learns that the time interval of the Tendermint trust metric is _X_ hours, then it could wait _X_ hours in-between malicious activities. We could try to combat this issue by increasing the interval length, yet this will make the system less adaptive to recent events. + +Instead, having shorter intervals, but keeping a history of interval values, will give our metric the flexibility needed in order to keep the network stable, while also making it resilient against a strategic malicious node in the Tendermint peer-to-peer network. Also, the metric can access trust data over a rather long period of time while not greatly increasing its history size by aggregating older history values over a larger number of intervals, and at the same time, maintain great precision for the recent intervals. This approach is referred to as fading memories, and closely resembles the way human beings remember their experiences. The trade-off to using history data is that the interval values should be preserved in-between executions of the node. + +### References + +S. Mudhakar, L. Xiong, and L. Liu, “TrustGuard: Countering Vulnerabilities in Reputation Management for Decentralized Overlay Networks,” in _Proceedings of the 14th international conference on World Wide Web, pp. 422-431_, May 2005. + +## Decision + +The proposed trust metric will allow a developer to inform the trust metric store of all good and bad events relevant to a peer's behavior, and at any time, the metric can be queried for a peer's current trust ranking. + +The three subsections below will cover the process being considered for calculating the trust ranking, the concept of the trust metric store, and the interface for the trust metric. + +### Proposed Process + +The proposed trust metric will count good and bad events relevant to the object, and calculate the percent of counters that are good over an interval with a predefined duration. This is the procedure that will continue for the life of the trust metric. When the trust metric is queried for the current **trust value**, a resilient equation will be utilized to perform the calculation. + +The equation being proposed resembles a Proportional-Integral-Derivative (PID) controller used in control systems. The proportional component allows us to be sensitive to the value of the most recent interval, while the integral component allows us to incorporate trust values stored in the history data, and the derivative component allows us to give weight to sudden changes in the behavior of a peer. We compute the trust value of a peer in interval i based on its current trust ranking, its trust rating history prior to interval _i_ (over the past _maxH_ number of intervals) and its trust ranking fluctuation. We will break up the equation into the three components. + +```math +(1) Proportional Value = a * R[i] +``` + +where _R_[*i*] denotes the raw trust value at time interval _i_ (where _i_ == 0 being current time) and _a_ is the weight applied to the contribution of the current reports. The next component of our equation uses a weighted sum over the last _maxH_ intervals to calculate the history value for time _i_: + +`H[i] =` ![formula1](img/formula1.png "Weighted Sum Formula") + +The weights can be chosen either optimistically or pessimistically. An optimistic weight creates larger weights for newer history data values, while the the pessimistic weight creates larger weights for time intervals with lower scores. The default weights used during the calculation of the history value are optimistic and calculated as _Wk_ = 0.8^_k_, for time interval _k_. With the history value available, we can now finish calculating the integral value: + +```math +(2) Integral Value = b * H[i] +``` + +Where _H_[*i*] denotes the history value at time interval _i_ and _b_ is the weight applied to the contribution of past performance for the object being measured. The derivative component will be calculated as follows: + +```math +D[i] = R[i] – H[i] + +(3) Derivative Value = c(D[i]) * D[i] +``` + +Where the value of _c_ is selected based on the _D_[*i*] value relative to zero. The default selection process makes _c_ equal to 0 unless _D_[*i*] is a negative value, in which case c is equal to 1. The result is that the maximum penalty is applied when current behavior is lower than previously experienced behavior. If the current behavior is better than the previously experienced behavior, then the Derivative Value has no impact on the trust value. With the three components brought together, our trust value equation is calculated as follows: + +```math +TrustValue[i] = a * R[i] + b * H[i] + c(D[i]) * D[i] +``` + +As a performance optimization that will keep the amount of raw interval data being saved to a reasonable size of _m_, while allowing us to represent 2^_m_ - 1 history intervals, we can employ the fading memories technique that will trade space and time complexity for the precision of the history data values by summarizing larger quantities of less recent values. While our equation above attempts to access up to _maxH_ (which can be 2^_m_ - 1), we will map those requests down to _m_ values using equation 4 below: + +```math +(4) j = index, where index > 0 +``` + +Where _j_ is one of _(0, 1, 2, … , m – 1)_ indices used to access history interval data. Now we can access the raw intervals using the following calculations: + +```math +R[0] = raw data for current time interval +``` + +`R[j] =` ![formula2](img/formula2.png "Fading Memories Formula") + +### Trust Metric Store + +Similar to the P2P subsystem AddrBook, the trust metric store will maintain information relevant to Tendermint peers. Additionally, the trust metric store will ensure that trust metrics will only be active for peers that a node is currently and directly engaged with. + +Reactors will provide a peer key to the trust metric store in order to retrieve the associated trust metric. The trust metric can then record new positive and negative events experienced by the reactor, as well as provided the current trust score calculated by the metric. + +When the node is shutting down, the trust metric store will save history data for trust metrics associated with all known peers. This saved information allows experiences with a peer to be preserved across node executions, which can span a tracking windows of days or weeks. The trust history data is loaded automatically during OnStart. + +### Interface Detailed Design + +Each trust metric allows for the recording of positive/negative events, querying the current trust value/score, and the stopping/pausing of tracking over time intervals. This can be seen below: + +```go +// TrustMetric - keeps track of peer reliability +type TrustMetric struct { + // Private elements. +} + +// Pause tells the metric to pause recording data over time intervals. +// All method calls that indicate events will unpause the metric +func (tm *TrustMetric) Pause() {} + +// Stop tells the metric to stop recording data over time intervals +func (tm *TrustMetric) Stop() {} + +// BadEvents indicates that an undesirable event(s) took place +func (tm *TrustMetric) BadEvents(num int) {} + +// GoodEvents indicates that a desirable event(s) took place +func (tm *TrustMetric) GoodEvents(num int) {} + +// TrustValue gets the dependable trust value; always between 0 and 1 +func (tm *TrustMetric) TrustValue() float64 {} + +// TrustScore gets a score based on the trust value always between 0 and 100 +func (tm *TrustMetric) TrustScore() int {} + +// NewMetric returns a trust metric with the default configuration +func NewMetric() *TrustMetric {} + +//------------------------------------------------------------------------------------------------ +// For example + +tm := NewMetric() + +tm.BadEvents(1) +score := tm.TrustScore() + +tm.Stop() +``` + +Some of the trust metric parameters can be configured. The weight values should probably be left alone in more cases, yet the time durations for the tracking window and individual time interval should be considered. + +```go +// TrustMetricConfig - Configures the weight functions and time intervals for the metric +type TrustMetricConfig struct { + // Determines the percentage given to current behavior + ProportionalWeight float64 + + // Determines the percentage given to prior behavior + IntegralWeight float64 + + // The window of time that the trust metric will track events across. + // This can be set to cover many days without issue + TrackingWindow time.Duration + + // Each interval should be short for adapability. + // Less than 30 seconds is too sensitive, + // and greater than 5 minutes will make the metric numb + IntervalLength time.Duration +} + +// DefaultConfig returns a config with values that have been tested and produce desirable results +func DefaultConfig() TrustMetricConfig {} + +// NewMetricWithConfig returns a trust metric with a custom configuration +func NewMetricWithConfig(tmc TrustMetricConfig) *TrustMetric {} + +//------------------------------------------------------------------------------------------------ +// For example + +config := TrustMetricConfig{ + TrackingWindow: time.Minute * 60 * 24, // one day + IntervalLength: time.Minute * 2, +} + +tm := NewMetricWithConfig(config) + +tm.BadEvents(10) +tm.Pause() +tm.GoodEvents(1) // becomes active again +``` + +A trust metric store should be created with a DB that has persistent storage so it can save history data across node executions. All trust metrics instantiated by the store will be created with the provided TrustMetricConfig configuration. + +When you attempt to fetch the trust metric for a peer, and an entry does not exist in the trust metric store, a new metric is automatically created and the entry made within the store. + +In additional to the fetching method, GetPeerTrustMetric, the trust metric store provides a method to call when a peer has disconnected from the node. This is so the metric can be paused (history data will not be saved) for periods of time when the node is not having direct experiences with the peer. + +```go +// TrustMetricStore - Manages all trust metrics for peers +type TrustMetricStore struct { + cmn.BaseService + + // Private elements +} + +// OnStart implements Service +func (tms *TrustMetricStore) OnStart(context.Context) error { return nil } + +// OnStop implements Service +func (tms *TrustMetricStore) OnStop() {} + +// NewTrustMetricStore returns a store that saves data to the DB +// and uses the config when creating new trust metrics +func NewTrustMetricStore(db dbm.DB, tmc TrustMetricConfig) *TrustMetricStore {} + +// Size returns the number of entries in the trust metric store +func (tms *TrustMetricStore) Size() int {} + +// GetPeerTrustMetric returns a trust metric by peer key +func (tms *TrustMetricStore) GetPeerTrustMetric(key string) *TrustMetric {} + +// PeerDisconnected pauses the trust metric associated with the peer identified by the key +func (tms *TrustMetricStore) PeerDisconnected(key string) {} + +//------------------------------------------------------------------------------------------------ +// For example + +db := dbm.NewDB("trusthistory", "goleveldb", dirPathStr) +tms := NewTrustMetricStore(db, DefaultConfig()) + +tm := tms.GetPeerTrustMetric(key) +tm.BadEvents(1) + +tms.PeerDisconnected(key) +``` + +## Status + +Approved. + +## Consequences + +### Positive + +- The trust metric will allow Tendermint to make non-binary security and reliability decisions +- Will help Tendermint implement deterrents that provide soft security controls, yet avoids disruption on the network +- Will provide useful profiling information when analyzing performance over time related to peer interaction + +### Negative + +- Requires saving the trust metric history data across node executions + +### Neutral + +- Keep in mind that, good events need to be recorded just as bad events do using this implementation diff --git a/docs/architecture/adr-007-trust-metric-usage.md b/docs/architecture/adr-007-trust-metric-usage.md new file mode 100644 index 000000000..0daa4bb33 --- /dev/null +++ b/docs/architecture/adr-007-trust-metric-usage.md @@ -0,0 +1,106 @@ +# ADR 007: Trust Metric Usage Guide + +## Context + +Tendermint is required to monitor peer quality in order to inform its peer dialing and peer exchange strategies. + +When a node first connects to the network, it is important that it can quickly find good peers. +Thus, while a node has fewer connections, it should prioritize connecting to higher quality peers. +As the node becomes well connected to the rest of the network, it can dial lesser known or lesser +quality peers and help assess their quality. Similarly, when queried for peers, a node should make +sure they dont return low quality peers. + +Peer quality can be tracked using a trust metric that flags certain behaviors as good or bad. When enough +bad behavior accumulates, we can mark the peer as bad and disconnect. +For example, when the PEXReactor makes a request for peers network addresses from an already known peer, and the returned network addresses are unreachable, this undesirable behavior should be tracked. Returning a few bad network addresses probably shouldn’t cause a peer to be dropped, while excessive amounts of this behavior does qualify the peer for removal. The originally proposed approach and design document for the trust metric can be found in the [ADR 006](adr-006-trust-metric.md) document. + +The trust metric implementation allows a developer to obtain a peer's trust metric from a trust metric store, and track good and bad events relevant to a peer's behavior, and at any time, the peer's metric can be queried for a current trust value. The current trust value is calculated with a formula that utilizes current behavior, previous behavior, and change between the two. Current behavior is calculated as the percentage of good behavior within a time interval. The time interval is short; probably set between 30 seconds and 5 minutes. On the other hand, the historic data can estimate a peer's behavior over days worth of tracking. At the end of a time interval, the current behavior becomes part of the historic data, and a new time interval begins with the good and bad counters reset to zero. + +These are some important things to keep in mind regarding how the trust metrics handle time intervals and scoring: + +- Each new time interval begins with a perfect score +- Bad events quickly bring the score down and good events cause the score to slowly rise +- When the time interval is over, the percentage of good events becomes historic data. + +Some useful information about the inner workings of the trust metric: + +- When a trust metric is first instantiated, a timer (ticker) periodically fires in order to handle transitions between trust metric time intervals +- If a peer is disconnected from a node, the timer should be paused, since the node is no longer connected to that peer +- The ability to pause the metric is supported with the store **PeerDisconnected** method and the metric **Pause** method +- After a pause, if a good or bad event method is called on a metric, it automatically becomes unpaused and begins a new time interval. + +## Decision + +The trust metric capability is now available, yet, it still leaves the question of how should it be applied throughout Tendermint in order to properly track the quality of peers? + +### Proposed Process + +Peers are managed using an address book and a trust metric: + +- The address book keeps a record of peers and provides selection methods +- The trust metric tracks the quality of the peers + +#### Presence in Address Book + +Outbound peers are added to the address book before they are dialed, +and inbound peers are added once the peer connection is set up. +Peers are also added to the address book when they are received in response to +a pexRequestMessage. + +While a node has less than `needAddressThreshold`, it will periodically request more, +via pexRequestMessage, from randomly selected peers and from newly dialed outbound peers. + +When a new address is added to an address book that has more than `0.5*needAddressThreshold` addresses, +then with some low probability, a randomly chosen low quality peer is removed. + +#### Outbound Peers + +Peers attempt to maintain a minimum number of outbound connections by +repeatedly querying the address book for peers to connect to. +While a node has few to no outbound connections, the address book is biased to return +higher quality peers. As the node increases the number of outbound connections, +the address book is biased to return less-vetted or lower-quality peers. + +#### Inbound Peers + +Peers also maintain a maximum number of total connections, MaxNumPeers. +If a peer has MaxNumPeers, new incoming connections will be accepted with low probability. +When such a new connection is accepted, the peer disconnects from a probabilistically chosen low ranking peer +so it does not exceed MaxNumPeers. + +#### Peer Exchange + +When a peer receives a pexRequestMessage, it returns a random sample of high quality peers from the address book. Peers with no score or low score should not be inclided in a response to pexRequestMessage. + +#### Peer Quality + +Peer quality is tracked in the connection and across the reactors by storing the TrustMetric in the peer's +thread safe Data store. + +Peer behavior is then defined as one of the following: + +- Fatal - something outright malicious that causes us to disconnect the peer and ban it from the address book for some amount of time +- Bad - Any kind of timeout, messages that don't unmarshal, fail other validity checks, or messages we didn't ask for or aren't expecting (usually worth one bad event) +- Neutral - Unknown channels/message types/version upgrades (no good or bad events recorded) +- Correct - Normal correct behavior (worth one good event) +- Good - some random majority of peers per reactor sending us useful messages (worth more than one good event). + +Note that Fatal behavior causes us to remove the peer, and neutral behavior does not affect the score. + +## Status + +Proposed. + +## Consequences + +### Positive + +- Bringing the address book and trust metric store together will cause the network to be built in a way that encourages greater security and reliability. + +### Negative + +- TBD + +### Neutral + +- Keep in mind that, good events need to be recorded just as bad events do using this implementation. diff --git a/docs/architecture/adr-008-priv-validator.md b/docs/architecture/adr-008-priv-validator.md new file mode 100644 index 000000000..a3d31048a --- /dev/null +++ b/docs/architecture/adr-008-priv-validator.md @@ -0,0 +1,35 @@ +# ADR 008: SocketPV + +Tendermint node's should support only two in-process PrivValidator +implementations: + +- FilePV uses an unencrypted private key in a "priv_validator.json" file - no + configuration required (just `tendermint init validator`). +- TCPVal and IPCVal use TCP and Unix sockets respectively to send signing requests + to another process - the user is responsible for starting that process themselves. + +Both TCPVal and IPCVal addresses can be provided via flags at the command line +or in the configuration file; TCPVal addresses must be of the form +`tcp://:` and IPCVal addresses `unix:///path/to/file.sock` - +doing so will cause Tendermint to ignore any private validator files. + +TCPVal will listen on the given address for incoming connections from an external +private validator process. It will halt any operation until at least one external +process successfully connected. + +The external priv_validator process will dial the address to connect to +Tendermint, and then Tendermint will send requests on the ensuing connection to +sign votes and proposals. Thus the external process initiates the connection, +but the Tendermint process makes all requests. In a later stage we're going to +support multiple validators for fault tolerance. To prevent double signing they +need to be synced, which is deferred to an external solution (see #1185). + +Conversely, IPCVal will make an outbound connection to an existing socket opened +by the external validator process. + +In addition, Tendermint will provide implementations that can be run in that +external process. These include: + +- FilePV will encrypt the private key, and the user must enter password to + decrypt key when process is started. +- LedgerPV uses a Ledger Nano S to handle all signing. diff --git a/docs/architecture/adr-009-ABCI-design.md b/docs/architecture/adr-009-ABCI-design.md new file mode 100644 index 000000000..c1ad3a614 --- /dev/null +++ b/docs/architecture/adr-009-ABCI-design.md @@ -0,0 +1,271 @@ +# ADR 009: ABCI UX Improvements + +## Changelog + +23-06-2018: Some minor fixes from review +07-06-2018: Some updates based on discussion with Jae +07-06-2018: Initial draft to match what was released in ABCI v0.11 + +## Context + +The ABCI was first introduced in late 2015. It's purpose is to be: + +- a generic interface between state machines and their replication engines +- agnostic to the language the state machine is written in +- agnostic to the replication engine that drives it + +This means ABCI should provide an interface for both pluggable applications and +pluggable consensus engines. + +To achieve this, it uses Protocol Buffers (proto3) for message types. The dominant +implementation is in Go. + +After some recent discussions with the community on github, the following were +identified as pain points: + +- Amino encoded types +- Managing validator sets +- Imports in the protobuf file + +See the [references](#references) for more. + +### Imports + +The native proto library in Go generates inflexible and verbose code. +Many in the Go community have adopted a fork called +[gogoproto](https://github.com/gogo/protobuf) that provides a +variety of features aimed to improve the developer experience. +While `gogoproto` is nice, it creates an additional dependency, and compiling +the protobuf types for other languages has been reported to fail when `gogoproto` is used. + +### Amino + +Amino is an encoding protocol designed to improve over insufficiencies of protobuf. +It's goal is to be proto4. + +Many people are frustrated by incompatibility with protobuf, +and with the requirement for Amino to be used at all within ABCI. + +We intend to make Amino successful enough that we can eventually use it for ABCI +message types directly. By then it should be called proto4. In the meantime, +we want it to be easy to use. + +### PubKey + +PubKeys are encoded using Amino (and before that, go-wire). +Ideally, PubKeys are an interface type where we don't know all the +implementation types, so its unfitting to use `oneof` or `enum`. + +### Addresses + +The address for ED25519 pubkey is the RIPEMD160 of the Amino +encoded pubkey. This introduces an Amino dependency in the address generation, +a functionality that is widely required and should be easy to compute as +possible. + +### Validators + +To change the validator set, applications can return a list of validator updates +with ResponseEndBlock. In these updates, the public key _must_ be included, +because Tendermint requires the public key to verify validator signatures. This +means ABCI developers have to work with PubKeys. That said, it would also be +convenient to work with address information, and for it to be simple to do so. + +### AbsentValidators + +Tendermint also provides a list of validators in BeginBlock who did not sign the +last block. This allows applications to reflect availability behavior in the +application, for instance by punishing validators for not having votes included +in commits. + +### InitChain + +Tendermint passes in a list of validators here, and nothing else. It would +benefit the application to be able to control the initial validator set. For +instance the genesis file could include application-based information about the +initial validator set that the application could process to determine the +initial validator set. Additionally, InitChain would benefit from getting all +the genesis information. + +### Header + +ABCI provides the Header in RequestBeginBlock so the application can have +important information about the latest state of the blockchain. + +## Decision + +### Imports + +Move away from gogoproto. In the short term, we will just maintain a second +protobuf file without the gogoproto annotations. In the medium term, we will +make copies of all the structs in Golang and shuttle back and forth. In the long +term, we will use Amino. + +### Amino + +To simplify ABCI application development in the short term, +Amino will be completely removed from the ABCI: + +- It will not be required for PubKey encoding +- It will not be required for computing PubKey addresses + +That said, we are working to make Amino a huge success, and to become proto4. +To facilitate adoption and cross-language compatibility in the near-term, Amino +v1 will: + +- be fully compatible with the subset of proto3 that excludes `oneof` +- use the Amino prefix system to provide interface types, as opposed to `oneof` + style union types. + +That said, an Amino v2 will be worked on to improve the performance of the +format and its useability in cryptographic applications. + +### PubKey + +Encoding schemes infect software. As a generic middleware, ABCI aims to have +some cross scheme compatibility. For this it has no choice but to include opaque +bytes from time to time. While we will not enforce Amino encoding for these +bytes yet, we need to provide a type system. The simplest way to do this is to +use a type string. + +PubKey will now look like: + +``` +message PubKey { + string type + bytes data +} +``` + +where `type` can be: + +- "ed225519", with `data = ` +- "secp256k1", with `data = <33-byte OpenSSL compressed pubkey>` + +As we want to retain flexibility here, and since ideally, PubKey would be an +interface type, we do not use `enum` or `oneof`. + +### Addresses + +To simplify and improve computing addresses, we change it to the first 20-bytes of the SHA256 +of the raw 32-byte public key. + +We continue to use the Bitcoin address scheme for secp256k1 keys. + +### Validators + +Add a `bytes address` field: + +``` +message Validator { + bytes address + PubKey pub_key + int64 power +} +``` + +### RequestBeginBlock and AbsentValidators + +To simplify this, RequestBeginBlock will include the complete validator set, +including the address, and voting power of each validator, along +with a boolean for whether or not they voted: + +``` +message RequestBeginBlock { + bytes hash + Header header + LastCommitInfo last_commit_info + repeated Evidence byzantine_validators +} + +message LastCommitInfo { + int32 CommitRound + repeated SigningValidator validators +} + +message SigningValidator { + Validator validator + bool signed_last_block +} +``` + +Note that in Validators in RequestBeginBlock, we DO NOT include public keys. Public keys are +larger than addresses and in the future, with quantum computers, will be much +larger. The overhead of passing them, especially during fast-sync, is +significant. + +Additional, addresses are changing to be simpler to compute, further removing +the need to include pubkeys here. + +In short, ABCI developers must be aware of both addresses and public keys. + +### ResponseEndBlock + +Since ResponseEndBlock includes Validator, it must now include their address. + +### InitChain + +Change RequestInitChain to give the app all the information from the genesis file: + +``` +message RequestInitChain { + int64 time + string chain_id + ConsensusParams consensus_params + repeated Validator validators + bytes app_state_bytes +} +``` + +Change ResponseInitChain to allow the app to specify the initial validator set +and consensus parameters. + +``` +message ResponseInitChain { + ConsensusParams consensus_params + repeated Validator validators +} +``` + +### Header + +Now that Tendermint Amino will be compatible with proto3, the Header in ABCI +should exactly match the Tendermint header - they will then be encoded +identically in ABCI and in Tendermint Core. + +## Status + +Implemented + +## Consequences + +### Positive + +- Easier for developers to build on the ABCI +- ABCI and Tendermint headers are identically serialized + +### Negative + +- Maintenance overhead of alternative type encoding scheme +- Performance overhead of passing all validator info every block (at least its + only addresses, and not also pubkeys) +- Maintenance overhead of duplicate types + +### Neutral + +- ABCI developers must know about validator addresses + +## References + +- [ABCI v0.10.3 Specification (before this + proposal)](https://github.com/tendermint/abci/blob/v0.10.3/specification.rst) +- [ABCI v0.11.0 Specification (implementing first draft of this + proposal)](https://github.com/tendermint/abci/blob/v0.11.0/specification.md) +- [Ed25519 addresses](https://github.com/tendermint/go-crypto/issues/103) +- [InitChain contains the + Genesis](https://github.com/tendermint/abci/issues/216) +- [PubKeys](https://github.com/tendermint/tendermint/issues/1524) +- [Notes on + Header](https://github.com/tendermint/tendermint/issues/1605) +- [Gogoproto issues](https://github.com/tendermint/abci/issues/256) +- [Absent Validators](https://github.com/tendermint/abci/issues/231) diff --git a/docs/architecture/adr-010-crypto-changes.md b/docs/architecture/adr-010-crypto-changes.md new file mode 100644 index 000000000..41d15da35 --- /dev/null +++ b/docs/architecture/adr-010-crypto-changes.md @@ -0,0 +1,77 @@ +# ADR 010: Crypto Changes + +## Context + +Tendermint is a cryptographic protocol that uses and composes a variety of cryptographic primitives. + +After nearly 4 years of development, Tendermint has recently undergone multiple security reviews to search for vulnerabilities and to assess the the use and composition of cryptographic primitives. + +### Hash Functions + +Tendermint uses RIPEMD160 universally as a hash function, most notably in its Merkle tree implementation. + +RIPEMD160 was chosen because it provides the shortest fingerprint that is long enough to be considered secure (ie. birthday bound of 80-bits). +It was also developed in the open academic community, unlike NSA-designed algorithms like SHA256. + +That said, the cryptographic community appears to unanimously agree on the security of SHA256. It has become a universal standard, especially now that SHA1 is broken, being required in TLS connections and having optimized support in hardware. + +### Merkle Trees + +Tendermint uses a simple Merkle tree to compute digests of large structures like transaction batches +and even blockchain headers. The Merkle tree length prefixes byte arrays before concatenating and hashing them. +It uses RIPEMD160. + +### Addresses + +ED25519 addresses are computed using the RIPEMD160 of the Amino encoding of the public key. +RIPEMD160 is generally considered an outdated hash function, and is much slower +than more modern functions like SHA256 or Blake2. + +### Authenticated Encryption + +Tendermint P2P connections use authenticated encryption to provide privacy and authentication in the communications. +This is done using the simple Station-to-Station protocol with the NaCL Ed25519 library. + +While there have been no vulnerabilities found in the implementation, there are some concerns: + +- NaCL uses Salsa20, a not-widely used and relatively out-dated stream cipher that has been obsoleted by ChaCha20 +- Connections use RIPEMD160 to compute a value that is used for the encryption nonce with subtle requirements on how it's used + +## Decision + +### Hash Functions + +Use the first 20-bytes of the SHA256 hash instead of RIPEMD160 for everything + +### Merkle Trees + +TODO + +### Addresses + +Compute ED25519 addresses as the first 20-bytes of the SHA256 of the raw 32-byte public key + +### Authenticated Encryption + +Make the following changes: + +- Use xChaCha20 instead of xSalsa20 - https://github.com/tendermint/tendermint/issues/1124 +- Use an HKDF instead of RIPEMD160 to compute nonces - https://github.com/tendermint/tendermint/issues/1165 + +## Status + +Implemented + +## Consequences + +### Positive + +- More modern and standard cryptographic functions with wider adoption and hardware acceleration + +### Negative + +- Exact authenticated encryption construction isn't already provided in a well-used library + +### Neutral + +## References diff --git a/docs/architecture/adr-011-monitoring.md b/docs/architecture/adr-011-monitoring.md new file mode 100644 index 000000000..e4b62c261 --- /dev/null +++ b/docs/architecture/adr-011-monitoring.md @@ -0,0 +1,116 @@ +# ADR 011: Monitoring + +## Changelog + +08-06-2018: Initial draft +11-06-2018: Reorg after @xla comments +13-06-2018: Clarification about usage of labels + +## Context + +In order to bring more visibility into Tendermint, we would like it to report +metrics and, maybe later, traces of transactions and RPC queries. See +https://github.com/tendermint/tendermint/issues/986. + +A few solutions were considered: + +1. [Prometheus](https://prometheus.io) + a) Prometheus API + b) [go-kit metrics package](https://github.com/go-kit/kit/tree/master/metrics) as an interface plus Prometheus + c) [telegraf](https://github.com/influxdata/telegraf) + d) new service, which will listen to events emitted by pubsub and report metrics +2. [OpenCensus](https://opencensus.io/introduction/) + +### 1. Prometheus + +Prometheus seems to be the most popular product out there for monitoring. It has +a Go client library, powerful queries, alerts. + +**a) Prometheus API** + +We can commit to using Prometheus in Tendermint, but I think Tendermint users +should be free to choose whatever monitoring tool they feel will better suit +their needs (if they don't have existing one already). So we should try to +abstract interface enough so people can switch between Prometheus and other +similar tools. + +**b) go-kit metrics package as an interface** + +metrics package provides a set of uniform interfaces for service +instrumentation and offers adapters to popular metrics packages: + +https://godoc.org/github.com/go-kit/kit/metrics#pkg-subdirectories + +Comparing to Prometheus API, we're losing customisability and control, but gaining +freedom in choosing any instrument from the above list given we will extract +metrics creation into a separate function (see "providers" in node/node.go). + +**c) telegraf** + +Unlike already discussed options, telegraf does not require modifying Tendermint +source code. You create something called an input plugin, which polls +Tendermint RPC every second and calculates the metrics itself. + +While it may sound good, but some metrics we want to report are not exposed via +RPC or pubsub, therefore can't be accessed externally. + +**d) service, listening to pubsub** + +Same issue as the above. + +### 2. opencensus + +opencensus provides both metrics and tracing, which may be important in the +future. It's API looks different from go-kit and Prometheus, but looks like it +covers everything we need. + +Unfortunately, OpenCensus go client does not define any +interfaces, so if we want to abstract away metrics we +will need to write interfaces ourselves. + +### List of metrics + +| | Name | Type | Description | +| --- | ------------------------------------ | ------ | ----------------------------------------------------------------------------- | +| A | consensus_height | Gauge | | +| A | consensus_validators | Gauge | Number of validators who signed | +| A | consensus_validators_power | Gauge | Total voting power of all validators | +| A | consensus_missing_validators | Gauge | Number of validators who did not sign | +| A | consensus_missing_validators_power | Gauge | Total voting power of the missing validators | +| A | consensus_byzantine_validators | Gauge | Number of validators who tried to double sign | +| A | consensus_byzantine_validators_power | Gauge | Total voting power of the byzantine validators | +| A | consensus_block_interval | Timing | Time between this and last block (Block.Header.Time) | +| | consensus_block_time | Timing | Time to create a block (from creating a proposal to commit) | +| | consensus_time_between_blocks | Timing | Time between committing last block and (receiving proposal creating proposal) | +| A | consensus_rounds | Gauge | Number of rounds | +| | consensus_prevotes | Gauge | | +| | consensus_precommits | Gauge | | +| | consensus_prevotes_total_power | Gauge | | +| | consensus_precommits_total_power | Gauge | | +| A | consensus_num_txs | Gauge | | +| A | mempool_size | Gauge | | +| A | consensus_total_txs | Gauge | | +| A | consensus_block_size | Gauge | In bytes | +| A | p2p_peers | Gauge | Number of peers node's connected to | + +`A` - will be implemented in the fist place. + +**Proposed solution** + +## Status + +Implemented + +## Consequences + +### Positive + +Better visibility, support of variety of monitoring backends + +### Negative + +One more library to audit, messing metrics reporting code with business domain. + +### Neutral + +- diff --git a/docs/architecture/adr-012-peer-transport.md b/docs/architecture/adr-012-peer-transport.md new file mode 100644 index 000000000..1cf4fb80b --- /dev/null +++ b/docs/architecture/adr-012-peer-transport.md @@ -0,0 +1,113 @@ +# ADR 012: PeerTransport + +## Context + +One of the more apparent problems with the current architecture in the p2p +package is that there is no clear separation of concerns between different +components. Most notably the `Switch` is currently doing physical connection +handling. An artifact is the dependency of the Switch on +`[config.P2PConfig`](https://github.com/tendermint/tendermint/blob/05a76fb517f50da27b4bfcdc7b4cf185fc61eff6/config/config.go#L272-L339). + +Addresses: + +- [#2046](https://github.com/tendermint/tendermint/issues/2046) +- [#2047](https://github.com/tendermint/tendermint/issues/2047) + +First iteraton in [#2067](https://github.com/tendermint/tendermint/issues/2067) + +## Decision + +Transport concerns will be handled by a new component (`PeerTransport`) which +will provide Peers at its boundary to the caller. In turn `Switch` will use +this new component accept new `Peer`s and dial them based on `NetAddress`. + +### PeerTransport + +Responsible for emitting and connecting to Peers. The implementation of `Peer` +is left to the transport, which implies that the chosen transport dictates the +characteristics of the implementation handed back to the `Switch`. Each +transport implementation is responsible to filter establishing peers specific +to its domain, for the default multiplexed implementation the following will +apply: + +- connections from our own node +- handshake fails +- upgrade to secret connection fails +- prevent duplicate ip +- prevent duplicate id +- nodeinfo incompatibility + +```go +// PeerTransport proxies incoming and outgoing peer connections. +type PeerTransport interface { + // Accept returns a newly connected Peer. + Accept() (Peer, error) + + // Dial connects to a Peer. + Dial(NetAddress) (Peer, error) +} + +// EXAMPLE OF DEFAULT IMPLEMENTATION + +// multiplexTransport accepts tcp connections and upgrades to multiplexted +// peers. +type multiplexTransport struct { + listener net.Listener + + acceptc chan accept + closec <-chan struct{} + listenc <-chan struct{} + + dialTimeout time.Duration + handshakeTimeout time.Duration + nodeAddr NetAddress + nodeInfo NodeInfo + nodeKey NodeKey + + // TODO(xla): Remove when MConnection is refactored into mPeer. + mConfig conn.MConnConfig +} + +var _ PeerTransport = (*multiplexTransport)(nil) + +// NewMTransport returns network connected multiplexed peers. +func NewMTransport( + nodeAddr NetAddress, + nodeInfo NodeInfo, + nodeKey NodeKey, +) *multiplexTransport +``` + +### Switch + +From now the Switch will depend on a fully setup `PeerTransport` to +retrieve/reach out to its peers. As the more low-level concerns are pushed to +the transport, we can omit passing the `config.P2PConfig` to the Switch. + +```go +func NewSwitch(transport PeerTransport, opts ...SwitchOption) *Switch +``` + +## Status + +In Review. + +## Consequences + +### Positive + +- free Switch from transport concerns - simpler implementation +- pluggable transport implementation - simpler test setup +- remove Switch dependency on P2PConfig - easier to test + +### Negative + +- more setup for tests which depend on Switches + +### Neutral + +- multiplexed will be the default implementation + +[0] These guards could be potentially extended to be pluggable much like +middlewares to express different concerns required by differentally configured +environments. diff --git a/docs/architecture/adr-013-symmetric-crypto.md b/docs/architecture/adr-013-symmetric-crypto.md new file mode 100644 index 000000000..69bfc2f29 --- /dev/null +++ b/docs/architecture/adr-013-symmetric-crypto.md @@ -0,0 +1,99 @@ +# ADR 013: Need for symmetric cryptography + +## Context + +We require symmetric ciphers to handle how we encrypt keys in the sdk, +and to potentially encrypt `priv_validator.json` in tendermint. + +Currently we use AEAD's to support symmetric encryption, +which is great since we want data integrity in addition to privacy and authenticity. +We don't currently have a scenario where we want to encrypt without data integrity, +so it is fine to optimize our code to just use AEAD's. +Currently there is not a way to switch out AEAD's easily, this ADR outlines a way +to easily swap these out. + +### How do we encrypt with AEAD's + +AEAD's typically require a nonce in addition to the key. +For the purposes we require symmetric cryptography for, +we need encryption to be stateless. +Because of this we use random nonces. +(Thus the AEAD must support random nonces) + +We currently construct a random nonce, and encrypt the data with it. +The returned value is `nonce || encrypted data`. +The limitation of this is that does not provide a way to identify +which algorithm was used in encryption. +Consequently decryption with multiple algoritms is sub-optimal. +(You have to try them all) + +## Decision + +We should create the following two methods in a new `crypto/encoding/symmetric` package: + +```golang +func Encrypt(aead cipher.AEAD, plaintext []byte) (ciphertext []byte, err error) +func Decrypt(key []byte, ciphertext []byte) (plaintext []byte, err error) +func Register(aead cipher.AEAD, algo_name string, NewAead func(key []byte) (cipher.Aead, error)) error +``` + +This allows you to specify the algorithm in encryption, but not have to specify +it in decryption. +This is intended for ease of use in downstream applications, in addition to people +looking at the file directly. +One downside is that for the encrypt function you must have already initialized an AEAD, +but I don't really see this as an issue. + +If there is no error in encryption, Encrypt will return `algo_name || nonce || aead_ciphertext`. +`algo_name` should be length prefixed, using standard varuint encoding. +This will be binary data, but thats not a problem considering the nonce and ciphertext are also binary. + +This solution requires a mapping from aead type to name. +We can achieve this via reflection. + +```golang +func getType(myvar interface{}) string { + if t := reflect.TypeOf(myvar); t.Kind() == reflect.Ptr { + return "*" + t.Elem().Name() + } else { + return t.Name() + } +} +``` + +Then we maintain a map from the name returned from `getType(aead)` to `algo_name`. + +In decryption, we read the `algo_name`, and then instantiate a new AEAD with the key. +Then we call the AEAD's decrypt method on the provided nonce/ciphertext. + +`Register` allows a downstream user to add their own desired AEAD to the symmetric package. +It will error if the AEAD name is already registered. +This prevents a malicious import from modifying / nullifying an AEAD at runtime. + +## Implementation strategy + +The golang implementation of what is proposed is rather straight forward. +The concern is that we will break existing private keys if we just switch to this. +If this is concerning, we can make a simple script which doesn't require decoding privkeys, +for converting from the old format to the new one. + +## Status + +Proposed. + +## Consequences + +### Positive + +- Allows us to support new AEAD's, in a way that makes decryption easier +- Allows downstream users to add their own AEAD + +### Negative + +- We will have to break all private keys stored on disk. + They can be recovered using seed words, and upgrade scripts are simple. + +### Neutral + +- Caller has to instantiate the AEAD with the private key. + However it forces them to be aware of what signing algorithm they are using, which is a positive. diff --git a/docs/architecture/adr-014-secp-malleability.md b/docs/architecture/adr-014-secp-malleability.md new file mode 100644 index 000000000..33f9d0044 --- /dev/null +++ b/docs/architecture/adr-014-secp-malleability.md @@ -0,0 +1,63 @@ +# ADR 014: Secp256k1 Signature Malleability + +## Context + +Secp256k1 has two layers of malleability. +The signer has a random nonce, and thus can produce many different valid signatures. +This ADR is not concerned with that. +The second layer of malleability basically allows one who is given a signature +to produce exactly one more valid signature for the same message from the same public key. +(They don't even have to know the message!) +The math behind this will be explained in the subsequent section. + +Note that in many downstream applications, signatures will appear in a transaction, and therefore in the tx hash. +This means that if someone broadcasts a transaction with secp256k1 signature, the signature can be altered into the other form by anyone in the p2p network. +Thus the tx hash will change, and this altered tx hash may be committed instead. +This breaks the assumption that you can broadcast a valid transaction and just wait for its hash to be included on chain. +One example is if you are broadcasting a tx in cosmos, +and you wait for it to appear on chain before incrementing your sequence number. +You may never increment your sequence number if a different tx hash got committed. +Removing this second layer of signature malleability concerns could ease downstream development. + +### ECDSA context + +Secp256k1 is ECDSA over a particular curve. +The signature is of the form `(r, s)`, where `s` is a field element. +(The particular field is the `Z_n`, where the elliptic curve has order `n`) +However `(r, -s)` is also another valid solution. +Note that anyone can negate a group element, and therefore can get this second signature. + +## Decision + +We can just distinguish a canonical form for the ECDSA signatures. +Then we require that all ECDSA signatures be in the form which we defined as canonical. +We reject signatures in non-canonical form. + +A canonical form is rather easy to define and check. +It would just be the smaller of the two values for `s`, defined lexicographically. +This is a simple check, instead of checking if `s < n`, instead check `s <= (n - 1)/2`. +An example of another cryptosystem using this +is the parity definition here https://github.com/zkcrypto/pairing/pull/30#issuecomment-372910663. + +This is the same solution Ethereum has chosen for solving secp malleability. + +## Proposed Implementation + +Fork https://github.com/btcsuite/btcd, and just update the [parse sig method](https://github.com/btcsuite/btcd/blob/11fcd83963ab0ecd1b84b429b1efc1d2cdc6d5c5/btcec/signature.go#L195) and serialize functions to enforce our canonical form. + +## Status + +Implemented + +## Consequences + +### Positive + +- Lets us maintain the ability to expect a tx hash to appear in the blockchain. + +### Negative + +- More work in all future implementations (Though this is a very simple check) +- Requires us to maintain another fork + +### Neutral diff --git a/docs/architecture/adr-015-crypto-encoding.md b/docs/architecture/adr-015-crypto-encoding.md new file mode 100644 index 000000000..bb0a8cd80 --- /dev/null +++ b/docs/architecture/adr-015-crypto-encoding.md @@ -0,0 +1,84 @@ +# ADR 015: Crypto encoding + +## Context + +We must standardize our method for encoding public keys and signatures on chain. +Currently we amino encode the public keys and signatures. +The reason we are using amino here is primarily due to ease of support in +parsing for other languages. +We don't need its upgradability properties in cryptosystems, as a change in +the crypto that requires adapting the encoding, likely warrants being deemed +a new cryptosystem. +(I.e. using new public parameters) + +## Decision + +### Public keys + +For public keys, we will continue to use amino encoding on the canonical +representation of the pubkey. +(Canonical as defined by the cryptosystem itself) +This has two significant drawbacks. +Amino encoding is less space-efficient, due to requiring support for upgradability. +Amino encoding support requires forking protobuf and adding this new interface support +option in the language of choice. + +The reason for continuing to use amino however is that people can create code +more easily in languages that already have an up to date amino library. +It is possible that this will change in the future, if it is deemed that +requiring amino for interacting with Tendermint cryptography is unnecessary. + +The arguments for space efficiency here are refuted on the basis that there are +far more egregious wastages of space in the SDK. +The space requirement of the public keys doesn't cause many problems beyond +increasing the space attached to each validator / account. + +The alternative to using amino here would be for us to create an enum type. +Switching to just an enum type is worthy of investigation post-launch. +For reference, part of amino encoding interfaces is basically a 4 byte enum +type definition. +Enum types would just change that 4 bytes to be a variant, and it would remove +the protobuf overhead, but it would be hard to integrate into the existing API. + +### Signatures + +Signatures should be switched to be `[]byte`. +Spatial efficiency in the signatures is quite important, +as it directly affects the gas cost of every transaction, +and the throughput of the chain. +Signatures don't need to encode what type they are for (unlike public keys) +since public keys must already be known. +Therefore we can validate the signature without needing to encode its type. + +When placed in state, signatures will still be amino encoded, but it will be the +primitive type `[]byte` getting encoded. + +#### Ed25519 + +Use the canonical representation for signatures. + +#### Secp256k1 + +There isn't a clear canonical representation here. +Signatures have two elements `r,s`. +These bytes are encoded as `r || s`, where `r` and `s` are both exactly +32 bytes long, encoded big-endian. +This is basically Ethereum's encoding, but without the leading recovery bit. + +## Status + +Implemented + +## Consequences + +### Positive + +- More space efficient signatures + +### Negative + +- We have an amino dependency for cryptography. + +### Neutral + +- No change to public keys diff --git a/docs/architecture/adr-016-protocol-versions.md b/docs/architecture/adr-016-protocol-versions.md new file mode 100644 index 000000000..e52743e5c --- /dev/null +++ b/docs/architecture/adr-016-protocol-versions.md @@ -0,0 +1,308 @@ +# ADR 016: Protocol Versions + +## TODO + +- How to / should we version the authenticated encryption handshake itself (ie. + upfront protocol negotiation for the P2PVersion) +- How to / should we version ABCI itself? Should it just be absorbed by the + BlockVersion? + +## Changelog + +- 18-09-2018: Updates after working a bit on implementation + - ABCI Handshake needs to happen independently of starting the app + conns so we can see the result + - Add question about ABCI protocol version +- 16-08-2018: Updates after discussion with SDK team + - Remove signalling for next version from Header/ABCI +- 03-08-2018: Updates from discussion with Jae: + - ProtocolVersion contains Block/AppVersion, not Current/Next + - signal upgrades to Tendermint using EndBlock fields + - dont restrict peer compatibilty by version to simplify syncing old nodes +- 28-07-2018: Updates from review + - split into two ADRs - one for protocol, one for chains + - include signalling for upgrades in header +- 16-07-2018: Initial draft - was originally joint ADR for protocol and chain + versions + +## Context + +Here we focus on software-agnostic protocol versions. + +The Software Version is covered by SemVer and described elsewhere. +It is not relevant to the protocol description, suffice to say that if any protocol version +changes, the software version changes, but not necessarily vice versa. + +Software version should be included in NodeInfo for convenience/diagnostics. + +We are also interested in versioning across different blockchains in a +meaningful way, for instance to differentiate branches of a contentious +hard-fork. We leave that for a later ADR. + +## Requirements + +We need to version components of the blockchain that may be independently upgraded. +We need to do it in a way that is scalable and maintainable - we can't just litter +the code with conditionals. + +We can consider the complete version of the protocol to contain the following sub-versions: +BlockVersion, P2PVersion, AppVersion. These versions reflect the major sub-components +of the software that are likely to evolve together, at different rates, and in different ways, +as described below. + +The BlockVersion defines the core of the blockchain data structures and +should change infrequently. + +The P2PVersion defines how peers connect and communicate with eachother - it's +not part of the blockchain data structures, but defines the protocols used to build the +blockchain. It may change gradually. + +The AppVersion determines how we compute app specific information, like the +AppHash and the Results. + +All of these versions may change over the life of a blockchain, and we need to +be able to help new nodes sync up across version changes. This means we must be willing +to connect to peers with older version. + +### BlockVersion + +- All tendermint hashed data-structures (headers, votes, txs, responses, etc.). + - Note the semantic meaning of a transaction may change according to the AppVersion, but the way txs are merklized into the header is part of the BlockVersion +- It should be the least frequent/likely to change. + - Tendermint should be stabilizing - it's just Atomic Broadcast. + - We can start considering for Tendermint v2.0 in a year +- It's easy to determine the version of a block from its serialized form + +### P2PVersion + +- All p2p and reactor messaging (messages, detectable behavior) +- Will change gradually as reactors evolve to improve performance and support new features - eg proposed new message types BatchTx in the mempool and HasBlockPart in the consensus +- It's easy to determine the version of a peer from its first serialized message/s +- New versions must be compatible with at least one old version to allow gradual upgrades + +### AppVersion + +- The ABCI state machine (txs, begin/endblock behavior, commit hashing) +- Behaviour and message types will change abruptly in the course of the life of a chain +- Need to minimize complexity of the code for supporting different AppVersions at different heights +- Ideally, each version of the software supports only a _single_ AppVersion at one time + - this means we checkout different versions of the software at different heights instead of littering the code + with conditionals + - minimize the number of data migrations required across AppVersion (ie. most AppVersion should be able to read the same state from disk as previous AppVersion). + +## Ideal + +Each component of the software is independently versioned in a modular way and its easy to mix and match and upgrade. + +## Proposal + +Each of BlockVersion, AppVersion, P2PVersion, is a monotonically increasing uint64. + +To use these versions, we need to update the block Header, the p2p NodeInfo, and the ABCI. + +### Header + +Block Header should include a `Version` struct as its first field like: + +``` +type Version struct { + Block uint64 + App uint64 +} +``` + +Here, `Version.Block` defines the rules for the current block, while +`Version.App` defines the app version that processed the last block and computed +the `AppHash` in the current block. Together they provide a complete description +of the consensus-critical protocol. + +Since we have settled on a proto3 header, the ability to read the BlockVersion out of the serialized header is unanimous. + +Using a Version struct gives us more flexibility to add fields without breaking +the header. + +The ProtocolVersion struct includes both the Block and App versions - it should +serve as a complete description of the consensus-critical protocol. + +### NodeInfo + +NodeInfo should include a Version struct as its first field like: + +``` +type Version struct { + P2P uint64 + Block uint64 + App uint64 + + Other []string +} +``` + +Note this effectively makes `Version.P2P` the first field in the NodeInfo, so it +should be easy to read this out of the serialized header if need be to facilitate an upgrade. + +The `Version.Other` here should include additional information like the name of the software client and +it's SemVer version - this is for convenience only. Eg. +`tendermint-core/v0.22.8`. It's a `[]string` so it can include information about +the version of Tendermint, of the app, of Tendermint libraries, etc. + +### ABCI + +Since the ABCI is responsible for keeping Tendermint and the App in sync, we +need to communicate version information through it. + +On startup, we use Info to perform a basic handshake. It should include all the +version information. + +We also need to be able to update versions in the life of a blockchain. The +natural place to do this is EndBlock. + +Note that currently the result of the Handshake isn't exposed anywhere, as the +handshaking happens inside the `proxy.AppConns` abstraction. We will need to +remove the handshaking from the `proxy` package so we can call it independently +and get the result, which should contain the application version. + +#### Info + +RequestInfo should add support for protocol versions like: + +``` +message RequestInfo { + string version + uint64 block_version + uint64 p2p_version +} +``` + +Similarly, ResponseInfo should return the versions: + +``` +message ResponseInfo { + string data + + string version + uint64 app_version + + int64 last_block_height + bytes last_block_app_hash +} +``` + +The existing `version` fields should be called `software_version` but we leave +them for now to reduce the number of breaking changes. + +#### EndBlock + +Updating the version could be done either with new fields or by using the +existing `tags`. Since we're trying to communicate information that will be +included in Tendermint block Headers, it should be native to the ABCI, and not +something embedded through some scheme in the tags. Thus, version updates should +be communicated through EndBlock. + +EndBlock already contains `ConsensusParams`. We can add version information to +the ConsensusParams as well: + +``` +message ConsensusParams { + + BlockSize block_size + EvidenceParams evidence_params + VersionParams version +} + +message VersionParams { + uint64 block_version + uint64 app_version +} +``` + +For now, the `block_version` will be ignored, as we do not allow block version +to be updated live. If the `app_version` is set, it signals that the app's +protocol version has changed, and the new `app_version` will be included in the +`Block.Header.Version.App` for the next block. + +### BlockVersion + +BlockVersion is included in both the Header and the NodeInfo. + +Changing BlockVersion should happen quite infrequently and ideally only for +critical upgrades. For now, it is not encoded in ABCI, though it's always +possible to use tags to signal an external process to co-ordinate an upgrade. + +Note Ethereum has not had to make an upgrade like this (everything has been at state machine level, AFAIK). + +### P2PVersion + +P2PVersion is not included in the block Header, just the NodeInfo. + +P2PVersion is the first field in the NodeInfo. NodeInfo is also proto3 so this is easy to read out. + +Note we need the peer/reactor protocols to take the versions of peers into account when sending messages: + +- don't send messages they don't understand +- don't send messages they don't expect + +Doing this will be specific to the upgrades being made. + +Note we also include the list of reactor channels in the NodeInfo and already don't send messages for channels the peer doesn't understand. +If upgrades always use new channels, this simplifies the development cost of backwards compatibility. + +Note NodeInfo is only exchanged after the authenticated encryption handshake to ensure that it's private. +Doing any version exchange before encrypting could be considered information leakage, though I'm not sure +how much that matters compared to being able to upgrade the protocol. + +XXX: if needed, can we change the meaning of the first byte of the first message to encode a handshake version? +this is the first byte of a 32-byte ed25519 pubkey. + +### AppVersion + +AppVersion is also included in the block Header and the NodeInfo. + +AppVersion essentially defines how the AppHash and LastResults are computed. + +### Peer Compatibility + +Restricting peer compatibility based on version is complicated by the need to +help old peers, possibly on older versions, sync the blockchain. + +We might be tempted to say that we only connect to peers with the same +AppVersion and BlockVersion (since these define the consensus critical +computations), and a select list of P2PVersions (ie. those compatible with +ours), but then we'd need to make accomodations for connecting to peers with the +right Block/AppVersion for the height they're on. + +For now, we will connect to peers with any version and restrict compatibility +solely based on the ChainID. We leave more restrictive rules on peer +compatibiltiy to a future proposal. + +### Future Changes + +It may be valuable to support an `/unsafe_stop?height=_` endpoint to tell Tendermint to shutdown at a given height. +This could be use by an external manager process that oversees upgrades by +checking out and installing new software versions and restarting the process. It +would subscribe to the relevant upgrade event (needs to be implemented) and call `/unsafe_stop` at +the correct height (of course only after getting approval from its user!) + +## Consequences + +### Positive + +- Make tendermint and application versions native to the ABCI to more clearly + communicate about them +- Distinguish clearly between protocol versions and software version to + facilitate implementations in other languages +- Versions included in key data structures in easy to discern way +- Allows proposers to signal for upgrades and apps to decide when to actually change the + version (and start signalling for a new version) + +### Neutral + +- Unclear how to version the initial P2P handshake itself +- Versions aren't being used (yet) to restrict peer compatibility +- Signalling for a new version happens through the proposer and must be + tallied/tracked in the app. + +### Negative + +- Adds more fields to the ABCI +- Implies that a single codebase must be able to handle multiple versions diff --git a/docs/architecture/adr-017-chain-versions.md b/docs/architecture/adr-017-chain-versions.md new file mode 100644 index 000000000..7113dbaee --- /dev/null +++ b/docs/architecture/adr-017-chain-versions.md @@ -0,0 +1,99 @@ +# ADR 017: Chain Versions + +## TODO + +- clarify how to handle slashing when ChainID changes + +## Changelog + +- 28-07-2018: Updates from review + - split into two ADRs - one for protocol, one for chains +- 16-07-2018: Initial draft - was originally joint ADR for protocol and chain + versions + +## Context + +Software and Protocol versions are covered in a separate ADR. + +Here we focus on chain versions. + +## Requirements + +We need to version blockchains across protocols, networks, forks, etc. +We need chain identifiers and descriptions so we can talk about a multitude of chains, +and especially the differences between them, in a meaningful way. + +### Networks + +We need to support many independent networks running the same version of the software, +even possibly starting from the same initial state. +They must have distinct identifiers so that peers know which one they are joining and so +validators and users can prevent replay attacks. + +Call this the `NetworkName` (note we currently call this `ChainID` in the software. In this +ADR, ChainID has a different meaning). +It represents both the application being run and the community or intention +of running it. + +Peers only connect to other peers with the same NetworkName. + +### Forks + +We need to support existing networks upgrading and forking, wherein they may do any of: + + - revert back to some height, continue with the same versions but new blocks + - arbitrarily mutate state at some height, continue with the same versions (eg. Dao Fork) + - change the AppVersion at some height + +Note because of Tendermint's voting power threshold rules, a chain can only be extended under the "original" rules and under the new rules +if 1/3 or more is double signing, which is expressly prohibited, and is supposed to result in their punishment on both chains. Since they can censor +the punishment, the chain is expected to be hardforked to remove the validators. Thus, if both branches are to continue after a fork, +they will each require a new identifier, and the old chain identifier will be retired (ie. only useful for syncing history, not for new blocks).. + +TODO: explain how to handle slashing when chain id changed! + +We need a consistent way to describe forks. + +## Proposal + +### ChainDescription + +ChainDescription is a complete immutable description of a blockchain. It takes the following form: + +``` +ChainDescription = ///// +``` + +Here, StateHash is the merkle root of the initial state, ValHash is the merkle root of the initial Tendermint validator set, +and ConsensusParamsHash is the merkle root of the initial Tendermint consensus parameters. + +The `genesis.json` file must contain enough information to compute this value. It need not contain the StateHash or ValHash itself, +but contain the state from which they can be computed with the given protocol versions. + +NOTE: consider splitting NetworkName into NetworkName and AppName - this allows +folks to independently use the same application for different networks (ie we +could imagine multiple communities of validators wanting to put up a Hub using +the same app but having a distinct network name. Arguably not needed if +differences will come via different initial state / validators). + +#### ChainID + +Define `ChainID = TMHASH(ChainDescriptor)`. It's the unique ID of a blockchain. + +It should be Bech32 encoded when handled by users, eg. with `cosmoschain` prefix. + +#### Forks and Uprades + +When a chain forks or upgrades but continues the same history, it takes a new ChainDescription as follows: + +``` +ChainDescription = /x// +``` + +Where + +- ChainID is the ChainID from the previous ChainDescription (ie. its hash) +- `x` denotes that a change occured +- `Height` is the height the change occured +- ForkDescription has the same form as ChainDescription but for the fork +- this allows forks to specify new versions for tendermint or the app, as well as arbitrary changes to the state or validator set diff --git a/docs/architecture/adr-018-ABCI-Validators.md b/docs/architecture/adr-018-ABCI-Validators.md new file mode 100644 index 000000000..b517c3694 --- /dev/null +++ b/docs/architecture/adr-018-ABCI-Validators.md @@ -0,0 +1,100 @@ +# ADR 018: ABCI Validator Improvements + +## Changelog + +016-08-2018: Follow up from review: - Revert changes to commit round - Remind about justification for removing pubkey - Update pros/cons +05-08-2018: Initial draft + +## Context + +ADR 009 introduced major improvements to the ABCI around validators and the use +of Amino. Here we follow up with some additional changes to improve the naming +and expected use of Validator messages. + +## Decision + +### Validator + +Currently a Validator contains `address` and `pub_key`, and one or the other is +optional/not-sent depending on the use case. Instead, we should have a +`Validator` (with just the address, used for RequestBeginBlock) +and a `ValidatorUpdate` (with the pubkey, used for ResponseEndBlock): + +``` +message Validator { + bytes address + int64 power +} + +message ValidatorUpdate { + PubKey pub_key + int64 power +} +``` + +As noted in [ADR-009](adr-009-ABCI-design.md), +the `Validator` does not contain a pubkey because quantum public keys are +quite large and it would be wasteful to send them all over ABCI with every block. +Thus, applications that want to take advantage of the information in BeginBlock +are _required_ to store pubkeys in state (or use much less efficient lazy means +of verifying BeginBlock data). + +### RequestBeginBlock + +LastCommitInfo currently has an array of `SigningValidator` that contains +information for each validator in the entire validator set. +Instead, this should be called `VoteInfo`, since it is information about the +validator votes. + +Note that all votes in a commit must be from the same round. + +``` +message LastCommitInfo { + int64 round + repeated VoteInfo commit_votes +} + +message VoteInfo { + Validator validator + bool signed_last_block +} +``` + +### ResponseEndBlock + +Use ValidatorUpdates instead of Validators. Then it's clear we don't need an +address, and we do need a pubkey. + +We could require the address here as well as a sanity check, but it doesn't seem +necessary. + +### InitChain + +Use ValidatorUpdates for both Request and Response. InitChain +is about setting/updating the initial validator set, unlike BeginBlock +which is just informational. + +## Status + +Implemented + +## Consequences + +### Positive + +- Clarifies the distinction between the different uses of validator information + +### Negative + +- Apps must still store the public keys in state to utilize the RequestBeginBlock info + +### Neutral + +- ResponseEndBlock does not require an address + +## References + +- [Latest ABCI Spec](https://github.com/tendermint/tendermint/blob/v0.22.8/docs/app-dev/abci-spec.md) +- [ADR-009](https://github.com/tendermint/tendermint/blob/v0.22.8/docs/architecture/adr-009-ABCI-design.md) +- [Issue #1712 - Don't send PubKey in + RequestBeginBlock](https://github.com/tendermint/tendermint/issues/1712) diff --git a/docs/architecture/adr-019-multisigs.md b/docs/architecture/adr-019-multisigs.md new file mode 100644 index 000000000..7fd3aab0a --- /dev/null +++ b/docs/architecture/adr-019-multisigs.md @@ -0,0 +1,162 @@ +# ADR 019: Encoding standard for Multisignatures + +## Changelog + +06-08-2018: Minor updates + +27-07-2018: Update draft to use amino encoding + +11-07-2018: Initial Draft + +5-26-2021: Multisigs were moved into the Cosmos-sdk + +## Context + +Multisignatures, or technically _Accountable Subgroup Multisignatures_ (ASM), +are signature schemes which enable any subgroup of a set of signers to sign any message, +and reveal to the verifier exactly who the signers were. +This allows for complex conditionals of when to validate a signature. + +Suppose the set of signers is of size _n_. +If we validate a signature if any subgroup of size _k_ signs a message, +this becomes what is commonly reffered to as a _k of n multisig_ in Bitcoin. + +This ADR specifies the encoding standard for general accountable subgroup multisignatures, +k of n accountable subgroup multisignatures, and its weighted variant. + +In the future, we can also allow for more complex conditionals on the accountable subgroup. + +## Proposed Solution + +### New structs + +Every ASM will then have its own struct, implementing the crypto.Pubkey interface. + +This ADR assumes that [replacing crypto.Signature with []bytes](https://github.com/tendermint/tendermint/issues/1957) has been accepted. + +#### K of N threshold signature + +The pubkey is the following struct: + +```golang +type ThresholdMultiSignaturePubKey struct { // K of N threshold multisig + K uint `json:"threshold"` + Pubkeys []crypto.Pubkey `json:"pubkeys"` +} +``` + +We will derive N from the length of pubkeys. (For spatial efficiency in encoding) + +`Verify` will expect an `[]byte` encoded version of the Multisignature. +(Multisignature is described in the next section) +The multisignature will be rejected if the bitmap has less than k indices, +or if any signature at any of the k indices is not a valid signature from +the kth public key on the message. +(If more than k signatures are included, all must be valid) + +`Bytes` will be the amino encoded version of the pubkey. + +Address will be `Hash(amino_encoded_pubkey)` + +The reason this doesn't use `log_8(n)` bytes per signer is because that heavily optimizes for the case where a very small number of signers are required. +e.g. for `n` of size `24`, that would only be more space efficient for `k < 3`. +This seems less likely, and that it should not be the case optimized for. + +#### Weighted threshold signature + +The pubkey is the following struct: + +```golang +type WeightedThresholdMultiSignaturePubKey struct { + Weights []uint `json:"weights"` + Threshold uint `json:"threshold"` + Pubkeys []crypto.Pubkey `json:"pubkeys"` +} +``` + +Weights and Pubkeys must be of the same length. +Everything else proceeds identically to the K of N multisig, +except the multisig fails if the sum of the weights is less than the threshold. + +#### Multisignature + +The inter-mediate phase of the signatures (as it accrues more signatures) will be the following struct: + +```golang +type Multisignature struct { + BitArray CryptoBitArray // Documented later + Sigs [][]byte +``` + +It is important to recall that each private key will output a signature on the provided message itself. +So no signing algorithm ever outputs the multisignature. +The UI will take a signature, cast into a multisignature, and then keep adding +new signatures into it, and when done marshal into `[]byte`. +This will require the following helper methods: + +```golang +func SigToMultisig(sig []byte, n int) +func GetIndex(pk crypto.Pubkey, []crypto.Pubkey) +func AddSignature(sig Signature, index int, multiSig *Multisignature) +``` + +The multisignature will be converted to an `[]byte` using amino.MarshalBinaryBare. \* + +#### Bit Array + +We would be using a new implementation of a bitarray. The struct it would be encoded/decoded from is + +```golang +type CryptoBitArray struct { + ExtraBitsStored byte `json:"extra_bits"` // The number of extra bits in elems. + Elems []byte `json:"elems"` +} +``` + +The reason for not using the BitArray currently implemented in `libs/common/bit_array.go` +is that it is less space efficient, due to a space / time trade-off. +Evidence for this is outlined in [this issue](https://github.com/tendermint/tendermint/issues/2077). + +In the multisig, we will not be performing arithmetic operations, +so there is no performance increase with the current implementation, +and just loss of spatial efficiency. +Implementing this new bit array with `[]byte` _should_ be simple, as no +arithmetic operations between bit arrays are required, and save a couple of bytes. +(Explained in that same issue) + +When this bit array encoded, the number of elements is encoded due to amino. +However we may be encoding a full byte for what we actually only need 1-7 bits for. +We store that difference in ExtraBitsStored. +This allows for us to have an unbounded number of signers, and is more space efficient than what is currently used in `libs/common`. +Again the implementation of this space saving feature is straight forward. + +### Encoding the structs + +We will use straight forward amino encoding. This is chosen for ease of compatibility in other languages. + +### Future points of discussion + +If desired, we can use ed25519 batch verification for all ed25519 keys. +This is a future point of discussion, but would be backwards compatible as this information won't need to be marshalled. +(There may even be cofactor concerns without ristretto) +Aggregation of pubkeys / sigs in Schnorr sigs / BLS sigs is not backwards compatible, and would need to be a new ASM type. + +## Status + +Implemented (moved to cosmos-sdk) + +## Consequences + +### Positive + +- Supports multisignatures, in a way that won't require any special cases in our downstream verification code. +- Easy to serialize / deserialize +- Unbounded number of signers + +### Negative + +- Larger codebase, however this should reside in a subfolder of tendermint/crypto, as it provides no new interfaces. (Ref #https://github.com/tendermint/go-crypto/issues/136) +- Space inefficient due to utilization of amino encoding +- Suggested implementation requires a new struct for every ASM. + +### Neutral diff --git a/docs/architecture/adr-020-block-size.md b/docs/architecture/adr-020-block-size.md new file mode 100644 index 000000000..f32ed7ab5 --- /dev/null +++ b/docs/architecture/adr-020-block-size.md @@ -0,0 +1,104 @@ +# ADR 020: Limiting txs size inside a block + +## Changelog + +13-08-2018: Initial Draft +15-08-2018: Second version after Dev's comments +28-08-2018: Third version after Ethan's comments +30-08-2018: AminoOverheadForBlock => MaxAminoOverheadForBlock +31-08-2018: Bounding evidence and chain ID +13-01-2019: Add section on MaxBytes vs MaxDataBytes + +## Context + +We currently use MaxTxs to reap txs from the mempool when proposing a block, +but enforce MaxBytes when unmarshaling a block, so we could easily propose a +block thats too large to be valid. + +We should just remove MaxTxs all together and stick with MaxBytes, and have a +`mempool.ReapMaxBytes`. + +But we can't just reap BlockSize.MaxBytes, since MaxBytes is for the entire block, +not for the txs inside the block. There's extra amino overhead + the actual +headers on top of the actual transactions + evidence + last commit. +We could also consider using a MaxDataBytes instead of or in addition to MaxBytes. + +## MaxBytes vs MaxDataBytes + +The [PR #3045](https://github.com/tendermint/tendermint/pull/3045) suggested +additional clarity/justification was necessary here, wither respect to the use +of MaxDataBytes in addition to, or instead of, MaxBytes. + +MaxBytes provides a clear limit on the total size of a block that requires no +additional calculation if you want to use it to bound resource usage, and there +has been considerable discussions about optimizing tendermint around 1MB blocks. +Regardless, we need some maximum on the size of a block so we can avoid +unmarshaling blocks that are too big during the consensus, and it seems more +straightforward to provide a single fixed number for this rather than a +computation of "MaxDataBytes + everything else you need to make room for +(signatures, evidence, header)". MaxBytes provides a simple bound so we can +always say "blocks are less than X MB". + +Having both MaxBytes and MaxDataBytes feels like unnecessary complexity. It's +not particularly surprising for MaxBytes to imply the maximum size of the +entire block (not just txs), one just has to know that a block includes header, +txs, evidence, votes. For more fine grained control over the txs included in the +block, there is the MaxGas. In practice, the MaxGas may be expected to do most of +the tx throttling, and the MaxBytes to just serve as an upper bound on the total +size. Applications can use MaxGas as a MaxDataBytes by just taking the gas for +every tx to be its size in bytes. + +## Proposed solution + +Therefore, we should + +1) Get rid of MaxTxs. +2) Rename MaxTxsBytes to MaxBytes. + +When we need to ReapMaxBytes from the mempool, we calculate the upper bound as follows: + +``` +ExactLastCommitBytes = {number of validators currently enabled} * {MaxVoteBytes} +MaxEvidenceBytesPerBlock = MaxBytes / 10 +ExactEvidenceBytes = cs.evpool.PendingEvidence(MaxEvidenceBytesPerBlock) * MaxEvidenceBytes + +mempool.ReapMaxBytes(MaxBytes - MaxAminoOverheadForBlock - ExactLastCommitBytes - ExactEvidenceBytes - MaxHeaderBytes) +``` + +where MaxVoteBytes, MaxEvidenceBytes, MaxHeaderBytes and MaxAminoOverheadForBlock +are constants defined inside the `types` package: + +- MaxVoteBytes - 170 bytes +- MaxEvidenceBytes - 364 bytes +- MaxHeaderBytes - 476 bytes (~276 bytes hashes + 200 bytes - 50 UTF-8 encoded + symbols of chain ID 4 bytes each in the worst case + amino overhead) +- MaxAminoOverheadForBlock - 8 bytes (assuming MaxHeaderBytes includes amino + overhead for encoding header, MaxVoteBytes - for encoding vote, etc.) + +ChainID needs to bound to 50 symbols max. + +When reaping evidence, we use MaxBytes to calculate the upper bound (e.g. 1/10) +to save some space for transactions. + +NOTE while reaping the `max int` bytes in mempool, we should account that every +transaction will take `len(tx)+aminoOverhead`, where aminoOverhead=1-4 bytes. + +We should write a test that fails if the underlying structs got changed, but +MaxXXX stayed the same. + +## Status + +Implemented + +## Consequences + +### Positive + +* one way to limit the size of a block +* less variables to configure + +### Negative + +* constants that need to be adjusted if the underlying structs got changed + +### Neutral diff --git a/docs/architecture/adr-021-abci-events.md b/docs/architecture/adr-021-abci-events.md new file mode 100644 index 000000000..ca20a05e9 --- /dev/null +++ b/docs/architecture/adr-021-abci-events.md @@ -0,0 +1,52 @@ +# ADR 012: ABCI Events + +## Changelog + +- *2018-09-02* Remove ABCI errors component. Update description for events +- *2018-07-12* Initial version + +## Context + +ABCI tags were first described in [ADR 002](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-002-event-subscription.md). +They are key-value pairs that can be used to index transactions. + +Currently, ABCI messages return a list of tags to describe an +"event" that took place during the Check/DeliverTx/Begin/EndBlock, +where each tag refers to a different property of the event, like the sending and receiving account addresses. + +Since there is only one list of tags, recording data for multiple such events in +a single Check/DeliverTx/Begin/EndBlock must be done using prefixes in the key +space. + +Alternatively, groups of tags that constitute an event can be separated by a +special tag that denotes a break between the events. This would allow +straightforward encoding of multiple events into a single list of tags without +prefixing, at the cost of these "special" tags to separate the different events. + +TODO: brief description of how the indexing works + +## Decision + +Instead of returning a list of tags, return a list of events, where +each event is a list of tags. This way we naturally capture the concept of +multiple events happening during a single ABCI message. + +TODO: describe impact on indexing and querying + +## Status + +Implemented + +## Consequences + +### Positive + +- Ability to track distinct events separate from ABCI calls (DeliverTx/BeginBlock/EndBlock) +- More powerful query abilities + +### Negative + +- More complex query syntax +- More complex search implementation + +### Neutral diff --git a/docs/architecture/adr-022-abci-errors.md b/docs/architecture/adr-022-abci-errors.md new file mode 100644 index 000000000..ce2c56a28 --- /dev/null +++ b/docs/architecture/adr-022-abci-errors.md @@ -0,0 +1,63 @@ +# ADR 022: ABCI Errors + +## Changelog + +- *2018-09-01* Initial version + +## Context + +ABCI errors should provide an abstraction between application details +and the client interface responsible for formatting & displaying errors to the user. + +Currently, this abstraction consists of a single integer (the `code`), where any +`code > 0` is considered an error (ie. invalid transaction) and all type +information about the error is contained in the code. This integer is +expected to be decoded by the client into a known error string, where any +more specific data is contained in the `data`. + +In a [previous conversation](https://github.com/tendermint/abci/issues/165#issuecomment-353704015), +it was suggested that not all non-zero codes need to be errors, hence why it's called `code` and not `error code`. +It is unclear exactly how the semantics of the `code` field will evolve, though +better lite-client proofs (like discussed for tags +[here](https://github.com/tendermint/tendermint/issues/1007#issuecomment-413917763)) +may play a role. + +Note that having all type information in a single integer +precludes an easy coordination method between "module implementers" and "client +implementers", especially for apps with many "modules". With an unbounded error domain (such as a string), module +implementers can pick a globally unique prefix & error code set, so client +implementers could easily implement support for "module A" regardless of which +particular blockchain network it was running in and which other modules were running with it. With +only error codes, globally unique codes are difficult/impossible, as the space +is finite and collisions are likely without an easy way to coordinate. + +For instance, while trying to build an ecosystem of modules that can be composed into a single +ABCI application, the Cosmos-SDK had to hack a higher level "codespace" into the +single integer so that each module could have its own space to express its +errors. + +## Decision + +Include a `string code_space` in all ABCI messages that have a `code`. +This allows applications to namespace the codes so they can experiment with +their own code schemes. + +It is the responsibility of applications to limit the size of the `code_space` +string. + +How the codespace is hashed into block headers (ie. so it can be queried +efficiently by lite clients) is left for a separate ADR. + +## Consequences + +## Positive + +- No need for complex codespacing on a single integer +- More expressive type system for errors + +## Negative + +- Another field in the response needs to be accounted for +- Some redundancy with `code` field +- May encourage more error/code type info to move to the `codespace` string, which + could impact lite clients. diff --git a/docs/architecture/adr-023-ABCI-propose-tx.md b/docs/architecture/adr-023-ABCI-propose-tx.md new file mode 100644 index 000000000..af8ea8b8f --- /dev/null +++ b/docs/architecture/adr-023-ABCI-propose-tx.md @@ -0,0 +1,183 @@ +# ADR 023: ABCI `ProposeTx` Method + +## Changelog + +25-06-2018: Initial draft based on [#1776](https://github.com/tendermint/tendermint/issues/1776) + +## Context + +[#1776](https://github.com/tendermint/tendermint/issues/1776) was +opened in relation to implementation of a Plasma child chain using Tendermint +Core as consensus/replication engine. + +Due to the requirements of [Minimal Viable Plasma (MVP)](https://ethresear.ch/t/minimal-viable-plasma/426) and [Plasma Cash](https://ethresear.ch/t/plasma-cash-plasma-with-much-less-per-user-data-checking/1298), it is necessary for ABCI apps to have a mechanism to handle the following cases (more may emerge in the near future): + +1. `deposit` transactions on the Root Chain, which must consist of a block + with a single transaction, where there are no inputs and only one output + made in favour of the depositor. In this case, a `block` consists of + a transaction with the following shape: + + ``` + [0, 0, 0, 0, #input1 - zeroed out + 0, 0, 0, 0, #input2 - zeroed out + , , #output1 - in favour of depositor + 0, 0, #output2 - zeroed out + , + ] + ``` + + `exit` transactions may also be treated in a similar manner, wherein the + input is the UTXO being exited on the Root Chain, and the output belongs to + a reserved "burn" address, e.g., `0x0`. In such cases, it is favorable for + the containing block to only hold a single transaction that may receive + special treatment. + +2. Other "internal" transactions on the child chain, which may be initiated + unilaterally. The most basic example of is a coinbase transaction + implementing validator node incentives, but may also be app-specific. In + these cases, it may be favorable for such transactions to + be ordered in a specific manner, e.g., coinbase transactions will always be + at index 0. In general, such strategies increase the determinism and + predictability of blockchain applications. + +While it is possible to deal with the cases enumerated above using the +existing ABCI, currently available result in suboptimal workarounds. Two are +explained in greater detail below. + +### Solution 1: App state-based Plasma chain + +In this work around, the app maintains a `PlasmaStore` with a corresponding +`Keeper`. The PlasmaStore is responsible for maintaing a second, separate +blockchain that complies with the MVP specification, including `deposit` +blocks and other "internal" transactions. These "virtual" blocks are then broadcasted +to the Root Chain. + +This naive approach is, however, fundamentally flawed, as it by definition +diverges from the canonical chain maintained by Tendermint. This is further +exacerbated if the business logic for generating such transactions is +potentially non-deterministic, as this should not even be done in +`Begin/EndBlock`, which may, as a result, break consensus guarantees. + +Additinoally, this has serious implications for "watchers" - independent third parties, +or even an auxilliary blockchain, responsible for ensuring that blocks recorded +on the Root Chain are consistent with the Plasma chain's. Since, in this case, +the Plasma chain is inconsistent with the canonical one maintained by Tendermint +Core, it seems that there exists no compact means of verifying the legitimacy of +the Plasma chain without replaying every state transition from genesis (!). + +### Solution 2: Broadcast to Tendermint Core from ABCI app + +This approach is inspired by `tendermint`, in which Ethereum transactions are +relayed to Tendermint Core. It requires the app to maintain a client connection +to the consensus engine. + +Whenever an "internal" transaction needs to be created, the proposer of the +current block broadcasts the transaction or transactions to Tendermint as +needed in order to ensure that the Tendermint chain and Plasma chain are +completely consistent. + +This allows "internal" transactions to pass through the full consensus +process, and can be validated in methods like `CheckTx`, i.e., signed by the +proposer, is the semantically correct, etc. Note that this involves informing +the ABCI app of the block proposer, which was temporarily hacked in as a means +of conducting this experiment, although this should not be necessary when the +current proposer is passed to `BeginBlock`. + +It is much easier to relay these transactions directly to the Root +Chain smart contract and/or maintain a "compressed" auxiliary chain comprised +of Plasma-friendly blocks that 100% reflect the canonical (Tendermint) +blockchain. Unfortunately, this approach not idiomatic (i.e., utilises the +Tendermint consensus engine in unintended ways). Additionally, it does not +allow the application developer to: + +- Control the _ordering_ of transactions in the proposed block (e.g., index 0, + or 0 to `n` for coinbase transactions) +- Control the _number_ of transactions in the block (e.g., when a `deposit` + block is required) + +Since determinism is of utmost importance in blockchain engineering, this approach, +while more viable, should also not be considered as fit for production. + +## Decision + +### `ProposeTx` + +In order to address the difficulties described above, the ABCI interface must +expose an additional method, tentatively named `ProposeTx`. + +It should have the following signature: + +``` +ProposeTx(RequestProposeTx) ResponseProposeTx +``` + +Where `RequestProposeTx` and `ResponseProposeTx` are `message`s with the +following shapes: + +``` +message RequestProposeTx { + int64 next_block_height = 1; // height of the block the proposed tx would be part of + Validator proposer = 2; // the proposer details +} + +message ResponseProposeTx { + int64 num_tx = 1; // the number of tx to include in proposed block + repeated bytes txs = 2; // ordered transaction data to include in block + bool exclusive = 3; // whether the block should include other transactions (from `mempool`) +} +``` + +`ProposeTx` would be called by before `mempool.Reap` at this +[line](https://github.com/tendermint/tendermint/blob/9cd9f3338bc80a12590631632c23c8dbe3ff5c34/consensus/state.go#L935). +Depending on whether `exclusive` is `true` or `false`, the proposed +transactions are then pushed on top of the transactions received from +`mempool.Reap`. + +### `DeliverTx` + +Since the list of `tx` received from `ProposeTx` are _not_ passed through `CheckTx`, +it is probably a good idea to provide a means of differentiatiating "internal" transactions +from user-generated ones, in case the app developer needs/wants to take extra measures to +ensure validity of the proposed transactions. + +Therefore, the `RequestDeliverTx` message should be changed to provide an additional flag, like so: + +``` +message RequestDeliverTx { + bytes tx = 1; + bool internal = 2; +} +``` + +Alternatively, an additional method `DeliverProposeTx` may be added as an accompanient to +`ProposeTx`. However, it is not clear at this stage if this additional overhead is necessary +to preserve consensus guarantees given that a simple flag may suffice for now. + +## Status + +Pending + +## Consequences + +### Positive + +- Tendermint ABCI apps will be able to function as minimally viable Plasma chains. +- It will thereby become possible to add an extension to `cosmos-sdk` to enable + ABCI apps to support both IBC and Plasma, maximising interop. +- ABCI apps will have great control and flexibility in managing blockchain state, + without having to resort to non-deterministic hacks and/or unsafe workarounds + +### Negative + +- Maintenance overhead of exposing additional ABCI method +- Potential security issues that may have been overlooked and must now be tested extensively + +### Neutral + +- ABCI developers must deal with increased (albeit nominal) API surface area. + +## References + +- [#1776 Plasma and "Internal" Transactions in ABCI Apps](https://github.com/tendermint/tendermint/issues/1776) +- [Minimal Viable Plasma](https://ethresear.ch/t/minimal-viable-plasma/426) +- [Plasma Cash: Plasma with much less per-user data checking](https://ethresear.ch/t/plasma-cash-plasma-with-much-less-per-user-data-checking/1298) diff --git a/docs/architecture/adr-024-sign-bytes.md b/docs/architecture/adr-024-sign-bytes.md new file mode 100644 index 000000000..23a6afcdc --- /dev/null +++ b/docs/architecture/adr-024-sign-bytes.md @@ -0,0 +1,234 @@ +# ADR 024: SignBytes and validator types in privval + +## Context + +Currently, the messages exchanged between tendermint and a (potentially remote) signer/validator, +namely votes, proposals, and heartbeats, are encoded as a JSON string +(e.g., via `Vote.SignBytes(...)`) and then +signed . JSON encoding is sub-optimal for both, hardware wallets +and for usage in ethereum smart contracts. Both is laid down in detail in [issue#1622]. + +Also, there are currently no differences between sign-request and -replies. Also, there is no possibility +for a remote signer to include an error code or message in case something went wrong. +The messages exchanged between tendermint and a remote signer currently live in +[privval/socket.go] and encapsulate the corresponding types in [types]. + + +[privval/socket.go]: https://github.com/tendermint/tendermint/blob/d419fffe18531317c28c29a292ad7d253f6cafdf/privval/socket.go#L496-L502 +[issue#1622]: https://github.com/tendermint/tendermint/issues/1622 +[types]: https://github.com/tendermint/tendermint/tree/master/types + + +## Decision + +- restructure vote, proposal, and heartbeat such that their encoding is easily parseable by +hardware devices and smart contracts using a binary encoding format ([amino] in this case) +- split up the messages exchanged between tendermint and remote signers into requests and +responses (see details below) +- include an error type in responses + +### Overview +``` ++--------------+ +----------------+ +| | SignXRequest | | +|Remote signer |<---------------------+ tendermint | +| (e.g. KMS) | | | +| +--------------------->| | ++--------------+ SignedXReply +----------------+ + + +SignXRequest { + x: X +} + +SignedXReply { + x: X + sig: Signature // []byte + err: Error{ + code: int + desc: string + } +} +``` + +TODO: Alternatively, the type `X` might directly include the signature. A lot of places expect a vote with a +signature and do not necessarily deal with "Replies". +Still exploring what would work best here. +This would look like (exemplified using X = Vote): +``` +Vote { + // all fields besides signature +} + +SignedVote { + Vote Vote + Signature []byte +} + +SignVoteRequest { + Vote Vote +} + +SignedVoteReply { + Vote SignedVote + Err Error +} +``` + +**Note:** There was a related discussion around including a fingerprint of, or, the whole public-key +into each sign-request to tell the signer which corresponding private-key to +use to sign the message. This is particularly relevant in the context of the KMS +but is currently not considered in this ADR. + + +[amino]: https://github.com/tendermint/go-amino/ + +### Vote + +As explained in [issue#1622] `Vote` will be changed to contain the following fields +(notation in protobuf-like syntax for easy readability): + +```proto +// vanilla protobuf / amino encoded +message Vote { + Version fixed32 + Height sfixed64 + Round sfixed32 + VoteType fixed32 + Timestamp Timestamp // << using protobuf definition + BlockID BlockID // << as already defined + ChainID string // at the end because length could vary a lot +} + +// this is an amino registered type; like currently privval.SignVoteMsg: +// registered with "tendermint/socketpv/SignVoteRequest" +message SignVoteRequest { + Vote vote +} + +// amino registered type +// registered with "tendermint/socketpv/SignedVoteReply" +message SignedVoteReply { + Vote Vote + Signature Signature + Err Error +} + +// we will use this type everywhere below +message Error { + Type uint // error code + Description string // optional description +} + +``` + +The `ChainID` gets moved into the vote message directly. Previously, it was injected +using the [Signable] interface method `SignBytes(chainID string) []byte`. Also, the +signature won't be included directly, only in the corresponding `SignedVoteReply` message. + +[Signable]: https://github.com/tendermint/tendermint/blob/d419fffe18531317c28c29a292ad7d253f6cafdf/types/signable.go#L9-L11 + +### Proposal + +```proto +// vanilla protobuf / amino encoded +message Proposal { + Height sfixed64 + Round sfixed32 + Timestamp Timestamp // << using protobuf definition + BlockPartsHeader PartSetHeader // as already defined + POLRound sfixed32 + POLBlockID BlockID // << as already defined +} + +// amino registered with "tendermint/socketpv/SignProposalRequest" +message SignProposalRequest { + Proposal proposal +} + +// amino registered with "tendermint/socketpv/SignProposalReply" +message SignProposalReply { + Prop Proposal + Sig Signature + Err Error // as defined above +} +``` + +### Heartbeat + +**TODO**: clarify if heartbeat also needs a fixed offset and update the fields accordingly: + +```proto +message Heartbeat { + ValidatorAddress Address + ValidatorIndex int + Height int64 + Round int + Sequence int +} +// amino registered with "tendermint/socketpv/SignHeartbeatRequest" +message SignHeartbeatRequest { + Hb Heartbeat +} + +// amino registered with "tendermint/socketpv/SignHeartbeatReply" +message SignHeartbeatReply { + Hb Heartbeat + Sig Signature + Err Error // as defined above +} + +``` + +## PubKey + +TBA - this needs further thoughts: e.g. what todo like in the case of the KMS which holds +several keys? How does it know with which key to reply? + +## SignBytes +`SignBytes` will not require a `ChainID` parameter: + +```golang +type Signable interface { + SignBytes() []byte +} + +``` +And the implementation for vote, heartbeat, proposal will look like: +```golang +// type T is one of vote, sign, proposal +func (tp *T) SignBytes() []byte { + bz, err := cdc.MarshalBinary(tp) + if err != nil { + panic(err) + } + return bz +} +``` + +## Status + +Partially Accepted + +## Consequences + + + +### Positive + +The most relevant positive effect is that the signing bytes can easily be parsed by a +hardware module and a smart contract. Besides that: + +- clearer separation between requests and responses +- added error messages enable better error handling + + +### Negative + +- relatively huge change / refactoring touching quite some code +- lot's of places assume a `Vote` with a signature included -> they will need to +- need to modify some interfaces + +### Neutral + +not even the swiss are neutral diff --git a/docs/architecture/adr-025-commit.md b/docs/architecture/adr-025-commit.md new file mode 100644 index 000000000..a23d3803f --- /dev/null +++ b/docs/architecture/adr-025-commit.md @@ -0,0 +1,150 @@ +# ADR 025 Commit + +## Context + +Currently the `Commit` structure contains a lot of potentially redundant or unnecessary data. +It contains a list of precommits from every validator, where the precommit +includes the whole `Vote` structure. Thus each of the commit height, round, +type, and blockID are repeated for every validator, and could be deduplicated, +leading to very significant savings in block size. + +``` +type Commit struct { + BlockID BlockID `json:"block_id"` + Precommits []*Vote `json:"precommits"` +} + +type Vote struct { + ValidatorAddress Address `json:"validator_address"` + ValidatorIndex int `json:"validator_index"` + Height int64 `json:"height"` + Round int `json:"round"` + Timestamp time.Time `json:"timestamp"` + Type byte `json:"type"` + BlockID BlockID `json:"block_id"` + Signature []byte `json:"signature"` +} +``` + +The original tracking issue for this is [#1648](https://github.com/tendermint/tendermint/issues/1648). +We have discussed replacing the `Vote` type in `Commit` with a new `CommitSig` +type, which includes at minimum the vote signature. The `Vote` type will +continue to be used in the consensus reactor and elsewhere. + +A primary question is what should be included in the `CommitSig` beyond the +signature. One current constraint is that we must include a timestamp, since +this is how we calculuate BFT time, though we may be able to change this [in the +future](https://github.com/tendermint/tendermint/issues/2840). + +Other concerns here include: + +- Validator Address [#3596](https://github.com/tendermint/tendermint/issues/3596) - + Should the CommitSig include the validator address? It is very convenient to + do so, but likely not necessary. This was also discussed in [#2226](https://github.com/tendermint/tendermint/issues/2226). +- Absent Votes [#3591](https://github.com/tendermint/tendermint/issues/3591) - + How to represent absent votes? Currently they are just present as `nil` in the + Precommits list, which is actually problematic for serialization +- Other BlockIDs [#3485](https://github.com/tendermint/tendermint/issues/3485) - + How to represent votes for nil and for other block IDs? We currently allow + votes for nil and votes for alternative block ids, but just ignore them + + +## Decision + +Deduplicate the fields and introduce `CommitSig`: + +``` +type Commit struct { + Height int64 + Round int + BlockID BlockID `json:"block_id"` + Precommits []CommitSig `json:"precommits"` +} + +type CommitSig struct { + BlockID BlockIDFlag + ValidatorAddress Address + Timestamp time.Time + Signature []byte +} + + +// indicate which BlockID the signature is for +type BlockIDFlag int + +const ( + BlockIDFlagAbsent BlockIDFlag = iota // vote is not included in the Commit.Precommits + BlockIDFlagCommit // voted for the Commit.BlockID + BlockIDFlagNil // voted for nil +) + +``` + +Re the concerns outlined in the context: + +**Timestamp**: Leave the timestamp for now. Removing it and switching to +proposer based time will take more analysis and work, and will be left for a +future breaking change. In the meantime, the concerns with the current approach to +BFT time [can be +mitigated](https://github.com/tendermint/tendermint/issues/2840#issuecomment-529122431). + +**ValidatorAddress**: we include it in the `CommitSig` for now. While this +does increase the block size unecessarily (20-bytes per validator), it has some ergonomic and debugging advantages: + +- `Commit` contains everything necessary to reconstruct `[]Vote`, and doesn't depend on additional access to a `ValidatorSet` +- Lite clients can check if they know the validators in a commit without + re-downloading the validator set +- Easy to see directly in a commit which validators signed what without having + to fetch the validator set + +If and when we change the `CommitSig` again, for instance to remove the timestamp, +we can reconsider whether the ValidatorAddress should be removed. + +**Absent Votes**: we include absent votes explicitly with no Signature or +Timestamp but with the ValidatorAddress. This should resolve the serialization +issues and make it easy to see which validator's votes failed to be included. + +**Other BlockIDs**: We use a single byte to indicate which blockID a `CommitSig` +is for. The only options are: + - `Absent` - no vote received from the this validator, so no signature + - `Nil` - validator voted Nil - meaning they did not see a polka in time + - `Commit` - validator voted for this block + +Note this means we don't allow votes for any other blockIDs. If a signature is +included in a commit, it is either for nil or the correct blockID. According to +the Tendermint protocol and assumptions, there is no way for a correct validator to +precommit for a conflicting blockID in the same round an actual commit was +created. This was the consensus from +[#3485](https://github.com/tendermint/tendermint/issues/3485) + +We may want to consider supporting other blockIDs later, as a way to capture +evidence that might be helpful. We should clarify if/when/how doing so would +actually help first. To implement it, we could change the `Commit.BlockID` +field to a slice, where the first entry is the correct block ID and the other +entries are other BlockIDs that validators precommited before. The BlockIDFlag +enum can be extended to represent these additional block IDs on a per block +basis. + +## Status + +Implemented + +## Consequences + +### Positive + +Removing the Type/Height/Round/Index and the BlockID saves roughly 80 bytes per precommit. +It varies because some integers are varint. The BlockID contains two 32-byte hashes an integer, +and the Height is 8-bytes. + +For a chain with 100 validators, that's up to 8kB in savings per block! + + +### Negative + +- Large breaking change to the block and commit structure +- Requires differentiating in code between the Vote and CommitSig objects, which may add some complexity (votes need to be reconstructed to be verified and gossiped) + +### Neutral + +- Commit.Precommits no longer contains nil values diff --git a/docs/architecture/adr-026-general-merkle-proof.md b/docs/architecture/adr-026-general-merkle-proof.md new file mode 100644 index 000000000..5774c10f8 --- /dev/null +++ b/docs/architecture/adr-026-general-merkle-proof.md @@ -0,0 +1,49 @@ +# ADR 026: General Merkle Proof + +## Context + +We are using raw `[]byte` for merkle proofs in `abci.ResponseQuery`. It makes hard to handle multilayer merkle proofs and general cases. Here, new interface `ProofOperator` is defined. The users can defines their own Merkle proof format and layer them easily. + +Goals: +- Layer Merkle proofs without decoding/reencoding +- Provide general way to chain proofs +- Make the proof format extensible, allowing thirdparty proof types + +## Decision + +### ProofOperator + +`type ProofOperator` is an interface for Merkle proofs. The definition is: + +```go +type ProofOperator interface { + Run([][]byte) ([][]byte, error) + GetKey() []byte + ProofOp() ProofOp +} +``` + +Since a proof can treat various data type, `Run()` takes `[][]byte` as the argument, not `[]byte`. For example, a range proof's `Run()` can take multiple key-values as its argument. It will then return the root of the tree for the further process, calculated with the input value. + +`ProofOperator` does not have to be a Merkle proof - it can be a function that transforms the argument for intermediate process e.g. prepending the length to the `[]byte`. + +### ProofOp + +`type ProofOp` is a protobuf message which is a triple of `Type string`, `Key []byte`, and `Data []byte`. `ProofOperator` and `ProofOp`are interconvertible, using `ProofOperator.ProofOp()` and `OpDecoder()`, where `OpDecoder` is a function that each proof type can register for their own encoding scheme. For example, we can add an byte for encoding scheme before the serialized proof, supporting JSON decoding. + +## Status + +Implemented + +## Consequences + +### Positive + +- Layering becomes easier (no encoding/decoding at each step) +- Thirdparty proof format is available + +### Negative + +- Larger size for abci.ResponseQuery +- Unintuitive proof chaining(it is not clear what `Run()` is doing) +- Additional codes for registering `OpDecoder`s diff --git a/docs/architecture/adr-029-check-tx-consensus.md b/docs/architecture/adr-029-check-tx-consensus.md new file mode 100644 index 000000000..191a0ec8e --- /dev/null +++ b/docs/architecture/adr-029-check-tx-consensus.md @@ -0,0 +1,127 @@ +# ADR 029: Check block txs before prevote + +## Changelog + +04-10-2018: Update with link to issue +[#2384](https://github.com/tendermint/tendermint/issues/2384) and reason for rejection +19-09-2018: Initial Draft + +## Context + +We currently check a tx's validity through 2 ways. + +1. Through checkTx in mempool connection. +2. Through deliverTx in consensus connection. + +The 1st is called when external tx comes in, so the node should be a proposer this time. The 2nd is called when external block comes in and reach the commit phase, the node doesn't need to be the proposer of the block, however it should check the txs in that block. + +In the 2nd situation, if there are many invalid txs in the block, it would be too late for all nodes to discover that most txs in the block are invalid, and we'd better not record invalid txs in the blockchain too. + +## Proposed solution + +Therefore, we should find a way to check the txs' validity before send out a prevote. Currently we have cs.isProposalComplete() to judge whether a block is complete. We can have + +``` +func (blockExec *BlockExecutor) CheckBlock(block *types.Block) error { + // check txs of block. + for _, tx := range block.Txs { + reqRes := blockExec.proxyApp.CheckTxAsync(tx) + reqRes.Wait() + if reqRes.Response == nil || reqRes.Response.GetCheckTx() == nil || reqRes.Response.GetCheckTx().Code != abci.CodeTypeOK { + return errors.Errorf("tx %v check failed. response: %v", tx, reqRes.Response) + } + } + return nil +} +``` + +such a method in BlockExecutor to check all txs' validity in that block. + +However, this method should not be implemented like that, because checkTx will share the same state used in mempool in the app. So we should define a new interface method checkBlock in Application to indicate it to use the same state as deliverTx. + +``` +type Application interface { + // Info/Query Connection + Info(RequestInfo) ResponseInfo // Return application info + Query(RequestQuery) ResponseQuery // Query for state + + // Mempool Connection + CheckTx(tx []byte) ResponseCheckTx // Validate a tx for the mempool + + // Consensus Connection + InitChain(RequestInitChain) ResponseInitChain // Initialize blockchain with validators and other info from TendermintCore + CheckBlock(RequestCheckBlock) ResponseCheckBlock + BeginBlock(RequestBeginBlock) ResponseBeginBlock // Signals the beginning of a block + DeliverTx(tx []byte) ResponseDeliverTx // Deliver a tx for full processing + EndBlock(RequestEndBlock) ResponseEndBlock // Signals the end of a block, returns changes to the validator set + Commit() ResponseCommit // Commit the state and return the application Merkle root hash +} +``` + +All app should implement that method. For example, counter: + +``` +func (app *CounterApplication) CheckBlock(block types.Request_CheckBlock) types.ResponseCheckBlock { + if app.serial { + app.originalTxCount = app.txCount //backup the txCount state + for _, tx := range block.CheckBlock.Block.Txs { + if len(tx) > 8 { + return types.ResponseCheckBlock{ + Code: code.CodeTypeEncodingError, + Log: fmt.Sprintf("Max tx size is 8 bytes, got %d", len(tx))} + } + tx8 := make([]byte, 8) + copy(tx8[len(tx8)-len(tx):], tx) + txValue := binary.BigEndian.Uint64(tx8) + if txValue < uint64(app.txCount) { + return types.ResponseCheckBlock{ + Code: code.CodeTypeBadNonce, + Log: fmt.Sprintf("Invalid nonce. Expected >= %v, got %v", app.txCount, txValue)} + } + app.txCount++ + } + } + return types.ResponseCheckBlock{Code: code.CodeTypeOK} +} +``` + +In BeginBlock, the app should restore the state to the orignal state before checking the block: + +``` +func (app *CounterApplication) DeliverTx(tx []byte) types.ResponseDeliverTx { + if app.serial { + app.txCount = app.originalTxCount //restore the txCount state + } + app.txCount++ + return types.ResponseDeliverTx{Code: code.CodeTypeOK} +} +``` + +The txCount is like the nonce in ethermint, it should be restored when entering the deliverTx phase. While some operation like checking the tx signature needs not to be done again. So the deliverTx can focus on how a tx can be applied, ignoring the checking of the tx, because all the checking has already been done in the checkBlock phase before. + +An optional optimization is alter the deliverTx to deliverBlock. For the block has already been checked by checkBlock, so all the txs in it are valid. So the app can cache the block, and in the deliverBlock phase, it just needs to apply the block in the cache. This optimization can save network current in deliverTx. + + + +## Status + +Rejected + +## Decision + +Performance impact is considered too great. See [#2384](https://github.com/tendermint/tendermint/issues/2384) + +## Consequences + +### Positive + +- more robust to defend the adversary to propose a block full of invalid txs. + +### Negative + +- add a new interface method. app logic needs to adjust to appeal to it. +- sending all the tx data over the ABCI twice +- potentially redundant validations (eg. signature checks in both CheckBlock and + DeliverTx) + +### Neutral diff --git a/docs/architecture/adr-030-consensus-refactor.md b/docs/architecture/adr-030-consensus-refactor.md new file mode 100644 index 000000000..5c8c3d754 --- /dev/null +++ b/docs/architecture/adr-030-consensus-refactor.md @@ -0,0 +1,458 @@ +# ADR 030: Consensus Refactor + +## Context + +One of the biggest challenges this project faces is to proof that the +implementations of the specifications are correct, much like we strive to +formaly verify our alogrithms and protocols we should work towards high +confidence about the correctness of our program code. One of those is the core +of Tendermint - Consensus - which currently resides in the `consensus` package. +Over time there has been high friction making changes to the package due to the +algorithm being scattered in a side-effectful container (the current +`ConsensusState`). In order to test the algorithm a large object-graph needs to +be set up and even than the non-deterministic parts of the container makes will +prevent high certainty. Where ideally we have a 1-to-1 representation of the +[spec](https://github.com/tendermint/spec), ready and easy to test for domain +experts. + +Addresses: + +- [#1495](https://github.com/tendermint/tendermint/issues/1495) +- [#1692](https://github.com/tendermint/tendermint/issues/1692) + +## Decision + +To remedy these issues we plan a gradual, non-invasive refactoring of the +`consensus` package. Starting of by isolating the consensus alogrithm into +a pure function and a finite state machine to address the most pressuring issue +of lack of confidence. Doing so while leaving the rest of the package in tact +and have follow-up optional changes to improve the sepration of concerns. + +### Implementation changes + +The core of Consensus can be modelled as a function with clear defined inputs: + +* `State` - data container for current round, height, etc. +* `Event`- significant events in the network + +producing clear outputs; + +* `State` - updated input +* `Message` - signal what actions to perform + +```go +type Event int + +const ( + EventUnknown Event = iota + EventProposal + Majority23PrevotesBlock + Majority23PrecommitBlock + Majority23PrevotesAny + Majority23PrecommitAny + TimeoutNewRound + TimeoutPropose + TimeoutPrevotes + TimeoutPrecommit +) + +type Message int + +const ( + MeesageUnknown Message = iota + MessageProposal + MessageVotes + MessageDecision +) + +type State struct { + height uint64 + round uint64 + step uint64 + lockedValue interface{} // TODO: Define proper type. + lockedRound interface{} // TODO: Define proper type. + validValue interface{} // TODO: Define proper type. + validRound interface{} // TODO: Define proper type. + // From the original notes: valid(v) + valid interface{} // TODO: Define proper type. + // From the original notes: proposer(h, r) + proposer interface{} // TODO: Define proper type. +} + +func Consensus(Event, State) (State, Message) { + // Consolidate implementation. +} +``` + +Tracking of relevant information to feed `Event` into the function and act on +the output is left to the `ConsensusExecutor` (formerly `ConsensusState`). + +Benefits for testing surfacing nicely as testing for a sequence of events +against algorithm could be as simple as the following example: + +``` go +func TestConsensusXXX(t *testing.T) { + type expected struct { + message Message + state State + } + + // Setup order of events, initial state and expectation. + var ( + events = []struct { + event Event + want expected + }{ + // ... + } + state = State{ + // ... + } + ) + + for _, e := range events { + sate, msg = Consensus(e.event, state) + + // Test message expectation. + if msg != e.want.message { + t.Fatalf("have %v, want %v", msg, e.want.message) + } + + // Test state expectation. + if !reflect.DeepEqual(state, e.want.state) { + t.Fatalf("have %v, want %v", state, e.want.state) + } + } +} +``` + + +## Consensus Executor + +## Consensus Core + +```go +type Event interface{} + +type EventNewHeight struct { + Height int64 + ValidatorId int +} + +type EventNewRound HeightAndRound + +type EventProposal struct { + Height int64 + Round int + Timestamp Time + BlockID BlockID + POLRound int + Sender int +} + +type Majority23PrevotesBlock struct { + Height int64 + Round int + BlockID BlockID +} + +type Majority23PrecommitBlock struct { + Height int64 + Round int + BlockID BlockID +} + +type HeightAndRound struct { + Height int64 + Round int +} + +type Majority23PrevotesAny HeightAndRound +type Majority23PrecommitAny HeightAndRound +type TimeoutPropose HeightAndRound +type TimeoutPrevotes HeightAndRound +type TimeoutPrecommit HeightAndRound + + +type Message interface{} + +type MessageProposal struct { + Height int64 + Round int + BlockID BlockID + POLRound int +} + +type VoteType int + +const ( + VoteTypeUnknown VoteType = iota + Prevote + Precommit +) + + +type MessageVote struct { + Height int64 + Round int + BlockID BlockID + Type VoteType +} + + +type MessageDecision struct { + Height int64 + Round int + BlockID BlockID +} + +type TriggerTimeout struct { + Height int64 + Round int + Duration Duration +} + + +type RoundStep int + +const ( + RoundStepUnknown RoundStep = iota + RoundStepPropose + RoundStepPrevote + RoundStepPrecommit + RoundStepCommit +) + +type State struct { + Height int64 + Round int + Step RoundStep + LockedValue BlockID + LockedRound int + ValidValue BlockID + ValidRound int + ValidatorId int + ValidatorSetSize int +} + +func proposer(height int64, round int) int {} +func getValue() BlockID {} + +func Consensus(event Event, state State) (State, Message, TriggerTimeout) { + msg = nil + timeout = nil + switch event := event.(type) { + case EventNewHeight: + if event.Height > state.Height { + state.Height = event.Height + state.Round = -1 + state.Step = RoundStepPropose + state.LockedValue = nil + state.LockedRound = -1 + state.ValidValue = nil + state.ValidRound = -1 + state.ValidatorId = event.ValidatorId + } + return state, msg, timeout + + case EventNewRound: + if event.Height == state.Height and event.Round > state.Round { + state.Round = eventRound + state.Step = RoundStepPropose + if proposer(state.Height, state.Round) == state.ValidatorId { + proposal = state.ValidValue + if proposal == nil { + proposal = getValue() + } + msg = MessageProposal { state.Height, state.Round, proposal, state.ValidRound } + } + timeout = TriggerTimeout { state.Height, state.Round, timeoutPropose(state.Round) } + } + return state, msg, timeout + + case EventProposal: + if event.Height == state.Height and event.Round == state.Round and + event.Sender == proposal(state.Height, state.Round) and state.Step == RoundStepPropose { + if event.POLRound >= state.LockedRound or event.BlockID == state.BlockID or state.LockedRound == -1 { + msg = MessageVote { state.Height, state.Round, event.BlockID, Prevote } + } + state.Step = RoundStepPrevote + } + return state, msg, timeout + + case TimeoutPropose: + if event.Height == state.Height and event.Round == state.Round and state.Step == RoundStepPropose { + msg = MessageVote { state.Height, state.Round, nil, Prevote } + state.Step = RoundStepPrevote + } + return state, msg, timeout + + case Majority23PrevotesBlock: + if event.Height == state.Height and event.Round == state.Round and state.Step >= RoundStepPrevote and event.Round > state.ValidRound { + state.ValidRound = event.Round + state.ValidValue = event.BlockID + if state.Step == RoundStepPrevote { + state.LockedRound = event.Round + state.LockedValue = event.BlockID + msg = MessageVote { state.Height, state.Round, event.BlockID, Precommit } + state.Step = RoundStepPrecommit + } + } + return state, msg, timeout + + case Majority23PrevotesAny: + if event.Height == state.Height and event.Round == state.Round and state.Step == RoundStepPrevote { + timeout = TriggerTimeout { state.Height, state.Round, timeoutPrevote(state.Round) } + } + return state, msg, timeout + + case TimeoutPrevote: + if event.Height == state.Height and event.Round == state.Round and state.Step == RoundStepPrevote { + msg = MessageVote { state.Height, state.Round, nil, Precommit } + state.Step = RoundStepPrecommit + } + return state, msg, timeout + + case Majority23PrecommitBlock: + if event.Height == state.Height { + state.Step = RoundStepCommit + state.LockedValue = event.BlockID + } + return state, msg, timeout + + case Majority23PrecommitAny: + if event.Height == state.Height and event.Round == state.Round { + timeout = TriggerTimeout { state.Height, state.Round, timeoutPrecommit(state.Round) } + } + return state, msg, timeout + + case TimeoutPrecommit: + if event.Height == state.Height and event.Round == state.Round { + state.Round = state.Round + 1 + } + return state, msg, timeout + } +} + +func ConsensusExecutor() { + proposal = nil + votes = HeightVoteSet { Height: 1 } + state = State { + Height: 1 + Round: 0 + Step: RoundStepPropose + LockedValue: nil + LockedRound: -1 + ValidValue: nil + ValidRound: -1 + } + + event = EventNewHeight {1, id} + state, msg, timeout = Consensus(event, state) + + event = EventNewRound {state.Height, 0} + state, msg, timeout = Consensus(event, state) + + if msg != nil { + send msg + } + + if timeout != nil { + trigger timeout + } + + for { + select { + case message := <- msgCh: + switch msg := message.(type) { + case MessageProposal: + + case MessageVote: + if msg.Height == state.Height { + newVote = votes.AddVote(msg) + if newVote { + switch msg.Type { + case Prevote: + prevotes = votes.Prevotes(msg.Round) + if prevotes.WeakCertificate() and msg.Round > state.Round { + event = EventNewRound { msg.Height, msg.Round } + state, msg, timeout = Consensus(event, state) + state = handleStateChange(state, msg, timeout) + } + + if blockID, ok = prevotes.TwoThirdsMajority(); ok and blockID != nil { + if msg.Round == state.Round and hasBlock(blockID) { + event = Majority23PrevotesBlock { msg.Height, msg.Round, blockID } + state, msg, timeout = Consensus(event, state) + state = handleStateChange(state, msg, timeout) + } + if proposal != nil and proposal.POLRound == msg.Round and hasBlock(blockID) { + event = EventProposal { + Height: state.Height + Round: state.Round + BlockID: blockID + POLRound: proposal.POLRound + Sender: message.Sender + } + state, msg, timeout = Consensus(event, state) + state = handleStateChange(state, msg, timeout) + } + } + + if prevotes.HasTwoThirdsAny() and msg.Round == state.Round { + event = Majority23PrevotesAny { msg.Height, msg.Round, blockID } + state, msg, timeout = Consensus(event, state) + state = handleStateChange(state, msg, timeout) + } + + case Precommit: + + } + } + } + case timeout := <- timeoutCh: + + case block := <- blockCh: + + } + } +} + +func handleStateChange(state, msg, timeout) State { + if state.Step == Commit { + state = ExecuteBlock(state.LockedValue) + } + if msg != nil { + send msg + } + if timeout != nil { + trigger timeout + } +} + +``` + +### Implementation roadmap + +* implement proposed implementation +* replace currently scattered calls in `ConsensusState` with calls to the new + `Consensus` function +* rename `ConsensusState` to `ConsensusExecutor` to avoid confusion +* propose design for improved separation and clear information flow between + `ConsensusExecutor` and `ConsensusReactor` + +## Status + +Draft. + +## Consequences + +### Positive + +- isolated implementation of the algorithm +- improved testability - simpler to proof correctness +- clearer separation of concerns - easier to reason + +### Negative + +### Neutral diff --git a/docs/architecture/adr-033-pubsub.md b/docs/architecture/adr-033-pubsub.md new file mode 100644 index 000000000..7b7912a9f --- /dev/null +++ b/docs/architecture/adr-033-pubsub.md @@ -0,0 +1,247 @@ +# ADR 033: pubsub 2.0 + +Author: Anton Kaliaev (@melekes) + +## Changelog + +02-10-2018: Initial draft + +16-01-2019: Second version based on our conversation with Jae + +17-01-2019: Third version explaining how new design solves current issues + +25-01-2019: Fourth version to treat buffered and unbuffered channels differently + +## Context + +Since the initial version of the pubsub, there's been a number of issues +raised: [#951], [#1879], [#1880]. Some of them are high-level issues questioning the +core design choices made. Others are minor and mostly about the interface of +`Subscribe()` / `Publish()` functions. + +### Sync vs Async + +Now, when publishing a message to subscribers, we can do it in a goroutine: + +_using channels for data transmission_ +```go +for each subscriber { + out := subscriber.outc + go func() { + out <- msg + } +} +``` + +_by invoking callback functions_ +```go +for each subscriber { + go subscriber.callbackFn() +} +``` + +This gives us greater performance and allows us to avoid "slow client problem" +(when other subscribers have to wait for a slow subscriber). A pool of +goroutines can be used to avoid uncontrolled memory growth. + +In certain cases, this is what you want. But in our case, because we need +strict ordering of events (if event A was published before B, the guaranteed +delivery order will be A -> B), we can't publish msg in a new goroutine every time. + +We can also have a goroutine per subscriber, although we'd need to be careful +with the number of subscribers. It's more difficult to implement as well + +unclear if we'll benefit from it (cause we'd be forced to create N additional +channels to distribute msg to these goroutines). + +### Non-blocking send + +There is also a question whenever we should have a non-blocking send. +Currently, sends are blocking, so publishing to one client can block on +publishing to another. This means a slow or unresponsive client can halt the +system. Instead, we can use a non-blocking send: + +```go +for each subscriber { + out := subscriber.outc + select { + case out <- msg: + default: + log("subscriber %v buffer is full, skipping...") + } +} +``` + +This fixes the "slow client problem", but there is no way for a slow client to +know if it had missed a message. We could return a second channel and close it +to indicate subscription termination. On the other hand, if we're going to +stick with blocking send, **devs must always ensure subscriber's handling code +does not block**, which is a hard task to put on their shoulders. + +The interim option is to run goroutines pool for a single message, wait for all +goroutines to finish. This will solve "slow client problem", but we'd still +have to wait `max(goroutine_X_time)` before we can publish the next message. + +### Channels vs Callbacks + +Yet another question is whether we should use channels for message transmission or +call subscriber-defined callback functions. Callback functions give subscribers +more flexibility - you can use mutexes in there, channels, spawn goroutines, +anything you really want. But they also carry local scope, which can result in +memory leaks and/or memory usage increase. + +Go channels are de-facto standard for carrying data between goroutines. + +### Why `Subscribe()` accepts an `out` channel? + +Because in our tests, we create buffered channels (cap: 1). Alternatively, we +can make capacity an argument and return a channel. + +## Decision + +### MsgAndTags + +Use a `MsgAndTags` struct on the subscription channel to indicate what tags the +msg matched. + +```go +type MsgAndTags struct { + Msg interface{} + Tags TagMap +} +``` + +### Subscription Struct + + +Change `Subscribe()` function to return a `Subscription` struct: + +```go +type Subscription struct { + // private fields +} + +func (s *Subscription) Out() <-chan MsgAndTags +func (s *Subscription) Canceled() <-chan struct{} +func (s *Subscription) Err() error +``` + +`Out()` returns a channel onto which messages and tags are published. +`Unsubscribe`/`UnsubscribeAll` does not close the channel to avoid clients from +receiving a nil message. + +`Canceled()` returns a channel that's closed when the subscription is terminated +and supposed to be used in a select statement. + +If the channel returned by `Canceled()` is not closed yet, `Err()` returns nil. +If the channel is closed, `Err()` returns a non-nil error explaining why: +`ErrUnsubscribed` if the subscriber choose to unsubscribe, +`ErrOutOfCapacity` if the subscriber is not pulling messages fast enough and the channel returned by `Out()` became full. +After `Err()` returns a non-nil error, successive calls to `Err() return the same error. + +```go +subscription, err := pubsub.Subscribe(...) +if err != nil { + // ... +} +for { +select { + case msgAndTags <- subscription.Out(): + // ... + case <-subscription.Canceled(): + return subscription.Err() +} +``` + +### Capacity and Subscriptions + +Make the `Out()` channel buffered (with capacity 1) by default. In most cases, we want to +terminate the slow subscriber. Only in rare cases, we want to block the pubsub +(e.g. when debugging consensus). This should lower the chances of the pubsub +being frozen. + +```go +// outCap can be used to set capacity of Out channel +// (1 by default, must be greater than 0). +Subscribe(ctx context.Context, clientID string, query Query, outCap... int) (Subscription, error) { +``` + +Use a different function for an unbuffered channel: + +```go +// Subscription uses an unbuffered channel. Publishing will block. +SubscribeUnbuffered(ctx context.Context, clientID string, query Query) (Subscription, error) { +``` + +SubscribeUnbuffered should not be exposed to users. + +### Blocking/Nonblocking + +The publisher should treat these kinds of channels separately. +It should block on unbuffered channels (for use with internal consensus events +in the consensus tests) and not block on the buffered ones. If a client is too +slow to keep up with it's messages, it's subscription is terminated: + +for each subscription { + out := subscription.outChan + if cap(out) == 0 { + // block on unbuffered channel + out <- msg + } else { + // don't block on buffered channels + select { + case out <- msg: + default: + // set the error, notify on the cancel chan + subscription.err = fmt.Errorf("client is too slow for msg) + close(subscription.cancelChan) + + // ... unsubscribe and close out + } + } +} + +### How this new design solves the current issues? + +[#951] ([#1880]): + +Because of non-blocking send, situation where we'll deadlock is not possible +anymore. If the client stops reading messages, it will be removed. + +[#1879]: + +MsgAndTags is used now instead of a plain message. + +### Future problems and their possible solutions + +[#2826] + +One question I am still pondering about: how to prevent pubsub from slowing +down consensus. We can increase the pubsub queue size (which is 0 now). Also, +it's probably a good idea to limit the total number of subscribers. + +This can be made automatically. Say we set queue size to 1000 and, when it's >= +80% full, refuse new subscriptions. + +## Status + +Implemented + +## Consequences + +### Positive + +- more idiomatic interface +- subscribers know what tags msg was published with +- subscribers aware of the reason their subscription was canceled + +### Negative + +- (since v1) no concurrency when it comes to publishing messages + +### Neutral + + +[#951]: https://github.com/tendermint/tendermint/issues/951 +[#1879]: https://github.com/tendermint/tendermint/issues/1879 +[#1880]: https://github.com/tendermint/tendermint/issues/1880 +[#2826]: https://github.com/tendermint/tendermint/issues/2826 diff --git a/docs/architecture/adr-034-priv-validator-file-structure.md b/docs/architecture/adr-034-priv-validator-file-structure.md new file mode 100644 index 000000000..c87cec132 --- /dev/null +++ b/docs/architecture/adr-034-priv-validator-file-structure.md @@ -0,0 +1,72 @@ +# ADR 034: PrivValidator file structure + +## Changelog + +03-11-2018: Initial Draft + +## Context + +For now, the PrivValidator file `priv_validator.json` contains mutable and immutable parts. +Even in an insecure mode which does not encrypt private key on disk, it is reasonable to separate +the mutable part and immutable part. + +References: +[#1181](https://github.com/tendermint/tendermint/issues/1181) +[#2657](https://github.com/tendermint/tendermint/issues/2657) +[#2313](https://github.com/tendermint/tendermint/issues/2313) + +## Proposed Solution + +We can split mutable and immutable parts with two structs: +```go +// FilePVKey stores the immutable part of PrivValidator +type FilePVKey struct { + Address types.Address `json:"address"` + PubKey crypto.PubKey `json:"pub_key"` + PrivKey crypto.PrivKey `json:"priv_key"` + + filePath string +} + +// FilePVState stores the mutable part of PrivValidator +type FilePVLastSignState struct { + Height int64 `json:"height"` + Round int `json:"round"` + Step int8 `json:"step"` + Signature []byte `json:"signature,omitempty"` + SignBytes cmn.HexBytes `json:"signbytes,omitempty"` + + filePath string + mtx sync.Mutex +} +``` + +Then we can combine `FilePVKey` with `FilePVLastSignState` and will get the original `FilePV`. + +```go +type FilePV struct { + Key FilePVKey + LastSignState FilePVLastSignState +} +``` + +As discussed, `FilePV` should be located in `config`, and `FilePVLastSignState` should be stored in `data`. The +store path of each file should be specified in `config.yml`. + +What we need to do next is changing the methods of `FilePV`. + +## Status + +Implemented + +## Consequences + +### Positive + +- separate the mutable and immutable of PrivValidator + +### Negative + +- need to add more config for file path + +### Neutral diff --git a/docs/architecture/adr-035-documentation.md b/docs/architecture/adr-035-documentation.md new file mode 100644 index 000000000..92cb07916 --- /dev/null +++ b/docs/architecture/adr-035-documentation.md @@ -0,0 +1,40 @@ +# ADR 035: Documentation + +Author: @zramsay (Zach Ramsay) + +## Changelog + +### November 2nd 2018 + +- initial write-up + +## Context + +The Tendermint documentation has undergone several changes until settling on the current model. Originally, the documentation was hosted on the website and had to be updated asynchronously from the code. Along with the other repositories requiring documentation, the whole stack moved to using Read The Docs to automatically generate, publish, and host the documentation. This, however, was insufficient; the RTD site had advertisement, it wasn't easily accessible to devs, didn't collect metrics, was another set of external links, etc. + +## Decision + +For two reasons, the decision was made to use VuePress: + +1) ability to get metrics (implemented on both Tendermint and SDK) +2) host the documentation on the website as a `/docs` endpoint. + +This is done while maintaining synchrony between the docs and code, i.e., the website is built whenever the docs are updated. + +## Status + +The two points above have been implemented; the `config.js` has a Google Analytics identifier and the documentation workflow has been up and running largely without problems for several months. Details about the documentation build & workflow can be found [here](../DOCS_README.md) + +## Consequences + +Because of the organizational seperation between Tendermint & Cosmos, there is a challenge of "what goes where" for certain aspects of documentation. + +### Positive + +This architecture is largely positive relative to prior docs arrangements. + +### Negative + +A significant portion of the docs automation / build process is in private repos with limited access/visibility to devs. However, these tasks are handled by the SRE team. + +### Neutral diff --git a/docs/architecture/adr-036-empty-blocks-abci.md b/docs/architecture/adr-036-empty-blocks-abci.md new file mode 100644 index 000000000..ec4806cfa --- /dev/null +++ b/docs/architecture/adr-036-empty-blocks-abci.md @@ -0,0 +1,38 @@ +# ADR 036: Empty Blocks via ABCI + +## Changelog + +- {date}: {changelog} + +## Context + +> This section contains all the context one needs to understand the current state, and why there is a problem. It should be as succinct as possible and introduce the high level idea behind the solution. + +## Decision + +> This section explains all of the details of the proposed solution, including implementation details. +> It should also describe affects / corollary items that may need to be changed as a part of this. +> If the proposed change will be large, please also indicate a way to do the change to maximize ease of review. +> (e.g. the optimal split of things to do between separate PR's) + +## Status + +> A decision may be "proposed" if it hasn't been agreed upon yet, or "accepted" once it is agreed upon. If a later ADR changes or reverses a decision, it may be marked as "deprecated" or "superseded" with a reference to its replacement. + +{Deprecated|Proposed|Accepted|Declined} + +## Consequences + +> This section describes the consequences, after applying the decision. All consequences should be summarized here, not just the "positive" ones. + +### Positive + +### Negative + +### Neutral + +## References + +> Are there any relevant PR comments, issues that led up to this, or articles referenced for why we made the given design choice? If so link them here! + +- {reference link} diff --git a/docs/architecture/adr-037-deliver-block.md b/docs/architecture/adr-037-deliver-block.md new file mode 100644 index 000000000..c5e119c06 --- /dev/null +++ b/docs/architecture/adr-037-deliver-block.md @@ -0,0 +1,100 @@ +# ADR 037: Deliver Block + +Author: Daniil Lashin (@danil-lashin) + +## Changelog + +13-03-2019: Initial draft + +## Context + +Initial conversation: https://github.com/tendermint/tendermint/issues/2901 + +Some applications can handle transactions in parallel, or at least some +part of tx processing can be parallelized. Now it is not possible for developer +to execute txs in parallel because Tendermint delivers them consequentially. + +## Decision + +Now Tendermint have `BeginBlock`, `EndBlock`, `Commit`, `DeliverTx` steps +while executing block. This doc proposes merging this steps into one `DeliverBlock` +step. It will allow developers of applications to decide how they want to +execute transactions (in parallel or consequentially). Also it will simplify and +speed up communications between application and Tendermint. + +As @jaekwon [mentioned](https://github.com/tendermint/tendermint/issues/2901#issuecomment-477746128) +in discussion not all application will benefit from this solution. In some cases, +when application handles transaction consequentially, it way slow down the blockchain, +because it need to wait until full block is transmitted to application to start +processing it. Also, in the case of complete change of ABCI, we need to force all the apps +to change their implementation completely. That's why I propose to introduce one more ABCI +type. + +# Implementation Changes + +In addition to default application interface which now have this structure + +```go +type Application interface { + // Info and Mempool methods... + + // Consensus Connection + InitChain(RequestInitChain) ResponseInitChain // Initialize blockchain with validators and other info from TendermintCore + BeginBlock(RequestBeginBlock) ResponseBeginBlock // Signals the beginning of a block + DeliverTx(tx []byte) ResponseDeliverTx // Deliver a tx for full processing + EndBlock(RequestEndBlock) ResponseEndBlock // Signals the end of a block, returns changes to the validator set + Commit() ResponseCommit // Commit the state and return the application Merkle root hash +} +``` + +this doc proposes to add one more: + +```go +type Application interface { + // Info and Mempool methods... + + // Consensus Connection + InitChain(RequestInitChain) ResponseInitChain // Initialize blockchain with validators and other info from TendermintCore + DeliverBlock(RequestDeliverBlock) ResponseDeliverBlock // Deliver full block + Commit() ResponseCommit // Commit the state and return the application Merkle root hash +} + +type RequestDeliverBlock struct { + Hash []byte + Header Header + Txs Txs + LastCommitInfo LastCommitInfo + ByzantineValidators []Evidence +} + +type ResponseDeliverBlock struct { + ValidatorUpdates []ValidatorUpdate + ConsensusParamUpdates *ConsensusParams + Tags []kv.Pair + TxResults []ResponseDeliverTx +} + +``` + +Also, we will need to add new config param, which will specify what kind of ABCI application uses. +For example, it can be `abci_type`. Then we will have 2 types: +- `advanced` - current ABCI +- `simple` - proposed implementation + +## Status + +In review + +## Consequences + +### Positive + +- much simpler introduction and tutorials for new developers (instead of implementing 5 methods whey +will need to implement only 3) +- txs can be handled in parallel +- simpler interface +- faster communications between Tendermint and application + +### Negative + +- Tendermint should now support 2 kinds of ABCI diff --git a/docs/architecture/adr-038-non-zero-start-height.md b/docs/architecture/adr-038-non-zero-start-height.md new file mode 100644 index 000000000..7dd474ec7 --- /dev/null +++ b/docs/architecture/adr-038-non-zero-start-height.md @@ -0,0 +1,38 @@ +# ADR 038: Non-zero start height + +## Changelog + +- {date}: {changelog} + +## Context + +> This section contains all the context one needs to understand the current state, and why there is a problem. It should be as succinct as possible and introduce the high level idea behind the solution. + +## Decision + +> This section explains all of the details of the proposed solution, including implementation details. +> It should also describe affects / corollary items that may need to be changed as a part of this. +> If the proposed change will be large, please also indicate a way to do the change to maximize ease of review. +> (e.g. the optimal split of things to do between separate PR's) + +## Status + +> A decision may be "proposed" if it hasn't been agreed upon yet, or "accepted" once it is agreed upon. If a later ADR changes or reverses a decision, it may be marked as "deprecated" or "superseded" with a reference to its replacement. + +{Deprecated|Proposed|Accepted|Declined} + +## Consequences + +> This section describes the consequences, after applying the decision. All consequences should be summarized here, not just the "positive" ones. + +### Positive + +### Negative + +### Neutral + +## References + +> Are there any relevant PR comments, issues that led up to this, or articles referenced for why we made the given design choice? If so link them here! + +- {reference link} diff --git a/docs/architecture/adr-039-peer-behaviour.md b/docs/architecture/adr-039-peer-behaviour.md new file mode 100644 index 000000000..2fb63f4b2 --- /dev/null +++ b/docs/architecture/adr-039-peer-behaviour.md @@ -0,0 +1,159 @@ +# ADR 039: Peer Behaviour Interface + +## Changelog +* 07-03-2019: Initial draft +* 14-03-2019: Updates from feedback + +## Context + +The responsibility for signaling and acting upon peer behavior lacks a single +owning component and is heavily coupled with the network stack[1](#references). Reactors +maintain a reference to the `p2p.Switch` which they use to call +`switch.StopPeerForError(...)` when a peer misbehaves and +`switch.MarkAsGood(...)` when a peer contributes in some meaningful way. +While the switch handles `StopPeerForError` internally, the `MarkAsGood` +method delegates to another component, `p2p.AddrBook`. This scheme of delegation +across Switch obscures the responsibility for handling peer behavior +and ties up the reactors in a larger dependency graph when testing. + +## Decision + +Introduce a `PeerBehaviour` interface and concrete implementations which +provide methods for reactors to signal peer behavior without direct +coupling `p2p.Switch`. Introduce a ErrorBehaviourPeer to provide +concrete reasons for stopping peers. Introduce GoodBehaviourPeer to provide +concrete ways in which a peer contributes. + +### Implementation Changes + +PeerBehaviour then becomes an interface for signaling peer errors as well +as for marking peers as `good`. + +```go +type PeerBehaviour interface { + Behaved(peer Peer, reason GoodBehaviourPeer) + Errored(peer Peer, reason ErrorBehaviourPeer) +} +``` + +Instead of signaling peers to stop with arbitrary reasons: +`reason interface{}` + +We introduce a concrete error type ErrorBehaviourPeer: +```go +type ErrorBehaviourPeer int + +const ( + ErrorBehaviourUnknown = iota + ErrorBehaviourBadMessage + ErrorBehaviourMessageOutofOrder + ... +) +``` + +To provide additional information on the ways a peer contributed, we introduce +the GoodBehaviourPeer type. + +```go +type GoodBehaviourPeer int + +const ( + GoodBehaviourVote = iota + GoodBehaviourBlockPart + ... +) +``` + +As a first iteration we provide a concrete implementation which wraps +the switch: +```go +type SwitchedPeerBehaviour struct { + sw *Switch +} + +func (spb *SwitchedPeerBehaviour) Errored(peer Peer, reason ErrorBehaviourPeer) { + spb.sw.StopPeerForError(peer, reason) +} + +func (spb *SwitchedPeerBehaviour) Behaved(peer Peer, reason GoodBehaviourPeer) { + spb.sw.MarkPeerAsGood(peer) +} + +func NewSwitchedPeerBehaviour(sw *Switch) *SwitchedPeerBehaviour { + return &SwitchedPeerBehaviour{ + sw: sw, + } +} +``` + +Reactors, which are often difficult to unit test[2](#references) could use an implementation which exposes the signals produced by the reactor in +manufactured scenarios: + +```go +type ErrorBehaviours map[Peer][]ErrorBehaviourPeer +type GoodBehaviours map[Peer][]GoodBehaviourPeer + +type StorePeerBehaviour struct { + eb ErrorBehaviours + gb GoodBehaviours +} + +func NewStorePeerBehaviour() *StorePeerBehaviour{ + return &StorePeerBehaviour{ + eb: make(ErrorBehaviours), + gb: make(GoodBehaviours), + } +} + +func (spb StorePeerBehaviour) Errored(peer Peer, reason ErrorBehaviourPeer) { + if _, ok := spb.eb[peer]; !ok { + spb.eb[peer] = []ErrorBehaviours{reason} + } else { + spb.eb[peer] = append(spb.eb[peer], reason) + } +} + +func (mpb *StorePeerBehaviour) GetErrored() ErrorBehaviours { + return mpb.eb +} + + +func (spb StorePeerBehaviour) Behaved(peer Peer, reason GoodBehaviourPeer) { + if _, ok := spb.gb[peer]; !ok { + spb.gb[peer] = []GoodBehaviourPeer{reason} + } else { + spb.gb[peer] = append(spb.gb[peer], reason) + } +} + +func (spb *StorePeerBehaviour) GetBehaved() GoodBehaviours { + return spb.gb +} +``` + +## Status + +Accepted + +## Consequences + +### Positive + + * De-couple signaling from acting upon peer behavior. + * Reduce the coupling of reactors and the Switch and the network + stack + * The responsibility of managing peer behavior can be migrated to + a single component instead of split between the switch and the + address book. + +### Negative + + * The first iteration will simply wrap the Switch and introduce a + level of indirection. + +### Neutral + +## References + +1. Issue [#2067](https://github.com/tendermint/tendermint/issues/2067): P2P Refactor +2. PR: [#3506](https://github.com/tendermint/tendermint/pull/3506): ADR 036: Blockchain Reactor Refactor diff --git a/docs/architecture/adr-040-blockchain-reactor-refactor.md b/docs/architecture/adr-040-blockchain-reactor-refactor.md new file mode 100644 index 000000000..520d55b5d --- /dev/null +++ b/docs/architecture/adr-040-blockchain-reactor-refactor.md @@ -0,0 +1,534 @@ +# ADR 040: Blockchain Reactor Refactor + +## Changelog + +19-03-2019: Initial draft + +## Context + +The Blockchain Reactor's high level responsibility is to enable peers who are far behind the current state of the +blockchain to quickly catch up by downloading many blocks in parallel from its peers, verifying block correctness, and +executing them against the ABCI application. We call the protocol executed by the Blockchain Reactor `fast-sync`. +The current architecture diagram of the blockchain reactor can be found here: + +![Blockchain Reactor Architecture Diagram](img/bc-reactor.png) + +The current architecture consists of dozens of routines and it is tightly depending on the `Switch`, making writing +unit tests almost impossible. Current tests require setting up complex dependency graphs and dealing with concurrency. +Note that having dozens of routines is in this case overkill as most of the time routines sits idle waiting for +something to happen (message to arrive or timeout to expire). Due to dependency on the `Switch`, testing relatively +complex network scenarios and failures (for example adding and removing peers) is very complex tasks and frequently lead +to complex tests with not deterministic behavior ([#3400]). Impossibility to write proper tests makes confidence in +the code low and this resulted in several issues (some are fixed in the meantime and some are still open): +[#3400], [#2897], [#2896], [#2699], [#2888], [#2457], [#2622], [#2026]. + +## Decision + +To remedy these issues we plan a major refactor of the blockchain reactor. The proposed architecture is largely inspired +by ADR-30 and is presented on the following diagram: +![Blockchain Reactor Refactor Diagram](img/bc-reactor-refactor.png) + +We suggest a concurrency architecture where the core algorithm (we call it `Controller`) is extracted into a finite +state machine. The active routine of the reactor is called `Executor` and is responsible for receiving and sending +messages from/to peers and triggering timeouts. What messages should be sent and timeouts triggered is determined mostly +by the `Controller`. The exception is `Peer Heartbeat` mechanism which is `Executor` responsibility. The heartbeat +mechanism is used to remove slow and unresponsive peers from the peer list. Writing of unit tests is simpler with +this architecture as most of the critical logic is part of the `Controller` function. We expect that simpler concurrency +architecture will not have significant negative effect on the performance of this reactor (to be confirmed by +experimental evaluation). + + +### Implementation changes + +We assume the following system model for "fast sync" protocol: + +* a node is connected to a random subset of all nodes that represents its peer set. Some nodes are correct and some + might be faulty. We don't make assumptions about ratio of faulty nodes, i.e., it is possible that all nodes in some + peer set are faulty. +* we assume that communication between correct nodes is synchronous, i.e., if a correct node `p` sends a message `m` to + a correct node `q` at time `t`, then `q` will receive message the latest at time `t+Delta` where `Delta` is a system + parameter that is known by network participants. `Delta` is normally chosen to be an order of magnitude higher than + the real communication delay (maximum) between correct nodes. Therefore if a correct node `p` sends a request message + to a correct node `q` at time `t` and there is no the corresponding reply at time `t + 2*Delta`, then `p` can assume + that `q` is faulty. Note that the network assumptions for the consensus reactor are different (we assume partially + synchronous model there). + +The requirements for the "fast sync" protocol are formally specified as follows: + +- `Correctness`: If a correct node `p` is connected to a correct node `q` for a long enough period of time, then `p` +- will eventually download all requested blocks from `q`. +- `Termination`: If a set of peers of a correct node `p` is stable (no new nodes are added to the peer set of `p`) for +- a long enough period of time, then protocol eventually terminates. +- `Fairness`: A correct node `p` sends requests for blocks to all peers from its peer set. + +As explained above, the `Executor` is responsible for sending and receiving messages that are part of the `fast-sync` +protocol. The following messages are exchanged as part of `fast-sync` protocol: + +``` go +type Message int +const ( + MessageUnknown Message = iota + MessageStatusRequest + MessageStatusResponse + MessageBlockRequest + MessageBlockResponse +) +``` +`MessageStatusRequest` is sent periodically to all peers as a request for a peer to provide its current height. It is +part of the `Peer Heartbeat` mechanism and a failure to respond timely to this message results in a peer being removed +from the peer set. Note that the `Peer Heartbeat` mechanism is used only while a peer is in `fast-sync` mode. We assume +here existence of a mechanism that gives node a possibility to inform its peers that it is in the `fast-sync` mode. + +``` go +type MessageStatusRequest struct { + SeqNum int64 // sequence number of the request +} +``` +`MessageStatusResponse` is sent as a response to `MessageStatusRequest` to inform requester about the peer current +height. + +``` go +type MessageStatusResponse struct { + SeqNum int64 // sequence number of the corresponding request + Height int64 // current peer height +} +``` + +`MessageBlockRequest` is used to make a request for a block and the corresponding commit certificate at a given height. + +``` go +type MessageBlockRequest struct { + Height int64 +} +``` + +`MessageBlockResponse` is a response for the corresponding block request. In addition to providing the block and the +corresponding commit certificate, it contains also a current peer height. + +``` go +type MessageBlockResponse struct { + Height int64 + Block Block + Commit Commit + PeerHeight int64 +} +``` + +In addition to sending and receiving messages, and `HeartBeat` mechanism, controller is also managing timeouts +that are triggered upon `Controller` request. `Controller` is then informed once a timeout expires. + +``` go +type TimeoutTrigger int +const ( + TimeoutUnknown TimeoutTrigger = iota + TimeoutResponseTrigger + TimeoutTerminationTrigger +) +``` + +The `Controller` can be modelled as a function with clearly defined inputs: + +* `State` - current state of the node. Contains data about connected peers and its behavior, pending requests, +* received blocks, etc. +* `Event` - significant events in the network. + +producing clear outputs: + +* `State` - updated state of the node, +* `MessageToSend` - signal what message to send and to which peer +* `TimeoutTrigger` - signal that timeout should be triggered. + + +We consider the following `Event` types: + +``` go +type Event int +const ( + EventUnknown Event = iota + EventStatusReport + EventBlockRequest + EventBlockResponse + EventRemovePeer + EventTimeoutResponse + EventTimeoutTermination +) +``` + +`EventStatusResponse` event is generated once `MessageStatusResponse` is received by the `Executor`. + +``` go +type EventStatusReport struct { + PeerID ID + Height int64 +} +``` + +`EventBlockRequest` event is generated once `MessageBlockRequest` is received by the `Executor`. + +``` go +type EventBlockRequest struct { + Height int64 + PeerID p2p.ID +} +``` +`EventBlockResponse` event is generated upon reception of `MessageBlockResponse` message by the `Executor`. + +``` go +type EventBlockResponse struct { + Height int64 + Block Block + Commit Commit + PeerID ID + PeerHeight int64 +} +``` +`EventRemovePeer` is generated by `Executor` to signal that the connection to a peer is closed due to peer misbehavior. + +``` go +type EventRemovePeer struct { + PeerID ID +} +``` +`EventTimeoutResponse` is generated by `Executor` to signal that a timeout triggered by `TimeoutResponseTrigger` has +expired. + +``` go +type EventTimeoutResponse struct { + PeerID ID + Height int64 +} +``` +`EventTimeoutTermination` is generated by `Executor` to signal that a timeout triggered by `TimeoutTerminationTrigger` +has expired. + +``` go +type EventTimeoutTermination struct { + Height int64 +} +``` + +`MessageToSend` is just a wrapper around `Message` type that contains id of the peer to which message should be sent. + +``` go +type MessageToSend struct { + PeerID ID + Message Message +} +``` + +The Controller state machine can be in two modes: `ModeFastSync` when +a node is trying to catch up with the network by downloading committed blocks, +and `ModeConsensus` in which it executes Tendermint consensus protocol. We +consider that `fast sync` mode terminates once the Controller switch to +`ModeConsensus`. + +``` go +type Mode int +const ( + ModeUnknown Mode = iota + ModeFastSync + ModeConsensus +) +``` +`Controller` is managing the following state: + +``` go +type ControllerState struct { + Height int64 // the first block that is not committed + Mode Mode // mode of operation + PeerMap map[ID]PeerStats // map of peer IDs to peer statistics + MaxRequestPending int64 // maximum height of the pending requests + FailedRequests []int64 // list of failed block requests + PendingRequestsNum int // total number of pending requests + Store []BlockInfo // contains list of downloaded blocks + Executor BlockExecutor // store, verify and executes blocks +} +``` + +`PeerStats` data structure keeps for every peer its current height and a list of pending requests for blocks. + +``` go +type PeerStats struct { + Height int64 + PendingRequest int64 // a request sent to this peer +} +``` + +`BlockInfo` data structure is used to store information (as part of block store) about downloaded blocks: from what peer + a block and the corresponding commit certificate are received. +``` go +type BlockInfo struct { + Block Block + Commit Commit + PeerID ID // a peer from which we received the corresponding Block and Commit +} +``` + +The `Controller` is initialized by providing an initial height (`startHeight`) from which it will start downloading +blocks from peers and the current state of the `BlockExecutor`. + +``` go +func NewControllerState(startHeight int64, executor BlockExecutor) ControllerState { + state = ControllerState {} + state.Height = startHeight + state.Mode = ModeFastSync + state.MaxRequestPending = startHeight - 1 + state.PendingRequestsNum = 0 + state.Executor = executor + initialize state.PeerMap, state.FailedRequests and state.Store to empty data structures + return state +} +``` + +The core protocol logic is given with the following function: + +``` go +func handleEvent(state ControllerState, event Event) (ControllerState, Message, TimeoutTrigger, Error) { + msg = nil + timeout = nil + error = nil + + switch state.Mode { + case ModeConsensus: + switch event := event.(type) { + case EventBlockRequest: + msg = createBlockResponseMessage(state, event) + return state, msg, timeout, error + default: + error = "Only respond to BlockRequests while in ModeConsensus!" + return state, msg, timeout, error + } + + case ModeFastSync: + switch event := event.(type) { + case EventBlockRequest: + msg = createBlockResponseMessage(state, event) + return state, msg, timeout, error + + case EventStatusResponse: + return handleEventStatusResponse(event, state) + + case EventRemovePeer: + return handleEventRemovePeer(event, state) + + case EventBlockResponse: + return handleEventBlockResponse(event, state) + + case EventResponseTimeout: + return handleEventResponseTimeout(event, state) + + case EventTerminationTimeout: + // Termination timeout is triggered in case of empty peer set and in case there are no pending requests. + // If this timeout expires and in the meantime no new peers are added or new pending requests are made + // then `fast-sync` mode terminates by switching to `ModeConsensus`. + // Note that termination timeout should be higher than the response timeout. + if state.Height == event.Height && state.PendingRequestsNum == 0 { state.State = ConsensusMode } + return state, msg, timeout, error + + default: + error = "Received unknown event type!" + return state, msg, timeout, error + } + } +} +``` + +``` go +func createBlockResponseMessage(state ControllerState, event BlockRequest) MessageToSend { + msgToSend = nil + if _, ok := state.PeerMap[event.PeerID]; !ok { peerStats = PeerStats{-1, -1} } + if state.Executor.ContainsBlockWithHeight(event.Height) && event.Height > peerStats.Height { + peerStats = event.Height + msg = BlockResponseMessage{ + Height: event.Height, + Block: state.Executor.getBlock(eventHeight), + Commit: state.Executor.getCommit(eventHeight), + PeerID: event.PeerID, + CurrentHeight: state.Height - 1, + } + msgToSend = MessageToSend { event.PeerID, msg } + } + state.PeerMap[event.PeerID] = peerStats + return msgToSend +} +``` + +``` go +func handleEventStatusResponse(event EventStatusResponse, state ControllerState) (ControllerState, MessageToSend, TimeoutTrigger, Error) { + if _, ok := state.PeerMap[event.PeerID]; !ok { + peerStats = PeerStats{ -1, -1 } + } else { + peerStats = state.PeerMap[event.PeerID] + } + + if event.Height > peerStats.Height { peerStats.Height = event.Height } + // if there are no pending requests for this peer, try to send him a request for block + if peerStats.PendingRequest == -1 { + msg = createBlockRequestMessages(state, event.PeerID, peerStats.Height) + // msg is nil if no request for block can be made to a peer at this point in time + if msg != nil { + peerStats.PendingRequests = msg.Height + state.PendingRequestsNum++ + // when a request for a block is sent to a peer, a response timeout is triggered. If no corresponding block is sent by the peer + // during response timeout period, then the peer is considered faulty and is removed from the peer set. + timeout = ResponseTimeoutTrigger{ msg.PeerID, msg.Height, PeerTimeout } + } else if state.PendingRequestsNum == 0 { + // if there are no pending requests and no new request can be placed to the peer, termination timeout is triggered. + // If termination timeout expires and we are still at the same height and there are no pending requests, the "fast-sync" + // mode is finished and we switch to `ModeConsensus`. + timeout = TerminationTimeoutTrigger{ state.Height, TerminationTimeout } + } + } + state.PeerMap[event.PeerID] = peerStats + return state, msg, timeout, error +} +``` + +``` go +func handleEventRemovePeer(event EventRemovePeer, state ControllerState) (ControllerState, MessageToSend, TimeoutTrigger, Error) { + if _, ok := state.PeerMap[event.PeerID]; ok { + pendingRequest = state.PeerMap[event.PeerID].PendingRequest + // if a peer is removed from the peer set, its pending request is declared failed and added to the `FailedRequests` list + // so it can be retried. + if pendingRequest != -1 { + add(state.FailedRequests, pendingRequest) + } + state.PendingRequestsNum-- + delete(state.PeerMap, event.PeerID) + // if the peer set is empty after removal of this peer then termination timeout is triggered. + if state.PeerMap.isEmpty() { + timeout = TerminationTimeoutTrigger{ state.Height, TerminationTimeout } + } + } else { error = "Removing unknown peer!" } + return state, msg, timeout, error +``` + +``` go +func handleEventBlockResponse(event EventBlockResponse, state ControllerState) (ControllerState, MessageToSend, TimeoutTrigger, Error) + if state.PeerMap[event.PeerID] { + peerStats = state.PeerMap[event.PeerID] + // when expected block arrives from a peer, it is added to the store so it can be verified and if correct executed after. + if peerStats.PendingRequest == event.Height { + peerStats.PendingRequest = -1 + state.PendingRequestsNum-- + if event.PeerHeight > peerStats.Height { peerStats.Height = event.PeerHeight } + state.Store[event.Height] = BlockInfo{ event.Block, event.Commit, event.PeerID } + // blocks are verified sequentially so adding a block to the store does not mean that it will be immediately verified + // as some of the previous blocks might be missing. + state = verifyBlocks(state) // it can lead to event.PeerID being removed from peer list + if _, ok := state.PeerMap[event.PeerID]; ok { + // we try to identify new request for a block that can be asked to the peer + msg = createBlockRequestMessage(state, event.PeerID, peerStats.Height) + if msg != nil { + peerStats.PendingRequests = msg.Height + state.PendingRequestsNum++ + // if request for block is made, response timeout is triggered + timeout = ResponseTimeoutTrigger{ msg.PeerID, msg.Height, PeerTimeout } + } else if state.PeerMap.isEmpty() || state.PendingRequestsNum == 0 { + // if the peer map is empty (the peer can be removed as block verification failed) or there are no pending requests + // termination timeout is triggered. + timeout = TerminationTimeoutTrigger{ state.Height, TerminationTimeout } + } + } + } else { error = "Received Block from wrong peer!" } + } else { error = "Received Block from unknown peer!" } + + state.PeerMap[event.PeerID] = peerStats + return state, msg, timeout, error +} +``` + +``` go +func handleEventResponseTimeout(event, state) { + if _, ok := state.PeerMap[event.PeerID]; ok { + peerStats = state.PeerMap[event.PeerID] + // if a response timeout expires and the peer hasn't delivered the block, the peer is removed from the peer list and + // the request is added to the `FailedRequests` so the block can be downloaded from other peer + if peerStats.PendingRequest == event.Height { + add(state.FailedRequests, pendingRequest) + delete(state.PeerMap, event.PeerID) + state.PendingRequestsNum-- + // if peer set is empty, then termination timeout is triggered + if state.PeerMap.isEmpty() { + timeout = TimeoutTrigger{ state.Height, TerminationTimeout } + } + } + } + return state, msg, timeout, error +} +``` + +``` go +func createBlockRequestMessage(state ControllerState, peerID ID, peerHeight int64) MessageToSend { + msg = nil + blockHeight = -1 + r = find request in state.FailedRequests such that r <= peerHeight // returns `nil` if there are no such request + // if there is a height in failed requests that can be downloaded from the peer send request to it + if r != nil { + blockNumber = r + delete(state.FailedRequests, r) + } else if state.MaxRequestPending < peerHeight { + // if height of the maximum pending request is smaller than peer height, then ask peer for next block + state.MaxRequestPending++ + blockHeight = state.MaxRequestPending // increment state.MaxRequestPending and then return the new value + } + + if blockHeight > -1 { msg = MessageToSend { peerID, MessageBlockRequest { blockHeight } } + return msg +} +``` + +``` go +func verifyBlocks(state State) State { + done = false + for !done { + block = state.Store[height] + if block != nil { + verified = verify block.Block using block.Commit // return `true` is verification succeed, 'false` otherwise + + if verified { + block.Execute() // executing block is costly operation so it might make sense executing asynchronously + state.Height++ + } else { + // if block verification failed, then it is added to `FailedRequests` and the peer is removed from the peer set + add(state.FailedRequests, height) + state.Store[height] = nil + if _, ok := state.PeerMap[block.PeerID]; ok { + pendingRequest = state.PeerMap[block.PeerID].PendingRequest + // if there is a pending request sent to the peer that is just to be removed from the peer set, add it to `FailedRequests` + if pendingRequest != -1 { + add(state.FailedRequests, pendingRequest) + state.PendingRequestsNum-- + } + delete(state.PeerMap, event.PeerID) + } + done = true + } + } else { done = true } + } + return state +} +``` + +In the proposed architecture `Controller` is not active task, i.e., it is being called by the `Executor`. Depending on +the return values returned by `Controller`,`Executor` will send a message to some peer (`msg` != nil), trigger a +timeout (`timeout` != nil) or deal with errors (`error` != nil). +In case a timeout is triggered, it will provide as an input to `Controller` the corresponding timeout event once +timeout expires. + + +## Status + +Draft. + +## Consequences + +### Positive + +- isolated implementation of the algorithm +- improved testability - simpler to prove correctness +- clearer separation of concerns - easier to reason + +### Negative + +### Neutral diff --git a/docs/architecture/adr-041-proposer-selection-via-abci.md b/docs/architecture/adr-041-proposer-selection-via-abci.md new file mode 100644 index 000000000..58bf20de3 --- /dev/null +++ b/docs/architecture/adr-041-proposer-selection-via-abci.md @@ -0,0 +1,29 @@ +# ADR 041: Application should be in charge of validator set + +## Changelog + + +## Context + +Currently Tendermint is in charge of validator set and proposer selection. Application can only update the validator set changes at EndBlock time. +To support Light Client, application should make sure at least 2/3 of validator are same at each round. + +Application should have full control on validator set changes and proposer selection. In each round Application can provide the list of validators for next rounds in order with their power. The proposer is the first in the list, in case the proposer is offline, the next one can propose the proposal and so on. + +## Decision + +## Status + +## Consequences + +Tendermint is no more in charge of validator set and its changes. The Application should provide the correct information. +However Tendermint can provide psedo-randomness algorithm to help application for selecting proposer in each round. + +### Positive + +### Negative + +### Neutral + +## References + diff --git a/docs/architecture/adr-042-state-sync.md b/docs/architecture/adr-042-state-sync.md new file mode 100644 index 000000000..a15893183 --- /dev/null +++ b/docs/architecture/adr-042-state-sync.md @@ -0,0 +1,235 @@ +# ADR 042: State Sync Design + +## Changelog + +2019-06-27: Init by EB +2019-07-04: Follow up by brapse + +## Context +StateSync is a feature which would allow a new node to receive a +snapshot of the application state without downloading blocks or going +through consensus. Once downloaded, the node could switch to FastSync +and eventually participate in consensus. The goal of StateSync is to +facilitate setting up a new node as quickly as possible. + +## Considerations +Because Tendermint doesn't know anything about the application state, +StateSync will broker messages between nodes and through +the ABCI to an opaque applicaton. The implementation will have multiple +touch points on both the tendermint code base and ABCI application. + +* A StateSync reactor to facilitate peer communication - Tendermint +* A Set of ABCI messages to transmit application state to the reactor - Tendermint +* A Set of MultiStore APIs for exposing snapshot data to the ABCI - ABCI application +* A Storage format with validation and performance considerations - ABCI application + +### Implementation Properties +Beyond the approach, any implementation of StateSync can be evaluated +across different criteria: + +* Speed: Expected throughput of producing and consuming snapshots +* Safety: Cost of pushing invalid snapshots to a node +* Liveness: Cost of preventing a node from receiving/constructing a snapshot +* Effort: How much effort does an implementation require + +### Implementation Question +* What is the format of a snapshot + * Complete snapshot + * Ordered IAVL key ranges + * Compressed individually chunks which can be validated +* How is data validated + * Trust a peer with it's data blindly + * Trust a majority of peers + * Use light client validation to validate each chunk against consensus + produced merkle tree root +* What are the performance characteristics + * Random vs sequential reads + * How parallelizeable is the scheduling algorithm + +### Proposals +Broadly speaking there are two approaches to this problem which have had +varying degrees of discussion and progress. These approach can be +summarized as: + +**Lazy:** Where snapshots are produced dynamically at request time. This +solution would use the existing data structure. +**Eager:** Where snapshots are produced periodically and served from disk at +request time. This solution would create an auxiliary data structure +optimized for batch read/writes. + +Additionally the propsosals tend to vary on how they provide safety +properties. + +**LightClient** Where a client can aquire the merkle root from the block +headers synchronized from a trusted validator set. Subsets of the application state, +called chunks can therefore be validated on receipt to ensure each chunk +is part of the merkle root. + +**Majority of Peers** Where manifests of chunks along with checksums are +downloaded and compared against versions provided by a majority of +peers. + +#### Lazy StateSync +An initial specification was published by Alexis Sellier. +In this design, the state has a given `size` of primitive elements (like +keys or nodes), each element is assigned a number from 0 to `size-1`, +and chunks consists of a range of such elements. Ackratos raised +[some concerns](https://docs.google.com/document/d/1npGTAa1qxe8EQZ1wG0a0Sip9t5oX2vYZNUDwr_LVRR4/edit) +about this design, somewhat specific to the IAVL tree, and mainly concerning +performance of random reads and of iterating through the tree to determine element numbers +(ie. elements aren't indexed by the element number). + +An alternative design was suggested by Jae Kwon in +[#3639](https://github.com/tendermint/tendermint/issues/3639) where chunking +happens lazily and in a dynamic way: nodes request key ranges from their peers, +and peers respond with some subset of the +requested range and with notes on how to request the rest in parallel from other +peers. Unlike chunk numbers, keys can be verified directly. And if some keys in the +range are ommitted, proofs for the range will fail to verify. +This way a node can start by requesting the entire tree from one peer, +and that peer can respond with say the first few keys, and the ranges to request +from other peers. + +Additionally, per chunk validation tends to come more naturally to the +Lazy approach since it tends to use the existing structure of the tree +(ie. keys or nodes) rather than state-sync specific chunks. Such a +design for tendermint was originally tracked in +[#828](https://github.com/tendermint/tendermint/issues/828). + +#### Eager StateSync +Warp Sync as implemented in OpenEthereum to rapidly +download both blocks and state snapshots from peers. Data is carved into ~4MB +chunks and snappy compressed. Hashes of snappy compressed chunks are stored in a +manifest file which co-ordinates the state-sync. Obtaining a correct manifest +file seems to require an honest majority of peers. This means you may not find +out the state is incorrect until you download the whole thing and compare it +with a verified block header. + +A similar solution was implemented by Binance in +[#3594](https://github.com/tendermint/tendermint/pull/3594) +based on their initial implementation in +[PR #3243](https://github.com/tendermint/tendermint/pull/3243) +and [some learnings](https://docs.google.com/document/d/1npGTAa1qxe8EQZ1wG0a0Sip9t5oX2vYZNUDwr_LVRR4/edit). +Note this still requires the honest majority peer assumption. + +As an eager protocol, warp-sync can efficiently compress larger, more +predicatable chunks once per snapshot and service many new peers. By +comparison lazy chunkers would have to compress each chunk at request +time. + +### Analysis of Lazy vs Eager +Lazy vs Eager have more in common than they differ. They all require +reactors on the tendermint side, a set of ABCI messages and a method for +serializing/deserializing snapshots facilitated by a SnapshotFormat. + +The biggest difference between Lazy and Eager proposals is in the +read/write patterns necessitated by serving a snapshot chunk. +Specifically, Lazy State Sync performs random reads to the underlying data +structure while Eager can optimize for sequential reads. + +This distinctin between approaches was demonstrated by Binance's +[ackratos](https://github.com/ackratos) in their implementation of [Lazy +State sync](https://github.com/tendermint/tendermint/pull/3243), The +[analysis](https://docs.google.com/document/d/1npGTAa1qxe8EQZ1wG0a0Sip9t5oX2vYZNUDwr_LVRR4/) +of the performance, and follow up implementation of [Warp +Sync](http://github.com/tendermint/tendermint/pull/3594). + +#### Compairing Security Models +There are several different security models which have been +discussed/proposed in the past but generally fall into two categories. + +Light client validation: In which the node receiving data is expected to +first perform a light client sync and have all the nessesary block +headers. Within the trusted block header (trusted in terms of from a +validator set subject to [weak +subjectivity](https://github.com/tendermint/tendermint/pull/3795)) and +can compare any subset of keys called a chunk against the merkle root. +The advantage of light client validation is that the block headers are +signed by validators which have something to lose for malicious +behavior. If a validator were to provide an invalid proof, they can be +slashed. + +Majority of peer validation: A manifest file containing a list of chunks +along with checksums of each chunk is downloaded from a +trusted source. That source can be a community resource similar to +[sum.golang.org](https://sum.golang.org) or downloaded from the majority +of peers. One disadantage of the majority of peer security model is the +vuliberability to eclipse attacks in which a malicious users looks to +saturate a target node's peer list and produce a manufactured picture of +majority. + +A third option would be to include snapshot related data in the +block header. This could include the manifest with related checksums and be +secured through consensus. One challenge of this approach is to +ensure that creating snapshots does not put undo burden on block +propsers by synchronizing snapshot creation and block creation. One +approach to minimizing the burden is for snapshots for height +`H` to be included in block `H+n` where `n` is some `n` block away, +giving the block propser enough time to complete the snapshot +asynchronousy. + +## Proposal: Eager StateSync With Per Chunk Light Client Validation +The conclusion after some concideration of the advantages/disadvances of +eager/lazy and different security models is to produce a state sync +which eagerly produces snapshots and uses light client validation. This +approach has the performance advantages of pre-computing efficient +snapshots which can streamed to new nodes on demand using sequential IO. +Secondly, by using light client validation we cna validate each chunk on +receipt and avoid the potential eclipse attack of majority of peer based +security. + +### Implementation +Tendermint is responsible for downloading and verifying chunks of +AppState from peers. ABCI Application is responsible for taking +AppStateChunk objects from TM and constructing a valid state tree whose +root corresponds with the AppHash of syncing block. In particular we +will need implement: + +* Build new StateSync reactor brokers message transmission between the peers + and the ABCI application +* A set of ABCI Messages +* Design SnapshotFormat as an interface which can: + * validate chunks + * read/write chunks from file + * read/write chunks to/from application state store + * convert manifests into chunkRequest ABCI messages +* Implement SnapshotFormat for cosmos-hub with concrete implementation for: + * read/write chunks in a way which can be: + * parallelized across peers + * validated on receipt + * read/write to/from IAVL+ tree + +![StateSync Architecture Diagram](img/state-sync.png) + +## Implementation Path +* Create StateSync reactor based on [#3753](https://github.com/tendermint/tendermint/pull/3753) +* Design SnapshotFormat with an eye towards cosmos-hub implementation +* ABCI message to send/receive SnapshotFormat +* IAVL+ changes to support SnapshotFormat +* Deliver Warp sync (no chunk validation) +* light client implementation for weak subjectivity +* Deliver StateSync with chunk validation + +## Status + +Proposed + +## Concequences + +### Neutral + +### Positive +* Safe & performant state sync design substantiated with real world implementation experience +* General interfaces allowing application specific innovation +* Parallizable implementation trajectory with reasonable engineering effort + +### Negative +* Static Scheduling lacks opportunity for real time chunk availability optimizations + +## References +[sync: Sync current state without full replay for Applications](https://github.com/tendermint/tendermint/issues/828) - original issue +[tendermint state sync proposal 2](https://docs.google.com/document/d/1npGTAa1qxe8EQZ1wG0a0Sip9t5oX2vYZNUDwr_LVRR4/edit) - ackratos proposal +[proposal 2 implementation](https://github.com/tendermint/tendermint/pull/3243) - ackratos implementation +[WIP General/Lazy State-Sync pseudo-spec](https://github.com/tendermint/tendermint/issues/3639) - Jae Proposal +[Warp Sync Implementation](https://github.com/tendermint/tendermint/pull/3594) - ackratos +[Chunk Proposal](https://github.com/tendermint/tendermint/pull/3799) - Bucky proposed diff --git a/docs/architecture/adr-043-blockchain-riri-org.md b/docs/architecture/adr-043-blockchain-riri-org.md new file mode 100644 index 000000000..19c2334c3 --- /dev/null +++ b/docs/architecture/adr-043-blockchain-riri-org.md @@ -0,0 +1,404 @@ +# ADR 043: Blockhchain Reactor Riri-Org + +## Changelog + +- 18-06-2019: Initial draft +- 08-07-2019: Reviewed +- 29-11-2019: Implemented +- 14-02-2020: Updated with the implementation details + +## Context + +The blockchain reactor is responsible for two high level processes:sending/receiving blocks from peers and FastSync-ing blocks to catch upnode who is far behind. The goal of [ADR-40](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-040-blockchain-reactor-refactor.md) was to refactor these two processes by separating business logic currently wrapped up in go-channels into pure `handle*` functions. While the ADR specified what the final form of the reactor might look like it lacked guidance on intermediary steps to get there. +The following diagram illustrates the state of the [blockchain-reorg](https://github.com/tendermint/tendermint/pull/3561) reactor which will be referred to as `v1`. + +![v1 Blockchain Reactor Architecture +Diagram](https://github.com/tendermint/tendermint/blob/f9e556481654a24aeb689bdadaf5eab3ccd66829/docs/architecture/img/blockchain-reactor-v1.png) + +While `v1` of the blockchain reactor has shown significant improvements in terms of simplifying the concurrency model, the current PR has run into few roadblocks. + +- The current PR large and difficult to review. +- Block gossiping and fast sync processes are highly coupled to the shared `Pool` data structure. +- Peer communication is spread over multiple components creating complex dependency graph which must be mocked out during testing. +- Timeouts modeled as stateful tickers introduce non-determinism in tests + +This ADR is meant to specify the missing components and control necessary to achieve [ADR-40](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-040-blockchain-reactor-refactor.md). + +## Decision + +Partition the responsibilities of the blockchain reactor into a set of components which communicate exclusively with events. Events will contain timestamps allowing each component to track time as internal state. The internal state will be mutated by a set of `handle*` which will produce event(s). The integration between components will happen in the reactor and reactor tests will then become integration tests between components. This design will be known as `v2`. + +![v2 Blockchain Reactor Architecture +Diagram](https://github.com/tendermint/tendermint/blob/584e67ac3fac220c5c3e0652e3582eca8231e814/docs/architecture/img/blockchain-reactor-v2.png) + +### Fast Sync Related Communication Channels + +The diagram below shows the fast sync routines and the types of channels and queues used to communicate with each other. +In addition the per reactor channels used by the sendRoutine to send messages over the Peer MConnection are shown. + +![v2 Blockchain Channels and Queues +Diagram](https://github.com/tendermint/tendermint/blob/5cf570690f989646fb3b615b734da503f038891f/docs/architecture/img/blockchain-v2-channels.png) + +### Reactor changes in detail + +The reactor will include a demultiplexing routine which will send each message to each sub routine for independent processing. Each sub routine will then select the messages it's interested in and call the handle specific function specified in [ADR-40](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-040-blockchain-reactor-refactor.md). The demuxRoutine acts as "pacemaker" setting the time in which events are expected to be handled. + +```go +func demuxRoutine(msgs, scheduleMsgs, processorMsgs, ioMsgs) { + timer := time.NewTicker(interval) + for { + select { + case <-timer.C: + now := evTimeCheck{time.Now()} + schedulerMsgs <- now + processorMsgs <- now + ioMsgs <- now + case msg:= <- msgs: + msg.time = time.Now() + // These channels should produce backpressure before + // being full to avoid starving each other + schedulerMsgs <- msg + processorMsgs <- msg + ioMesgs <- msg + if msg == stop { + break; + } + } + } +} + +func processRoutine(input chan Message, output chan Message) { + processor := NewProcessor(..) + for { + msg := <- input + switch msg := msg.(type) { + case bcBlockRequestMessage: + output <- processor.handleBlockRequest(msg)) + ... + case stop: + processor.stop() + break; + } +} + +func scheduleRoutine(input chan Message, output chan Message) { + schelduer = NewScheduler(...) + for { + msg := <-msgs + switch msg := input.(type) { + case bcBlockResponseMessage: + output <- scheduler.handleBlockResponse(msg) + ... + case stop: + schedule.stop() + break; + } + } +} +``` + +## Lifecycle management + +A set of routines for individual processes allow processes to run in parallel with clear lifecycle management. `Start`, `Stop`, and `AddPeer` hooks currently present in the reactor will delegate to the sub-routines allowing them to manage internal state independent without further coupling to the reactor. + +```go +func (r *BlockChainReactor) Start() { + r.msgs := make(chan Message, maxInFlight) + schedulerMsgs := make(chan Message) + processorMsgs := make(chan Message) + ioMsgs := make(chan Message) + + go processorRoutine(processorMsgs, r.msgs) + go scheduleRoutine(schedulerMsgs, r.msgs) + go ioRoutine(ioMsgs, r.msgs) + ... +} + +func (bcR *BlockchainReactor) Receive(...) { + ... + r.msgs <- msg + ... +} + +func (r *BlockchainReactor) Stop() { + ... + r.msgs <- stop + ... +} + +... +func (r *BlockchainReactor) Stop() { + ... + r.msgs <- stop + ... +} +... + +func (r *BlockchainReactor) AddPeer(peer p2p.Peer) { + ... + r.msgs <- bcAddPeerEv{peer.ID} + ... +} + +``` + +## IO handling + +An io handling routine within the reactor will isolate peer communication. Message going through the ioRoutine will usually be one way, using `p2p` APIs. In the case in which the `p2p` API such as `trySend` return errors, the ioRoutine can funnel those message back to the demuxRoutine for distribution to the other routines. For instance errors from the ioRoutine can be consumed by the scheduler to inform better peer selection implementations. + +```go +func (r *BlockchainReacor) ioRoutine(ioMesgs chan Message, outMsgs chan Message) { + ... + for { + msg := <-ioMsgs + switch msg := msg.(type) { + case scBlockRequestMessage: + queued := r.sendBlockRequestToPeer(...) + if queued { + outMsgs <- ioSendQueued{...} + } + case scStatusRequestMessage + r.sendStatusRequestToPeer(...) + case bcPeerError + r.Swtich.StopPeerForError(msg.src) + ... + ... + case bcFinished + break; + } + } +} + +``` + +### Processor Internals + +The processor is responsible for ordering, verifying and executing blocks. The Processor will maintain an internal cursor `height` refering to the last processed block. As a set of blocks arrive unordered, the Processor will check if it has `height+1` necessary to process the next block. The processor also maintains the map `blockPeers` of peers to height, to keep track of which peer provided the block at `height`. `blockPeers` can be used in`handleRemovePeer(...)` to reschedule all unprocessed blocks provided by a peer who has errored. + +```go +type Processor struct { + height int64 // the height cursor + state ... + blocks [height]*Block // keep a set of blocks in memory until they are processed + blockPeers [height]PeerID // keep track of which heights came from which peerID + lastTouch timestamp +} + +func (proc *Processor) handleBlockResponse(peerID, block) { + if block.height <= height || block[block.height] { + } else if blocks[block.height] { + return errDuplicateBlock{} + } else { + blocks[block.height] = block + } + + if blocks[height] && blocks[height+1] { + ... = state.Validators.VerifyCommit(...) + ... = store.SaveBlock(...) + state, err = blockExec.ApplyBlock(...) + ... + if err == nil { + delete blocks[height] + height++ + lastTouch = msg.time + return pcBlockProcessed{height-1} + } else { + ... // Delete all unprocessed block from the peer + return pcBlockProcessError{peerID, height} + } + } +} + +func (proc *Processor) handleRemovePeer(peerID) { + events = [] + // Delete all unprocessed blocks from peerID + for i = height; i < len(blocks); i++ { + if blockPeers[i] == peerID { + events = append(events, pcBlockReschedule{height}) + + delete block[height] + } + } + return events +} + +func handleTimeCheckEv(time) { + if time - lastTouch > timeout { + // Timeout the processor + ... + } +} +``` + +## Schedule + +The Schedule maintains the internal state used for scheduling blockRequestMessages based on some scheduling algorithm. The schedule needs to maintain state on: + +- The state `blockState` of every block seem up to height of maxHeight +- The set of peers and their peer state `peerState` +- which peers have which blocks +- which blocks have been requested from which peers + +```go +type blockState int + +const ( + blockStateNew = iota + blockStatePending, + blockStateReceived, + blockStateProcessed +) + +type schedule { + // a list of blocks in which blockState + blockStates map[height]blockState + + // a map of which blocks are available from which peers + blockPeers map[height]map[p2p.ID]scPeer + + // a map of peerID to schedule specific peer struct `scPeer` + peers map[p2p.ID]scPeer + + // a map of heights to the peer we are waiting for a response from + pending map[height]scPeer + + targetPending int // the number of blocks we want in blockStatePending + targetReceived int // the number of blocks we want in blockStateReceived + + peerTimeout int + peerMinSpeed int +} + +func (sc *schedule) numBlockInState(state blockState) uint32 { + num := 0 + for i := sc.minHeight(); i <= sc.maxHeight(); i++ { + if sc.blockState[i] == state { + num++ + } + } + return num +} + + +func (sc *schedule) popSchedule(maxRequest int) []scBlockRequestMessage { + // We only want to schedule requests such that we have less than sc.targetPending and sc.targetReceived + // This ensures we don't saturate the network or flood the processor with unprocessed blocks + todo := min(sc.targetPending - sc.numBlockInState(blockStatePending), sc.numBlockInState(blockStateReceived)) + events := []scBlockRequestMessage{} + for i := sc.minHeight(); i < sc.maxMaxHeight(); i++ { + if todo == 0 { + break + } + if blockStates[i] == blockStateNew { + peer = sc.selectPeer(blockPeers[i]) + sc.blockStates[i] = blockStatePending + sc.pending[i] = peer + events = append(events, scBlockRequestMessage{peerID: peer.peerID, height: i}) + todo-- + } + } + return events +} +... + +type scPeer struct { + peerID p2p.ID + numOustandingRequest int + lastTouched time.Time + monitor flow.Monitor +} + +``` + +# Scheduler + +The scheduler is configured to maintain a target `n` of in flight +messages and will use feedback from `_blockResponseMessage`, +`_statusResponseMessage` and `_peerError` produce an optimal assignment +of scBlockRequestMessage at each `timeCheckEv`. + +``` + +func handleStatusResponse(peerID, height, time) { + schedule.touchPeer(peerID, time) + schedule.setPeerHeight(peerID, height) +} + +func handleBlockResponseMessage(peerID, height, block, time) { + schedule.touchPeer(peerID, time) + schedule.markReceived(peerID, height, size(block)) +} + +func handleNoBlockResponseMessage(peerID, height, time) { + schedule.touchPeer(peerID, time) + // reschedule that block, punish peer... + ... +} + +func handlePeerError(peerID) { + // Remove the peer, reschedule the requests + ... +} + +func handleTimeCheckEv(time) { + // clean peer list + + events = [] + for peerID := range schedule.peersNotTouchedSince(time) { + pending = schedule.pendingFrom(peerID) + schedule.setPeerState(peerID, timedout) + schedule.resetBlocks(pending) + events = append(events, peerTimeout{peerID}) + } + + events = append(events, schedule.popSchedule()) + + return events +} +``` + +## Peer + +The Peer Stores per peer state based on messages received by the scheduler. + +```go +type Peer struct { + lastTouched timestamp + lastDownloaded timestamp + pending map[height]struct{} + height height // max height for the peer + state { + pending, // we know the peer but not the height + active, // we know the height + timeout // the peer has timed out + } +} +``` + +## Status + +Implemented + +## Consequences + +### Positive + +- Test become deterministic +- Simulation becomes a-termporal: no need wait for a wall-time timeout +- Peer Selection can be independently tested/simulated +- Develop a general approach to refactoring reactors + +### Negative + +### Neutral + +### Implementation Path + +- Implement the scheduler, test the scheduler, review the rescheduler +- Implement the processor, test the processor, review the processor +- Implement the demuxer, write integration test, review integration tests + +## References + +- [ADR-40](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-040-blockchain-reactor-refactor.md): The original blockchain reactor re-org proposal +- [Blockchain re-org](https://github.com/tendermint/tendermint/pull/3561): The current blockchain reactor re-org implementation (v1) diff --git a/docs/architecture/adr-044-lite-client-with-weak-subjectivity.md b/docs/architecture/adr-044-lite-client-with-weak-subjectivity.md new file mode 100644 index 000000000..d42d699a8 --- /dev/null +++ b/docs/architecture/adr-044-lite-client-with-weak-subjectivity.md @@ -0,0 +1,141 @@ +# ADR 044: Lite Client with Weak Subjectivity + +## Changelog +* 13-07-2019: Initial draft +* 14-08-2019: Address cwgoes comments + +## Context + +The concept of light clients was introduced in the Bitcoin white paper. It +describes a watcher of distributed consensus process that only validates the +consensus algorithm and not the state machine transactions within. + +Tendermint light clients allow bandwidth & compute-constrained devices, such as smartphones, low-power embedded chips, or other blockchains to +efficiently verify the consensus of a Tendermint blockchain. This forms the +basis of safe and efficient state synchronization for new network nodes and +inter-blockchain communication (where a light client of one Tendermint instance +runs in another chain's state machine). + +In a network that is expected to reliably punish validators for misbehavior +by slashing bonded stake and where the validator set changes +infrequently, clients can take advantage of this assumption to safely +synchronize a lite client without downloading the intervening headers. + +Light clients (and full nodes) operating in the Proof Of Stake context need a +trusted block height from a trusted source that is no older than 1 unbonding +window plus a configurable evidence submission synchrony bound. This is called “weak subjectivity”. + +Weak subjectivity is required in Proof of Stake blockchains because it is +costless for an attacker to buy up voting keys that are no longer bonded and +fork the network at some point in its prior history. See Vitalik’s post at +[Proof of Stake: How I Learned to Love Weak +Subjectivity](https://blog.ethereum.org/2014/11/25/proof-stake-learned-love-weak-subjectivity/). + +Currently, Tendermint provides a lite client implementation in the +[light](https://github.com/tendermint/tendermint/tree/master/light) package. This +lite client implements a bisection algorithm that tries to use a binary search +to find the minimum number of block headers where the validator set voting +power changes are less than < 1/3rd. This interface does not support weak +subjectivity at this time. The Cosmos SDK also does not support counterfactual +slashing, nor does the lite client have any capacity to report evidence making +these systems *theoretically unsafe*. + +NOTE: Tendermint provides a somewhat different (stronger) light client model +than Bitcoin under eclipse, since the eclipsing node(s) can only fool the light +client if they have two-thirds of the private keys from the last root-of-trust. + +## Decision + +### The Weak Subjectivity Interface + +Add the weak subjectivity interface for when a new light client connects to the +network or when a light client that has been offline for longer than the +unbonding period connects to the network. Specifically, the node needs to +initialize the following structure before syncing from user input: + +``` +type TrustOptions struct { + // Required: only trust commits up to this old. + // Should be equal to the unbonding period minus some delta for evidence reporting. + TrustPeriod time.Duration `json:"trust-period"` + + // Option 1: TrustHeight and TrustHash can both be provided + // to force the trusting of a particular height and hash. + // If the latest trusted height/hash is more recent, then this option is + // ignored. + TrustHeight int64 `json:"trust-height"` + TrustHash []byte `json:"trust-hash"` + + // Option 2: Callback can be set to implement a confirmation + // step if the trust store is uninitialized, or expired. + Callback func(height int64, hash []byte) error +} +``` + +The expectation is the user will get this information from a trusted source +like a validator, a friend, or a secure website. A more user friendly +solution with trust tradeoffs is that we establish an https based protocol with +a default end point that populates this information. Also an on-chain registry +of roots-of-trust (e.g. on the Cosmos Hub) seems likely in the future. + +### Linear Verification + +The linear verification algorithm requires downloading all headers +between the `TrustHeight` and the `LatestHeight`. The lite client downloads the +full header for the provided `TrustHeight` and then proceeds to download `N+1` +headers and applies the [Tendermint validation +rules](https://github.com/tendermint/tendermint/tree/master/spec/light-client/verification/README.md) +to each block. + +### Bisecting Verification + +Bisecting Verification is a more bandwidth and compute intensive mechanism that +in the most optimistic case requires a light client to only download two block +headers to come into synchronization. + +The bisection algorithm proceeds in the following fashion. The client downloads +and verifies the full block header for `TrustHeight` and then fetches +`LatestHeight` blocker header. The client then verifies the `LatestHeight` +header. Finally the client attempts to verify the `LatestHeight` header with +voting powers taken from `NextValidatorSet` in the `TrustHeight` header. This +verification will succeed if the validators from `TrustHeight` still have > 2/3 ++1 of voting power in the `LatestHeight`. If this succeeds, the client is fully +synchronized. If this fails, then following Bisection Algorithm should be +executed. + +The Client tries to download the block at the mid-point block between +`LatestHeight` and `TrustHeight` and attempts that same algorithm as above +using `MidPointHeight` instead of `LatestHeight` and a different threshold - +1/3 +1 of voting power for *non-adjacent headers*. In the case the of failure, +recursively perform the `MidPoint` verification until success then start over +with an updated `NextValidatorSet` and `TrustHeight`. + +If the client encounters a forged header, it should submit the header along +with some other intermediate headers as the evidence of misbehavior to other +full nodes. After that, it can retry the bisection using another full node. An +optimal client will cache trusted headers from the previous run to minimize +network usage. + +--- + +Check out the formal specification +[here](https://github.com/tendermint/tendermint/tree/master/spec/light-client). + +## Status + +Implemented + +## Consequences + +### Positive + +* light client which is safe to use (it can go offline, but not for too long) + +### Negative + +* complexity of bisection + +### Neutral + +* social consensus can be prone to errors (for cases where a new light client + joins a network or it has been offline for too long) diff --git a/docs/architecture/adr-045-abci-evidence.md b/docs/architecture/adr-045-abci-evidence.md new file mode 100644 index 000000000..abdc0aba3 --- /dev/null +++ b/docs/architecture/adr-045-abci-evidence.md @@ -0,0 +1,140 @@ +# ADR 45 - ABCI Evidence Handling + +## Changelog +* 21-09-2019: Initial draft + +## Context + +Evidence is a distinct component in a Tendermint block and has it's own reactor +for high priority gossipping. Currently, Tendermint supports only a single form of evidence, an explicit +equivocation, where a validator signs conflicting blocks at the same +height/round. It is detected in real-time in the consensus reactor, and gossiped +through the evidence reactor. Evidence can also be submitted through the RPC. + +Currently, Tendermint does not gracefully handle a fork on the main chain. +If a fork is detected, the node panics. At this point manual intervention and +social consensus are required to reconfigure. We'd like to do something more +graceful here, but that's for another day. + +It's possible to fool lite clients without there being a fork on the +main chain - so called Fork-Lite. See the +[fork accountability](https://github.com/tendermint/tendermint/blob/master/spec/light-client/accountability/README.md) +document for more details. For a sequential lite client, this can happen via +equivocation or amnesia attacks. For a skipping lite client this can also happen +via lunatic validator attacks. There must be some way for applications to punish +all forms of misbehavior. + +The essential question is whether Tendermint should manage the evidence +verification, or whether it should treat evidence more like a transaction (ie. +arbitrary bytes) and let the application handle it (including all the signature +checking). + +Currently, evidence verification is handled by Tendermint. Once committed, +[evidence is passed over +ABCI](https://github.com/tendermint/tendermint/blob/master/proto/tendermint/abci/types.proto#L354) +in BeginBlock in a reduced form that includes only +the type of evidence, its height and timestamp, the validator it's from, and the +total voting power of the validator set at the height. The app trusts Tendermint +to perform the evidence verification, as the ABCI evidence does not contain the +signatures and additional data for the app to verify itself. + +Arguments in favor of leaving evidence handling in Tendermint: + +1) Attacks on full nodes must be detectable by full nodes in real time, ie. within the consensus reactor. + So at the very least, any evidence involved in something that could fool a full + node must be handled natively by Tendermint as there would otherwise be no way + for the ABCI app to detect it (ie. we don't send all votes we receive during + consensus to the app ... ). + +2) Amensia attacks can not be easily detected - they require an interactive + protocol among all the validators to submit justification for their past + votes. Our best notion of [how to do this + currently](https://github.com/tendermint/tendermint/blob/c67154232ca8be8f5c21dff65d154127adc4f7bb/docs/spec/consensus/fork-detection.md) + is via a centralized + monitor service that is trusted for liveness to aggregate data from + current and past validators, but which produces a proof of misbehavior (ie. + via amnesia) that can be verified by anyone, including the blockchain. + Validators must submit all the votes they saw for the relevant consensus + height to justify their precommits. This is quite specific to the Tendermint + protocol and may change if the protocol is upgraded. Hence it would be awkward + to co-ordinate this from the app. + +3) Evidence gossipping is similar to tx gossipping, but it should be higher + priority. Since the mempool does not support any notion of priority yet, + evidence is gossipped through a distinct Evidence reactor. If we just treated + evidence like any other transaction, leaving it entirely to the application, + Tendermint would have no way to know how to prioritize it, unless/until we + significantly upgrade the mempool. Thus we would need to continue to treat evidence + distinctly and update the ABCI to either support sending Evidence through + CheckTx/DeliverTx, or to introduce new CheckEvidence/DeliverEvidence methods. + In either case we'd need to make more changes to ABCI then if Tendermint + handled things and we just added support for another evidence type that could be included + in BeginBlock. + +4) All ABCI application frameworks will benefit from most of the heavy lifting + being handled by Tendermint, rather than each of them needing to re-implement + all the evidence verification logic in each language. + +Arguments in favor of moving evidence handling to the application: + +5) Skipping lite clients require us to track the set of all validators that were + bonded over some period in case validators that are unbonding but still + slashable sign invalid headers to fool lite clients. The Cosmos-SDK + staking/slashing modules track this, as it's used for slashing. + Tendermint does not currently track this, though it does keep track of the + validator set at every height. This leans in favour of managing evidence in + the app to avoid redundantly managing the historical validator set data in + Tendermint + +6) Applications supporting cross-chain validation will be required to process + evidence from other chains. This data will come in the form of a transaction, + but it means the app will be required to have all the functionality to process + evidence, even if the evidence for its own chain is handled directly by + Tendermint. + +7) Evidence from lite clients may be large and constitute some form of DoS + vector against full nodes. Putting it in transactions allows it to engage the application's fee + mechanism to pay for cost of executions in the event the evidence is false. + This means the evidence submitter must be able to afford the fees for the + submission, but of course it should be refunded if the evidence is valid. + That said, the burden is mostly on full nodes, which don't necessarily benefit + from fees. + + +## Decision + +The above mostly seems to suggest that evidence detection belongs in Tendermint. +(5) does not impose particularly large obligations on Tendermint and (6) just +means the app can use Tendermint libraries. That said, (7) is potentially +cause for some concern, though it could still attack full nodes that weren't associated with validators +(ie. that don't benefit from fees). This could be handled out of band, for instance by +full nodes offering the light client service via payment channels or via some +other payment service. This can also be mitigated by banning client IPs if they +send bad data. Note the burden is on the client to actually send us a lot of +data in the first place. + +A separate ADR will describe how Tendermint will handle these new forms of +evidence, in terms of how it will engage the monitoring protocol described in +the [fork +detection](https://github.com/tendermint/tendermint/blob/c67154232ca8be8f5c21dff65d154127adc4f7bb/docs/spec/consensus/fork-detection.md) document, +and how it will track past validators and manage DoS issues. + +## Status + +Proposed. + +## Consequences + +### Positive + +- No real changes to ABCI +- Tendermint handles evidence for all apps + +### Neutral + +- Need to be careful about denial of service on the Tendermint RPC + +### Negative + +- Tendermint duplicates data by tracking all pubkeys that were validators during + the unbonding period diff --git a/docs/architecture/adr-046-light-client-implementation.md b/docs/architecture/adr-046-light-client-implementation.md new file mode 100644 index 000000000..15d77373d --- /dev/null +++ b/docs/architecture/adr-046-light-client-implementation.md @@ -0,0 +1,169 @@ +# ADR 046: Lite Client Implementation + +## Changelog +* 13-02-2020: Initial draft +* 26-02-2020: Cross-checking the first header +* 28-02-2020: Bisection algorithm details +* 31-03-2020: Verify signature got changed + +## Context + +A `Client` struct represents a light client, connected to a single blockchain. + +The user has an option to verify headers using `VerifyHeader` or +`VerifyHeaderAtHeight` or `Update` methods. The latter method downloads the +latest header from primary and compares it with the currently trusted one. + +```go +type Client interface { + // verify new headers + VerifyHeaderAtHeight(height int64, now time.Time) (*types.SignedHeader, error) + VerifyHeader(newHeader *types.SignedHeader, newVals *types.ValidatorSet, now time.Time) error + Update(now time.Time) (*types.SignedHeader, error) + + // get trusted headers & validators + TrustedHeader(height int64) (*types.SignedHeader, error) + TrustedValidatorSet(height int64) (valSet *types.ValidatorSet, heightUsed int64, err error) + LastTrustedHeight() (int64, error) + FirstTrustedHeight() (int64, error) + + // query configuration options + ChainID() string + Primary() provider.Provider + Witnesses() []provider.Provider + + Cleanup() error +} +``` + +A new light client can either be created from scratch (via `NewClient`) or +using the trusted store (via `NewClientFromTrustedStore`). When there's some +data in the trusted store and `NewClient` is called, the light client will a) +check if stored header is more recent b) optionally ask the user whenever it +should rollback (no confirmation required by default). + +```go +func NewClient( + chainID string, + trustOptions TrustOptions, + primary provider.Provider, + witnesses []provider.Provider, + trustedStore store.Store, + options ...Option) (*Client, error) { +``` + +`witnesses` as argument (as opposite to `Option`) is an intentional choice, +made to increase security by default. At least one witness is required, +although, right now, the light client does not check that primary != witness. +When cross-checking a new header with witnesses, minimum number of witnesses +required to respond: 1. Note the very first header (`TrustOptions.Hash`) is +also cross-checked with witnesses for additional security. + +Due to bisection algorithm nature, some headers might be skipped. If the light +client does not have a header for height `X` and `VerifyHeaderAtHeight(X)` or +`VerifyHeader(H#X)` methods are called, these will perform either a) backwards +verification from the latest header back to the header at height `X` or b) +bisection verification from the first stored header to the header at height `X`. + +`TrustedHeader`, `TrustedValidatorSet` only communicate with the trusted store. +If some header is not there, an error will be returned indicating that +verification is required. + +```go +type Provider interface { + ChainID() string + + SignedHeader(height int64) (*types.SignedHeader, error) + ValidatorSet(height int64) (*types.ValidatorSet, error) +} +``` + +Provider is a full node usually, but can be another light client. The above +interface is thin and can accommodate many implementations. + +If provider (primary or witness) becomes unavailable for a prolonged period of +time, it will be removed to ensure smooth operation. + +Both `Client` and providers expose chain ID to track if there are on the same +chain. Note, when chain upgrades or intentionally forks, chain ID changes. + +The light client stores headers & validators in the trusted store: + +```go +type Store interface { + SaveSignedHeaderAndValidatorSet(sh *types.SignedHeader, valSet *types.ValidatorSet) error + DeleteSignedHeaderAndValidatorSet(height int64) error + + SignedHeader(height int64) (*types.SignedHeader, error) + ValidatorSet(height int64) (*types.ValidatorSet, error) + + LastSignedHeaderHeight() (int64, error) + FirstSignedHeaderHeight() (int64, error) + + SignedHeaderAfter(height int64) (*types.SignedHeader, error) + + Prune(size uint16) error + + Size() uint16 +} +``` + +At the moment, the only implementation is the `db` store (wrapper around the KV +database, used in Tendermint). In the future, remote adapters are possible +(e.g. `Postgresql`). + +```go +func Verify( + chainID string, + trustedHeader *types.SignedHeader, // height=X + trustedVals *types.ValidatorSet, // height=X or height=X+1 + untrustedHeader *types.SignedHeader, // height=Y + untrustedVals *types.ValidatorSet, // height=Y + trustingPeriod time.Duration, + now time.Time, + maxClockDrift time.Duration, + trustLevel tmmath.Fraction) error { +``` + +`Verify` pure function is exposed for a header verification. It handles both +cases of adjacent and non-adjacent headers. In the former case, it compares the +hashes directly (2/3+ signed transition). Otherwise, it verifies 1/3+ +(`trustLevel`) of trusted validators are still present in new validators. + +While `Verify` function is certainly handy, `VerifyAdjacent` and +`VerifyNonAdjacent` should be used most often to avoid logic errors. + +### Bisection algorithm details + +Non-recursive bisection algorithm was implemented despite the spec containing +the recursive version. There are two major reasons: + +1) Constant memory consumption => no risk of getting OOM (Out-Of-Memory) exceptions; +2) Faster finality (see Fig. 1). + +_Fig. 1: Differences between recursive and non-recursive bisections_ + +![Fig. 1](./img/adr-046-fig1.png) + +Specification of the non-recursive bisection can be found +[here](https://github.com/tendermint/spec/blob/zm_non-recursive-verification/spec/consensus/light-client/non-recursive-verification.md). + +## Status + +Implemented + +## Consequences + +### Positive + +* single `Client` struct, which is easy to use +* flexible interfaces for header providers and trusted storage + +### Negative + +* `Verify` needs to be aligned with the current spec + +### Neutral + +* `Verify` function might be misused (called with non-adjacent headers in + incorrectly implemented sequential verification) diff --git a/docs/architecture/adr-047-handling-evidence-from-light-client.md b/docs/architecture/adr-047-handling-evidence-from-light-client.md new file mode 100644 index 000000000..9854bc13e --- /dev/null +++ b/docs/architecture/adr-047-handling-evidence-from-light-client.md @@ -0,0 +1,254 @@ +# ADR 047: Handling evidence from light client + +## Changelog +* 18-02-2020: Initial draft +* 24-02-2020: Second version +* 13-04-2020: Add PotentialAmnesiaEvidence and a few remarks +* 31-07-2020: Remove PhantomValidatorEvidence +* 14-08-2020: Introduce light traces (listed now as an alternative approach) +* 20-08-2020: Light client produces evidence when detected instead of passing to full node +* 16-09-2020: Post-implementation revision +* 15-03-2020: Ammends for the case of a forward lunatic attack + +### Glossary of Terms + +- a `LightBlock` is the unit of data that a light client receives, verifies and stores. +It is composed of a validator set, commit and header all at the same height. +- a **Trace** is seen as an array of light blocks across a range of heights that were +created as a result of skipping verification. +- a **Provider** is a full node that a light client is connected to and serves the light +client signed headers and validator sets. +- `VerifySkipping` (sometimes known as bisection or verify non-adjacent) is a method the +light client uses to verify a target header from a trusted header. The process involves verifying +intermediate headers in between the two by making sure that 1/3 of the validators that signed +the trusted header also signed the untrusted one. +- **Light Bifurcation Point**: If the light client was to run `VerifySkipping` with two providers +(i.e. a primary and a witness), the bifurcation point is the height that the headers +from each of these providers are different yet valid. This signals that one of the providers +may be trying to fool the light client. + +## Context + +The bisection method of header verification used by the light client exposes +itself to a potential attack if any block within the light clients trusted period has +a malicious group of validators with power that exceeds the light clients trust level +(default is 1/3). To improve light client (and overall network) security, the light +client has a detector component that compares the verified header provided by the +primary against witness headers. This ADR outlines the process of mitigating attacks +on the light client by using witness nodes to cross reference with. + +## Alternative Approaches + +A previously discussed approach to handling evidence was to pass all the data that the +light client had witnessed when it had observed diverging headers for the full node to +process.This was known as a light trace and had the following structure: + +```go +type ConflictingHeadersTrace struct { + Headers []*types.SignedHeader +} +``` + +This approach has the advantage of not requiring as much processing on the light +client side in the event that an attack happens. Although, this is not a significant +difference as the light client would in any case have to validate all the headers +from both witness and primary. Using traces would consume a large amount of bandwidth +and adds a DDOS vector to the full node. + + +## Decision + +The light client will be divided into two components: a `Verifier` (either sequential or +skipping) and a `Detector` (see [Informal's Detector](https://github.com/informalsystems/tendermint-rs/blob/master/docs/spec/lightclient/detection/detection.md)) +. The detector will take the trace of headers from the primary and check it against all +witnesses. For a witness with a diverging header, the detector will first verify the header +by bisecting through all the heights defined by the trace that the primary provided. If valid, +the light client will trawl through both traces and find the point of bifurcation where it +can proceed to extract any evidence (as is discussed in detail later). + +Upon successfully detecting the evidence, the light client will send it to both primary and +witness before halting. It will not send evidence to other peers nor continue to verify the +primary's header against any other header. + + +## Detailed Design + +The verification process of the light client will start from a trusted header and use a bisectional +algorithm to verify up to a header at a given height. This becomes the verified header (does not +mean that it is trusted yet). All headers that were verified in between are cached and known as +intermediary headers and the entire array is sometimes referred to as a trace. + +The light client's detector then takes all the headers and runs the detect function. + +```golang +func (c *Client) detectDivergence(primaryTrace []*types.LightBlock, now time.Time) error +``` + +The function takes the last header it received, the target header and compares it against all the witnesses +it has through the following function: + +```golang +func (c *Client) compareNewHeaderWithWitness(errc chan error, h *types.SignedHeader, + witness provider.Provider, witnessIndex int) +``` + +The err channel is used to send back all the outcomes so that they can be processed in parallel. +Invalid headers result in dropping the witness, lack of response or not having the headers is ignored +just as headers that have the same hash. Headers, however, +of a different hash then trigger the detection process between the primary and that particular witness. + +This begins with verification of the witness's header via skipping verification which is run in tande +with locating the Light Bifurcation Point + +![](../imgs/light-client-detector.png) + +This is done with: + +```golang +func (c *Client) examineConflictingHeaderAgainstTrace( + trace []*types.LightBlock, + targetBlock *types.LightBlock, + source provider.Provider, + now time.Time, + ) ([]*types.LightBlock, *types.LightBlock, error) +``` + +which performs the following + +1. Checking that the trusted header is the same. Currently, they should not theoretically be different +because witnesses cannot be added and removed after the client is initialized. But we do this any way +as a sanity check. If this fails we have to drop the witness. + +2. Querying and verifying the witness's headers using bisection at the same heights of all the +intermediary headers of the primary (In the above example this is A, B, C, D, F, H). If bisection fails +or the witness stops responding then we can call the witness faulty and drop it. + +3. We eventually reach a verified header by the witness which is not the same as the intermediary header +(In the above example this is E). This is the point of bifurcation (This could also be the last header). + +4. There is a unique case where the trace that is being examined against has blocks that have a greater +height than the targetBlock. This can occur as part of a forward lunatic attack where the primary has +provided a light block that has a height greater than the head of the chain (see Appendix B). In this +case, the light client will verify the sources blocks up to the targetBlock and return the block in the +trace that is directly after the targetBlock in height as the `ConflictingBlock` + +This function then returns the trace of blocks from the witness node between the common header and the +divergent header of the primary as it is likely, as seen in the example to the right, that multiple +headers where required in order to verify the divergent one. This trace will +be used later (as is also described later in this document). + +![](../imgs/bifurcation-point.png) + +Now, that an attack has been detected, the light client must form evidence to prove it. There are +three types of attacks that either the primary or witness could have done to try fool the light client +into verifying the wrong header: Lunatic, Equivocation and Amnesia. As the consequence is the same and +the data required to prove it is also very similar, we bundle these attack styles together in a single +evidence: + +```golang +type LightClientAttackEvidence struct { + ConflictingBlock *LightBlock + CommonHeight int64 +} +``` + +The light client takes the stance of first suspecting the primary. Given the bifurcation point found +above, it takes the two divergent headers and compares whether the one from the primary is valid with +respect to the one from the witness. This is done by calling `isInvalidHeader()` which looks to see if +any one of the deterministically derived header fields differ from one another. This could be one of +`ValidatorsHash`, `NextValidatorsHash`, `ConsensusHash`, `AppHash`, and `LastResultsHash`. +In this case we know it's a Lunatic attack and to help the witness verify it we send the height +of the common header which is 1 in the example above or C in the example above that. If all these +hashes are the same then we can infer that it is either Equivocation or Amnesia. In this case we send +the height of the diverged headers because we know that the validator sets are the same, hence the +malicious nodes are still bonded at that height. In the example above, this is height 10 and the +example above that it is the height at E. + +The light client now has the evidence and broadcasts it to the witness. + +However, it could have been that the header the light client used from the witness against the primary +was forged, so before halting the light client swaps the process and thus suspects the witness and +uses the primary to create evidence. It calls `examineConflictingHeaderAgainstTrace` this time using +the witness trace found earlier. +If the primary was malicious it is likely that it will not respond but if it is innocent then the +light client will produce the same evidence but this time the conflicting +block will come from the witness node instead of the primary. The evidence is then formed and sent to +the primary node. + +This then ends the process and the verify function that was called at the start returns the error to +the user. + +For a detailed overview of how each of these three attacks can be conducted please refer to the +[fork accountability spec](https://github.com/tendermint/tendermint/blob/master/spec/consensus/light-client/accountability.md). + +## Full Node Verification + +When a full node receives evidence from the light client it will need to verify +it for itself before gossiping it to peers and trying to commit it on chain. This process is outlined + in [ADR-059](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-059-evidence-composition-and-lifecycle.md). + +## Status + +Implemented + +## Consequences + +### Positive + +* Light client has increased security against Lunatic, Equivocation and Amnesia attacks. +* Do not need intermediate data structures to encapsulate the malicious behavior +* Generalized evidence makes the code simpler + +### Negative + +* Breaking change on the light client from versions 0.33.8 and below. Previous +versions will still send `ConflictingHeadersEvidence` but it won't be recognized +by the full node. Light clients will however still refuse the header and shut down. +* Amnesia attacks although detected, will not be able to be punished as it is not +clear from the current information which nodes behaved maliciously. +* Evidence module must handle both individual and grouped evidence. + +### Neutral + +## References + +* [Fork accountability spec](https://github.com/tendermint/tendermint/blob/master/spec/consensus/light-client/accountability.md) +* [ADR 056: Light client amnesia attacks](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-056-light-client-amnesia-attacks.md) +* [ADR-059: Evidence Composition and Lifecycle](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-059-evidence-composition-and-lifecycle.md) +* [Informal's Light Client Detector](https://github.com/informalsystems/tendermint-rs/blob/master/docs/spec/lightclient/detection/detection.md) + + +## Appendix A + +PhantomValidatorEvidence was used to capture when a validator that was still staked +(i.e. within the bonded period) but was not in the current validator set had voted for a block. + +In later discussions it was argued that although possible to keep phantom validator +evidence, any case a phantom validator that could have the capacity to be involved +in fooling a light client would have to be aided by 1/3+ lunatic validators. + +It would also be very unlikely that the new validators injected by the lunatic attack +would be validators that currently still have something staked. + +Not only this but there was a large degree of extra computation required in storing all +the currently staked validators that could possibly fall into the group of being +a phantom validator. Given this, it was removed. + +## Appendix B + +A unique flavor of lunatic attack is a forward lunatic attack. This is where a malicious +node provides a header with a height greater than the height of the blockchain. Thus there +are no witnesses capable of rebutting the malicious header. Such an attack will also +require an accomplice, i.e. at least one other witness to also return the same forged header. +Although such attacks can be any arbitrary height ahead, they must still remain within the +clock drift of the light clients real time. Therefore, to detect such an attack, a light +client will wait for a time + +``` +2 * MAX_CLOCK_DRIFT + LAG +``` + +for a witness to provide the latest block it has. Given the time constraints, if the witness +is operating at the head of the blockchain, it will have a header with an earlier height but +a later timestamp. This can be used to prove that the primary has submitted a lunatic header +which violates monotonically increasing time. diff --git a/docs/architecture/adr-050-improved-trusted-peering.md b/docs/architecture/adr-050-improved-trusted-peering.md new file mode 100644 index 000000000..d079e67bd --- /dev/null +++ b/docs/architecture/adr-050-improved-trusted-peering.md @@ -0,0 +1,58 @@ +# ADR 50: Improved Trusted Peering + +## Changelog +* 22-10-2019: Initial draft +* 05-11-2019: Modify `maximum-dial-period` to `persistent-peers-max-dial-period` + +## Context + +When `max-num-inbound-peers` or `max-num-outbound-peers` of a node is reached, the node cannot spare more slots to any peer +by inbound or outbound. Therefore, after a certain period of disconnection, any important peering can be lost indefinitely +because all slots are consumed by other peers, and the node stops trying to dial the peer anymore. + +This is happening because of two reasons, exponential backoff and absence of unconditional peering feature for trusted peers. + + +## Decision + +We would like to suggest solving this problem by introducing two parameters in `config.toml`, `unconditional-peer-ids` and +`persistent-peers-max-dial-period`. + +1) `unconditional-peer-ids` + +A node operator inputs list of ids of peers which are allowed to be connected by both inbound or outbound regardless of +`max-num-inbound-peers` or `max-num-outbound-peers` of user's node reached or not. + +2) `persistent-peers-max-dial-period` + +Terms between each dial to each persistent peer will not exceed `persistent-peers-max-dial-period` during exponential backoff. +Therefore, `dial-period` = min(`persistent-peers-max-dial-period`, `exponential-backoff-dial-period`) + +Alternative approach + +Persistent-peers is only for outbound, therefore it is not enough to cover the full utility of `unconditional-peer-ids`. +@creamers158(https://github.com/Creamers158) suggested putting id-only items into persistent-peers to be handled as +`unconditional-peer-ids`, but it needs very complicated struct exception for different structure of items in persistent-peers. +Therefore we decided to have `unconditional-peer-ids` to independently cover this use-case. + +## Status + +Proposed + +## Consequences + +### Positive + +A node operator can configure two new parameters in `config.toml` so that he/she can assure that tendermint will allow connections +from/to peers in `unconditional-peer-ids`. Also he/she can assure that every persistent peer will be dialed at least once in every +`persistent-peers-max-dial-period` term. It achieves more stable and persistent peering for trusted peers. + +### Negative + +The new feature introduces two new parameters in `config.toml` which needs explanation for node operators. + +### Neutral + +## References + +* two p2p feature enhancement proposal(https://github.com/tendermint/tendermint/issues/4053) diff --git a/docs/architecture/adr-051-double-signing-risk-reduction.md b/docs/architecture/adr-051-double-signing-risk-reduction.md new file mode 100644 index 000000000..2bf8db731 --- /dev/null +++ b/docs/architecture/adr-051-double-signing-risk-reduction.md @@ -0,0 +1,53 @@ +# ADR 051: Double Signing Risk Reduction + +## Changelog + +* 27-11-2019: Initial draft +* 13-01-2020: Separate into 2 ADR, This ADR will only cover Double signing Protection and ADR-052 handle Tendermint Mode +* 22-01-2020: change the title from "Double signing Protection" to "Double Signing Risk Reduction" + +## Context + +To provide a risk reduction method for double signing incidents mistakenly executed by validators +- Validators often mistakenly run duplicated validators to cause double-signing incident +- This proposed feature is to reduce the risk of mistaken double-signing incident by checking recent N blocks before voting begins +- When we think of such serious impact on double-signing incident, it is very reasonable to have multiple risk reduction algorithm built in node daemon + +## Decision + +We would like to suggest a double signing risk reduction method. + +- Methodology : query recent consensus results to find out whether node's consensus key is used on consensus recently or not +- When to check + - When the state machine starts `ConsensusReactor` after fully synced + - When the node is validator ( with privValidator ) + - When `cs.config.DoubleSignCheckHeight > 0` +- How to check + 1. When a validator is transformed from syncing status to fully synced status, the state machine check recent N blocks (`latest_height - double_sign_check_height`) to find out whether there exists consensus votes using the validator's consensus key + 2. If there exists votes from the validator's consensus key, exit state machine program +- Configuration + - We would like to suggest by introducing `double_sign_check_height` parameter in `config.toml` and cli, how many blocks state machine looks back to check votes + - `double_sign_check_height = {{ .Consensus.DoubleSignCheckHeight }}` in `config.toml` + - `tendermint node --consensus.double_sign_check_height` in cli + - State machine ignore checking procedure when `double_sign_check_height == 0` + +## Status + +Implemented + +## Consequences + +### Positive + +- Validators can avoid double signing incident by mistakes. (eg. If another validator node is voting on consensus, starting new validator node with same consensus key will cause panic stop of the state machine because consensus votes with the consensus key are found in recent blocks) +- We expect this method will prevent majority of double signing incident by mistakes. + +### Negative + +- When the risk reduction method is on, restarting a validator node will panic because the node itself voted on consensus with the same consensus key. So, validators should stop the state machine, wait for some blocks, and then restart the state machine to avoid panic stop. + +### Neutral + +## References + +- Issue [#4059](https://github.com/tendermint/tendermint/issues/4059) : double-signing protection diff --git a/docs/architecture/adr-052-tendermint-mode.md b/docs/architecture/adr-052-tendermint-mode.md new file mode 100644 index 000000000..04f3eb699 --- /dev/null +++ b/docs/architecture/adr-052-tendermint-mode.md @@ -0,0 +1,85 @@ +# ADR 052: Tendermint Mode + +## Changelog + +* 27-11-2019: Initial draft from ADR-051 +* 13-01-2020: Separate ADR Tendermint Mode from ADR-051 +* 29-03-2021: Update info regarding defaults + +## Context + +- Full mode: full mode does not have the capability to become a validator. +- Validator mode : this mode is exactly same as existing state machine behavior. sync without voting on consensus, and participate consensus when fully synced +- Seed mode : lightweight seed node maintaining an address book, p2p like [TenderSeed](https://gitlab.com/polychainlabs/tenderseed) + +## Decision + +We would like to suggest a simple Tendermint mode abstraction. These modes will live under one binary, and when initializing a node the user will be able to specify which node they would like to create. + +- Which reactor, component to include for each node + - full + - switch, transport + - reactors + - mempool + - consensus + - evidence + - blockchain + - p2p/pex + - statesync + - rpc (safe connections only) + - *~~no privValidator(priv_validator_key.json, priv_validator_state.json)~~* + - validator + - switch, transport + - reactors + - mempool + - consensus + - evidence + - blockchain + - p2p/pex + - statesync + - rpc (safe connections only) + - with privValidator(priv_validator_key.json, priv_validator_state.json) + - seed + - switch, transport + - reactor + - p2p/pex +- Configuration, cli command + - We would like to suggest by introducing `mode` parameter in `config.toml` and cli + - `mode = "{{ .BaseConfig.Mode }}"` in `config.toml` + - `tendermint start --mode validator` in cli + - full | validator | seednode + - There will be no default. Users will need to specify when they run `tendermint init` +- RPC modification + - `host:26657/status` + - return empty `validator_info` when in full mode + - no rpc server in seednode +- Where to modify in codebase + - Add switch for `config.Mode` on `node/node.go:DefaultNewNode` + - If `config.Mode==validator`, call default `NewNode` (current logic) + - If `config.Mode==full`, call `NewNode` with `nil` `privValidator` (do not load or generation) + - Need to add exception routine for `nil` `privValidator` to related functions + - If `config.Mode==seed`, call `NewSeedNode` (seed node version of `node/node.go:NewNode`) + - Need to add exception routine for `nil` `reactor`, `component` to related functions + +## Status + +Implemented + +## Consequences + +### Positive + +- Node operators can choose mode when they run state machine according to the purpose of the node. +- Mode can prevent mistakes because users have to specify which mode they want to run via flag. (eg. If a user want to run a validator node, she/he should explicitly write down validator as mode) +- Different mode needs different reactors, resulting in efficient resource usage. + +### Negative + +- Users need to study how each mode operate and which capability it has. + +### Neutral + +## References + +- Issue [#2237](https://github.com/tendermint/tendermint/issues/2237) : Tendermint "mode" +- [TenderSeed](https://gitlab.com/polychainlabs/tenderseed) : A lightweight Tendermint Seed Node. diff --git a/docs/architecture/adr-053-state-sync-prototype.md b/docs/architecture/adr-053-state-sync-prototype.md new file mode 100644 index 000000000..2d8c37ad1 --- /dev/null +++ b/docs/architecture/adr-053-state-sync-prototype.md @@ -0,0 +1,254 @@ +# ADR 053: State Sync Prototype + +State sync is now [merged](https://github.com/tendermint/tendermint/pull/4705). Up-to-date ABCI documentation is [available](https://github.com/tendermint/spec/pull/90), refer to it rather than this ADR for details. + +This ADR outlines the plan for an initial state sync prototype, and is subject to change as we gain feedback and experience. It builds on discussions and findings in [ADR-042](./adr-042-state-sync.md), see that for background information. + +## Changelog + +* 2020-01-28: Initial draft (Erik Grinaker) + +* 2020-02-18: Updates after initial prototype (Erik Grinaker) + * ABCI: added missing `reason` fields. + * ABCI: used 32-bit 1-based chunk indexes (was 64-bit 0-based). + * ABCI: moved `RequestApplySnapshotChunk.chain_hash` to `RequestOfferSnapshot.app_hash`. + * Gaia: snapshots must include node versions as well, both for inner and leaf nodes. + * Added experimental prototype info. + * Added open questions and implementation plan. + +* 2020-03-29: Strengthened and simplified ABCI interface (Erik Grinaker) + * ABCI: replaced `chunks` with `chunk_hashes` in `Snapshot`. + * ABCI: removed `SnapshotChunk` message. + * ABCI: renamed `GetSnapshotChunk` to `LoadSnapshotChunk`. + * ABCI: chunks are now exchanged simply as `bytes`. + * ABCI: chunks are now 0-indexed, for parity with `chunk_hashes` array. + * Reduced maximum chunk size to 16 MB, and increased snapshot message size to 4 MB. + +* 2020-04-29: Update with final released ABCI interface (Erik Grinaker) + +## Context + +State sync will allow a new node to receive a snapshot of the application state without downloading blocks or going through consensus. This bootstraps the node significantly faster than the current fast sync system, which replays all historical blocks. + +Background discussions and justifications are detailed in [ADR-042](./adr-042-state-sync.md). Its recommendations can be summarized as: + +* The application periodically takes full state snapshots (i.e. eager snapshots). + +* The application splits snapshots into smaller chunks that can be individually verified against a chain app hash. + +* Tendermint uses the light client to obtain a trusted chain app hash for verification. + +* Tendermint discovers and downloads snapshot chunks in parallel from multiple peers, and passes them to the application via ABCI to be applied and verified against the chain app hash. + +* Historical blocks are not backfilled, so state synced nodes will have a truncated block history. + +## Tendermint Proposal + +This describes the snapshot/restore process seen from Tendermint. The interface is kept as small and general as possible to give applications maximum flexibility. + +### Snapshot Data Structure + +A node can have multiple snapshots taken at various heights. Snapshots can be taken in different application-specified formats (e.g. MessagePack as format `1` and Protobuf as format `2`, or similarly for schema versioning). Each snapshot consists of multiple chunks containing the actual state data, for parallel downloads and reduced memory usage. + +```proto +message Snapshot { + uint64 height = 1; // The height at which the snapshot was taken + uint32 format = 2; // The application-specific snapshot format + uint32 chunks = 3; // Number of chunks in the snapshot + bytes hash = 4; // Arbitrary snapshot hash - should be equal only for identical snapshots + bytes metadata = 5; // Arbitrary application metadata +} +``` + +Chunks are exchanged simply as `bytes`, and cannot be larger than 16 MB. `Snapshot` messages should be less than 4 MB. + +### ABCI Interface + +```proto +// Lists available snapshots +message RequestListSnapshots {} + +message ResponseListSnapshots { + repeated Snapshot snapshots = 1; +} + +// Offers a snapshot to the application +message RequestOfferSnapshot { + Snapshot snapshot = 1; // snapshot offered by peers + bytes app_hash = 2; // light client-verified app hash for snapshot height + } + +message ResponseOfferSnapshot { + Result result = 1; + + enum Result { + accept = 0; // Snapshot accepted, apply chunks + abort = 1; // Abort all snapshot restoration + reject = 2; // Reject this specific snapshot, and try a different one + reject_format = 3; // Reject all snapshots of this format, and try a different one + reject_sender = 4; // Reject all snapshots from the sender(s), and try a different one + } +} + +// Loads a snapshot chunk +message RequestLoadSnapshotChunk { + uint64 height = 1; + uint32 format = 2; + uint32 chunk = 3; // Zero-indexed +} + +message ResponseLoadSnapshotChunk { + bytes chunk = 1; +} + +// Applies a snapshot chunk +message RequestApplySnapshotChunk { + uint32 index = 1; + bytes chunk = 2; + string sender = 3; + } + +message ResponseApplySnapshotChunk { + Result result = 1; + repeated uint32 refetch_chunks = 2; // Chunks to refetch and reapply (regardless of result) + repeated string reject_senders = 3; // Chunk senders to reject and ban (regardless of result) + + enum Result { + accept = 0; // Chunk successfully accepted + abort = 1; // Abort all snapshot restoration + retry = 2; // Retry chunk, combine with refetch and reject as appropriate + retry_snapshot = 3; // Retry snapshot, combine with refetch and reject as appropriate + reject_snapshot = 4; // Reject this snapshot, try a different one but keep sender rejections + } +} +``` + +### Taking Snapshots + +Tendermint is not aware of the snapshotting process at all, it is entirely an application concern. The following guarantees must be provided: + +* **Periodic:** snapshots must be taken periodically, not on-demand, for faster restores, lower load, and less DoS risk. + +* **Deterministic:** snapshots must be deterministic, and identical across all nodes - typically by taking a snapshot at given height intervals. + +* **Consistent:** snapshots must be consistent, i.e. not affected by concurrent writes - typically by using a data store that supports versioning and/or snapshot isolation. + +* **Asynchronous:** snapshots must be asynchronous, i.e. not halt block processing and state transitions. + +* **Chunked:** snapshots must be split into chunks of reasonable size (on the order of megabytes), and each chunk must be verifiable against the chain app hash. + +* **Garbage collected:** snapshots must be garbage collected periodically. + +### Restoring Snapshots + +Nodes should have options for enabling state sync and/or fast sync, and be provided a trusted header hash for the light client. + +When starting an empty node with state sync and fast sync enabled, snapshots are restored as follows: + +1. The node checks that it is empty, i.e. that it has no state nor blocks. + +2. The node contacts the given seeds to discover peers. + +3. The node contacts a set of full nodes, and verifies the trusted block header using the given hash via the light client. + +4. The node requests available snapshots via P2P from peers, via `RequestListSnapshots`. Peers will return the 10 most recent snapshots, one message per snapshot. + +5. The node aggregates snapshots from multiple peers, ordered by height and format (in reverse). If there are mismatches between different snapshots, the one hosted by the largest amount of peers is chosen. The node iterates over all snapshots in reverse order by height and format until it finds one that satisfies all of the following conditions: + + * The snapshot height's block is considered trustworthy by the light client (i.e. snapshot height is greater than trusted header and within unbonding period of the latest trustworthy block). + + * The snapshot's height or format hasn't been explicitly rejected by an earlier `RequestOfferSnapshot`. + + * The application accepts the `RequestOfferSnapshot` call. + +6. The node downloads chunks in parallel from multiple peers, via `RequestLoadSnapshotChunk`. Chunk messages cannot exceed 16 MB. + +7. The node passes chunks sequentially to the app via `RequestApplySnapshotChunk`. + +8. Once all chunks have been applied, the node compares the app hash to the chain app hash, and if they do not match it either errors or discards the state and starts over. + +9. The node switches to fast sync to catch up blocks that were committed while restoring the snapshot. + +10. The node switches to normal consensus mode. + +## Gaia Proposal + +This describes the snapshot process seen from Gaia, using format version `1`. The serialization format is unspecified, but likely to be compressed Amino or Protobuf. + +### Snapshot Metadata + +In the initial version there is no snapshot metadata, so it is set to an empty byte buffer. + +Once all chunks have been successfully built, snapshot metadata should be stored in a database and served via `RequestListSnapshots`. + +### Snapshot Chunk Format + +The Gaia data structure consists of a set of named IAVL trees. A root hash is constructed by taking the root hashes of each of the IAVL trees, then constructing a Merkle tree of the sorted name/hash map. + +IAVL trees are versioned, but a snapshot only contains the version relevant for the snapshot height. All historical versions are ignored. + +IAVL trees are insertion-order dependent, so key/value pairs must be set in an appropriate insertion order to produce the same tree branching structure. This insertion order can be found by doing a breadth-first scan of all nodes (including inner nodes) and collecting unique keys in order. However, the node hash also depends on the node's version, so snapshots must contain the inner nodes' version numbers as well. + +For the initial prototype, each chunk consists of a complete dump of all node data for all nodes in an entire IAVL tree. Thus the number of chunks equals the number of persistent stores in Gaia. No incremental verification of chunks is done, only a final app hash comparison at the end of the snapshot restoration. + +For a production version, it should be sufficient to store key/value/version for all nodes (leaf and inner) in insertion order, chunked in some appropriate way. If per-chunk verification is required, the chunk must also contain enough information to reconstruct the Merkle proofs all the way up to the root of the multistore, e.g. by storing a complete subtree's key/value/version data plus Merkle hashes of all other branches up to the multistore root. The exact approach will depend on tradeoffs between size, time, and verification. IAVL RangeProofs are not recommended, since these include redundant data such as proofs for intermediate and leaf nodes that can be derived from the above data. + +Chunks should be built greedily by collecting node data up to some size limit (e.g. 10 MB) and serializing it. Chunk data is stored in the file system as `snapshots///`, and a SHA-256 checksum is stored along with the snapshot metadata. + +### Snapshot Scheduling + +Snapshots should be taken at some configurable height interval, e.g. every 1000 blocks. All nodes should preferably have the same snapshot schedule, such that all nodes can serve chunks for a given snapshot. + +Taking consistent snapshots of IAVL trees is greatly simplified by them being versioned: simply snapshot the version that corresponds to the snapshot height, while concurrent writes create new versions. IAVL pruning must not prune a version that is being snapshotted. + +Snapshots must also be garbage collected after some configurable time, e.g. by keeping the latest `n` snapshots. + +## Resolved Questions + +* Is it OK for state-synced nodes to not have historical blocks nor historical IAVL versions? + + > Yes, this is as intended. Maybe backfill blocks later. + +* Do we need incremental chunk verification for first version? + + > No, we'll start simple. Can add chunk verification via a new snapshot format without any breaking changes in Tendermint. For adversarial conditions, maybe consider support for whitelisting peers to download chunks from. + +* Should the snapshot ABCI interface be a separate optional ABCI service, or mandatory? + + > Mandatory, to keep things simple for now. It will therefore be a breaking change and push the release. For apps using the Cosmos SDK, we can provide a default implementation that does not serve snapshots and errors when trying to apply them. + +* How can we make sure `ListSnapshots` data is valid? An adversary can provide fake/invalid snapshots to DoS peers. + + > For now, just pick snapshots that are available on a large number of peers. Maybe support whitelisting. We may consider e.g. placing snapshot manifests on the blockchain later. + +* Should we punish nodes that provide invalid snapshots? How? + + > No, these are full nodes not validators, so we can't punish them. Just disconnect from them and ignore them. + +* Should we call these snapshots? The SDK already uses the term "snapshot" for `PruningOptions.SnapshotEvery`, and state sync will introduce additional SDK options for snapshot scheduling and pruning that are not related to IAVL snapshotting or pruning. + + > Yes. Hopefully these concepts are distinct enough that we can refer to state sync snapshots and IAVL snapshots without too much confusion. + +* Should we store snapshot and chunk metadata in a database? Can we use the database for chunks? + + > As a first approach, store metadata in a database and chunks in the filesystem. + +* Should a snapshot at height H be taken before or after the block at H is processed? E.g. RPC `/commit` returns app_hash after _previous_ height, i.e. _before_ current height. + + > After commit. + +* Do we need to support all versions of blockchain reactor (i.e. fast sync)? + + > We should remove the v1 reactor completely once v2 has stabilized. + +* Should `ListSnapshots` be a streaming API instead of a request/response API? + + > No, just use a max message size. + +## Status + +Implemented + +## References + +* [ADR-042](./adr-042-state-sync.md) and its references diff --git a/docs/architecture/adr-054-crypto-encoding-2.md b/docs/architecture/adr-054-crypto-encoding-2.md new file mode 100644 index 000000000..e58681d15 --- /dev/null +++ b/docs/architecture/adr-054-crypto-encoding-2.md @@ -0,0 +1,71 @@ +# ADR 054: Crypto encoding (part 2) + +## Changelog + +2020-2-27: Created +2020-4-16: Update + +## Context + +Amino has been a pain point of many users in the ecosystem. While Tendermint does not suffer greatly from the performance degradation introduced by amino, we are making an effort in moving the encoding format to a widely adopted format, [Protocol Buffers](https://developers.google.com/protocol-buffers). With this migration a new standard is needed for the encoding of keys. This will cause ecosystem wide breaking changes. + +Currently amino encodes keys as ` `. + +## Decision + +Previously Tendermint defined all the key types for use in Tendermint and the Cosmos-SDK. Going forward the Cosmos-SDK will define its own protobuf type for keys. This will allow Tendermint to only define the keys that are being used in the codebase (ed25519). +There is the the opportunity to only define the usage of ed25519 (`bytes`) and not have it be a `oneof`, but this would mean that the `oneof` work is only being postponed to a later date. When using the `oneof` protobuf type we will have to manually switch over the possible key types and then pass them to the interface which is needed. + +The approach that will be taken to minimize headaches for users is one where all encoding of keys will shift to protobuf and where amino encoding is relied on, there will be custom marshal and unmarshal functions. + +Protobuf messages: + +```proto +message PubKey { + oneof key { + bytes ed25519 = 1; + } + +message PrivKey { + oneof sum { + bytes ed25519 = 1; + } +} +``` + +> Note: The places where backwards compatibility is needed is still unclear. + +All modules currently do not rely on amino encoded bytes and keys are not amino encoded for genesis, therefore a hardfork upgrade is what will be needed to adopt these changes. + +This work will be broken out into a few PRs, this work will be merged into a proto-breakage branch, all PRs will be reviewed prior to being merged: + +1. Encoding of keys to protobuf and protobuf messages +2. Move Tendermint types to protobuf, mainly the ones that are being encoded. +3. Go one by one through the reactors and transition amino encoded messages to protobuf. +4. Test with cosmos-sdk and/or testnets repo. + +## Status + +Implemented + +## Consequences + +- Move keys to protobuf encoding, where backwards compatibility is needed, amino marshal and unmarshal functions will be used. + +### Positive + +- Protocol Buffer encoding will not change going forward. +- Removing amino overhead from keys will help with the KSM. +- Have a large ecosystem of supported languages. + +### Negative + +- Hardfork is required to integrate this into running chains. + +### Neutral + +## References + +> Are there any relevant PR comments, issues that led up to this, or articles referenced for why we made the given design choice? If so link them here! + +- {reference link} diff --git a/docs/architecture/adr-055-protobuf-design.md b/docs/architecture/adr-055-protobuf-design.md new file mode 100644 index 000000000..ab2f75283 --- /dev/null +++ b/docs/architecture/adr-055-protobuf-design.md @@ -0,0 +1,61 @@ +# ADR 055: Protobuf Design + +## Changelog + +- 2020-4-15: Created (@marbar3778) +- 2020-6-18: Updated (@marbar3778) + +## Context + +Currently we use [go-amino](https://github.com/tendermint/go-amino) throughout Tendermint. Amino is not being maintained anymore (April 15, 2020) by the Tendermint team and has been found to have issues: + +- https://github.com/tendermint/go-amino/issues/286 +- https://github.com/tendermint/go-amino/issues/230 +- https://github.com/tendermint/go-amino/issues/121 + +These are a few of the known issues that users could run into. + +Amino enables quick prototyping and development of features. While this is nice, amino does not provide the performance and developer convenience that is expected. For Tendermint to see wider adoption as a BFT protocol engine a transition to an adopted encoding format is needed. Below are some possible options that can be explored. + +There are a few options to pick from: + +- `Protobuf`: Protocol buffers are Google's language-neutral, platform-neutral, extensible mechanism for serializing structured data – think XML, but smaller, faster, and simpler. It is supported in countless languages and has been proven in production for many years. + +- `FlatBuffers`: FlatBuffers is an efficient cross platform serialization library. Flatbuffers are more efficient than Protobuf due to the fast that there is no parsing/unpacking to a second representation. FlatBuffers has been tested and used in production but is not widely adopted. + +- `CapnProto`: Cap’n Proto is an insanely fast data interchange format and capability-based RPC system. Cap'n Proto does not have a encoding/decoding step. It has not seen wide adoption throughout the industry. + +- @erikgrinaker - https://github.com/tendermint/tendermint/pull/4623#discussion_r401163501 + ``` + Cap'n'Proto is awesome. It was written by one of the original Protobuf developers to fix some of its issues, and supports e.g. random access to process huge messages without loading them into memory and an (opt-in) canonical form which would be very useful when determinism is needed (e.g. in the state machine). That said, I suspect Protobuf is the better choice due to wider adoption, although it makes me kind of sad since Cap'n'Proto is technically better. + ``` + +## Decision + +Transition Tendermint to Protobuf because of its performance and tooling. The Ecosystem behind Protobuf is vast and has outstanding [support for many languages](https://developers.google.com/protocol-buffers/docs/tutorials). + +We will be making this possible by keeping the current types in there current form (handwritten) and creating a `/proto` directory in which all the `.proto` files will live. Where encoding is needed, on disk and over the wire, we will call util functions that will transition the types from handwritten go types to protobuf generated types. This is inline with the recommended file structure from [buf](https://buf.build). You can find more information on this file structure [here](https://buf.build/docs/lint-checkers#file_layout). + +By going with this design we will enable future changes to types and allow for a more modular codebase. + +## Status + +Implemented + +## Consequences + +### Positive + +- Allows for modular types in the future +- Less refactoring +- Allows the proto files to be pulled into the spec repo in the future. +- Performance +- Tooling & support in multiple languages + +### Negative + +- When a developer is updating a type they need to make sure to update the proto type as well + +### Neutral + +## References diff --git a/docs/architecture/adr-056-light-client-amnesia-attacks.md b/docs/architecture/adr-056-light-client-amnesia-attacks.md new file mode 100644 index 000000000..8eed63d9b --- /dev/null +++ b/docs/architecture/adr-056-light-client-amnesia-attacks.md @@ -0,0 +1,170 @@ +# ADR 056: Light client amnesia attacks + +## Changelog + +- 02.04.20: Initial Draft +- 06.04.20: Second Draft +- 10.06.20: Post Implementation Revision +- 19.08.20: Short Term Amnesia Alteration +- 01.10.20: Status of Amnesia for 0.34 + +## Context + +Whilst most created evidence of malicious behavior is self evident such that any individual can verify them independently there are types of evidence, known collectively as global evidence, that require further collaboration from the network in order to accumulate enough information to create evidence that is individually verifiable and can therefore be processed through consensus. [Fork Accountability](https://github.com/tendermint/tendermint/blob/master/spec/consensus/light-client/accountability.md) has been coined to describe the entire process of detection, proving and punishing of malicious behavior. This ADR addresses specifically what a light client amnesia attack is and how it can be proven and the current decision around handling light client amnesia attacks. For information on evidence handling by the light client, it is recommended to read [ADR 47](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-047-handling-evidence-from-light-client.md). + +### Amnesia Attack + +The schematic below explains a scenario where an amnesia attack can occur such that two sets of honest nodes, C1 and C2, commit different blocks. + +![](../imgs/tm-amnesia-attack.png) + +1. C1 and F send PREVOTE messages for block A. +2. C1 sends PRECOMMIT for round 1 for block A. +3. A new round is started, C2 and F send PREVOTE messages for a different block B. +4. C2 and F then send PRECOMMIT messages for block B. +5. F later on creates PRECOMMITS for block A and combines it with those from C1 to form a block + + +This forged block can then be used to fool light clients trying to verify it. It must be stressed that there are a few more hurdles or dimensions to the attack to consider.For a more detailed walkthrough refer to Appendix A. + +## Decision + +The decision surrounding amnesia attacks has both a short term and long term component. In the long term, a more sturdy protocol will need to be fleshed out and implemented. There is already draft documents outlining what such a protocol would look like and the resources it would require (see references). Prior revisions however outlined a protocol which had been implemented (See Appendix B). It was agreed that it still required greater consideration and review given it's importance. It was therefore discussed, with the limited time frame set before 0.34, whether the protocol should be completely removed or if there should remain some logic in handling the aforementioned scenarios. + +The latter of the two options meant storing a record of all votes in any height with which there was more than one round. This information would then be accessible for applications if they wanted to perform some off-chain verification and punishment. + +In summary, this seemed like too much to ask of the application to implement only on a temporary basis, whilst not having the domain specific knowledge and considering such a difficult and unlikely attack. Therefore the short term decision is to identify when the attack has occurred and implement the detector algorithm highlighted in [ADR 47](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-047-handling-evidence-from-light-client.md) but to not implement any accountability protocol that would identify malicious validators and allow applications to punish them. This will hopefully change in the long term with the focus on eventually reaching a concrete and secure protocol with identifying and dealing with these attacks. + +## Implications + +- Light clients will still be able to detect amnesia attacks so long as the assumption of having at least one correct witness holds +- Light clients will gossip the attack to witnesses and halt thus failing to validate the incorrect block (and therefore not being fooled) +- Validators will propose and commit evidence of the amnesia attack on chain +- No evidence will be passed to the application indicting any malicious validators, thus meaning that no malicious validators will be punished for performing the attack +- If a light clients bubble of providers are all faulty the light client will falsely validate amnesia attacks as well as any other 1/3+ light client attack. + +## Status + +Implemented + +## Consequences + +### Positive + +Light clients are still able to prevent falsely validating a block. + +Already implemented. + +### Negative + +Light clients where all witnesses are faulty can be subject to an amnesia attack and verify a forged block that is not part of the chain. + +### Neutral + + +## References + +- [Fork accountability algorithm](https://docs.google.com/document/d/11ZhMsCj3y7zIZz4udO9l25xqb0kl7gmWqNpGVRzOeyY/edit) +- [Fork accountability spec](https://github.com/tendermint/tendermint/blob/master/spec/consensus/light-client/accountability.md) + +## Appendix A: Detailed Walkthrough of Performing a Light Client Amnesia Attack + +As the attacker, a prerequisite to this attack is first to observe or attempt to craft a block where a subset (less than ⅓) of correct validators sent precommit votes for a proposal in an earlier round and later received ⅔ prevotes for a different proposal thus changing their lock and correctly sending precommit votes (and later committing) for the proposal in the latter round. The second prerequisite is to have at least ⅓ validating power in that height (or enough voting power to have ⅔+ when combined with the precommits of the earlier round). + +To go back to how one may craft such a block, we begin with one of the validators in this cabal being the proposer. They propose a block with all the txs that they want to fool a light client with. The proposer then only relays this to the members of their cabal and a controlled subset of correct validators (less than ⅓). We will call ourselves f for faulty and c1 for this correct subset. + +Attackers need to rely on the assistance of some form of a network partition or on the nature of the sporadic voting to conjure their desired environment. The attackers need at least ⅓ of the validating power of the remaining correct validators, we shall denote this as c2, to not see ⅔ prevotes and thus not be locked on a block when it comes to the next round. If we have less than ⅓ remaining validators that don’t see this first proposal, then we will not have enough voting power to reach ⅔+ prevotes (the sum of f and c2) in the following round and thus change the lock of c1 such that we correctly commit the block in the latter round yet have enough precommits in the earlier round to fool the light client. Remember this is our desired scenario: to save all these precommit votes for a different (in this case earlier) proposed block. + +To try to break this down even further let’s go back to the first round. F sends c1 a proposal (and not c2), c1 in turn sends their prevotes to all whom they are connected to. This means that some will be received by c2. F then sends their prevotes just to c1. Now not all validators in c1 may be connected to each other, so perhaps some validators in c1 might not receive ⅔ (from their own cohort and from f) and thus not precommit. In other situations we may see a validator in c2 connected to all validators in c1. Therefore they too will receive ⅔ prevotes and thus precommit. We can conclude therefore that although targeting this c1 subset of validators, those that actually precommit may be somewhat different. The key is for the attackers to observe the n amount of precommits they need in round 1 where n is ⅔+ - f, whilst ensuring that n itself does not go over ⅓. If it does then less than ⅔ validators remain to be able to change the lock and commit the block in the later round. + +An extra dimension to this puzzle is the timeouts. Whilst c1 is relaying votes to its peers and these validators count closer towards the ⅔ threshold needed to send their precommit votes at any moment the timeout could be reached and thus the nodes will precommit nil and ignore any late prevote messages. + +This is all to say that such an attack is partly out of the attackers hands. All they can do is tweak the subset of validators that they first choose to gossip the proposal and modify the timings around when they send their prevotes until they reach the desired precondition: n precommits for an earlier proposal and ⅔ precommits for the later proposal. So this is up to the gods of non deterministic behavior to help them out with their plight. I’m not going to allocate the hours to calculate the probability but it could be in the magnitude of 1000’s of blocks trying to get this scenario before the precondition is met. + +Obviously, the probability becomes substantially higher as the cabal’s voting power nears ⅔. This is because both n decreases and there is greater tolerance to send prevotes to a greater amount of validators without going overboard and reaching the ⅓ precommit threshold in the first round which would mean they would have to try again. + +Once we’ve got our n, we can then forge the remaining signatures for that block (from the f) and bundle them all together and tada we have a forged signed header. + +Now we’ve done that, it’s time to find some light clients to fool. + +Also critical to this type of attack is that the light client that is connected to our nodes must request a light block at that specific height with which we forged this signed header but this shouldn’t be hard to do. To bring this back to a real context, say our faulty cabal, f, bought some groceries using atoms and then wanted to prove that they did, the grocery owner whips out their phone, runs the light client and f tells them the height they committed the transaction. + +An important note here is that because the validator sets are the same between the canonical and the forged block, this attack also works on light clients that verify sequentially. In fact, they are especially vulnerable because they currently don’t run the detector function afterwards. + +However, if our grocery owner verifies using the skipping algorithm, they will then run the detector and therefore they will compare with other witness nodes. Ideally for our attackers, if f has a lot of nodes exposing their rpc endpoints, then there is a chance that all the witnesses the light client has are faulty and thus we have a successful attack and the grocery owner has been fooled into handing f a few apples and carrots. + +However, there is a greater chance, especially if the light client is connected to quite a few other nodes that a divergence will be detected. The light client will figure out there was an amnesia attack and send the evidence to the witness to commit on chain. The grocery owner will see that verification failed and won't hand over the apples or carrots but also f won't be punished for their villainous behavior. This means that they can go over to the hairdressers and see if they can pull off the same stunt again. + +So this brings to the fore the current defenses that are in place. As long as there has not been a cabal of validators with greater than 1/3 power (or the trust level), the light clients verification algorithm will prevent any attempts to deceive it. Greater than this threshold and we rely on the detector as a second layer of defense to pick up on any attack. It's security is chiefly tied with the assumption that at least one of the witnesses is correct. If this fails then as illustrated above, the light client can be suceptible to amnesia (as well as equivocation and lunatic) attacks. + +The outstanding problem, if we indeed consider it big enough to be one, therefore lies in the incentivisation mechanism which is how f and other malicious validators are punished. This is decided by the application but it's up to Tendermint to identify them. With other forms of attacks the evidence lies in the precommits. But because an amnesia attack uses precommits from another round, which is information that is discarded by the consensus engine once the block is committed, it is difficult to understand which validators were in fact faulty. + +If we cast our minds back to what I previously wrote, part of an amnesia attack depends on getting n precommits from an earlier round. These are then bundled with the malicious validators' own signatures. This means that the light client nor full nodes are capable of distinguishing which of the signatures were correctly created as part of Tendermint consensus and which were forged later on. + +## Appendix B: Prior Amnesia Evidence Accountability Implementation + +As the distinction between these two attacks (amnesia and back to the past) can only be distinguished by confirming with all validators (to see if it is a full fork or a light fork), for the purpose of simplicity, these attacks will be treated as the same. + +Currently, the evidence reactor is used to simply broadcast and store evidence. The idea of creating a new reactor for the specific task of verifying these attacks was briefly discussed, but it is decided that the current evidence reactor will be extended. + +The process begins with a light client receiving conflicting headers (in the future this could also be a full node during fast sync or state sync), which it sends to a full node to analyze. As part of [evidence handling](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-047-handling-evidence-from-light-client.md), this is extracted into potential amnesia evidence when the validator voted in more than one round for a different block. + +```golang +type PotentialAmnesiaEvidence struct { + VoteA *types.Vote + VoteB *types.Vote + + Heightstamp int64 +} +``` + +*NOTE: There had been an earlier notion towards batching evidence against the entire set of validators all together but this has given way to individual processing predominantly to maintain consistency with the other forms of evidence. A more extensive breakdown can be found [here](https://github.com/tendermint/tendermint/issues/4729)* + +The evidence will contain the precommit votes for a validator that voted for both rounds. If the validator voted in more than two rounds, then they will have multiple `PotentialAmnesiaEvidence` against them hence it is possible that there is multiple evidence for a validator in a single height but not for a single round. The votes should be all valid and the height and time that the infringement was made should be within: + +`MaxEvidenceAge - ProofTrialPeriod` + +This trial period will be discussed later. + +Returning to the event of an amnesia attack, if we were to examine the behavior of the honest nodes, C1 and C2, in the schematic, C2 will not PRECOMMIT an earlier round, but it is likely, if a node in C1 were to receive +2/3 PREVOTE's or PRECOMMIT's for a higher round, that it would remove the lock and PREVOTE and PRECOMMIT for the later round. Therefore, unfortunately it is not a case of simply punishing all nodes that have double voted in the `PotentialAmnesiaEvidence`. + +Instead we use the Proof of Lock Change (PoLC) referred to in the [consensus spec](https://github.com/tendermint/tendermint/blob/master/spec/consensus/consensus.md#terms). When an honest node votes again for a different block in a later round +(which will only occur in very rare cases), it will generate the PoLC and store it in the evidence reactor for a time equal to the `MaxEvidenceAge` + +```golang +type ProofOfLockChange struct { + Votes []*types.Vote + PubKey crypto.PubKey +} +``` + +This can be either evidence of +2/3 PREVOTES or PRECOMMITS (either warrants the honest node the right to vote) and is valid, among other checks, so long as the PRECOMMIT vote of the node in V2 came after all the votes in the `ProofOfLockChange` i.e. it received +2/3 votes for a block and then voted for that block thereafter (F is unable to prove this). + +In the event that an honest node receives `PotentialAmnesiaEvidence` it will first `ValidateBasic()` and `Verify()` it and then will check if it is among the suspected nodes in the evidence. If so, it will retrieve the `ProofOfLockChange` and combine it with `PotentialAmensiaEvidence` to form `AmensiaEvidence`. All honest nodes that are part of the indicted group will have a time, measured in blocks, equal to `ProofTrialPeriod`, the aforementioned evidence paramter, to gossip their `AmnesiaEvidence` with their `ProofOfLockChange` + +```golang +type AmnesiaEvidence struct { + *types.PotentialAmnesiaEvidence + Polc *types.ProofOfLockChange +} +``` + +If the node is not required to submit any proof than it will simply broadcast the `PotentialAmnesiaEvidence`, stamp the height that it received the evidence and begin to wait out the trial period. It will ignore other `PotentialAmnesiaEvidence` gossiped at the same height and round. + +If a node receives `AmnesiaEvidence` that contains a valid `ProofOfClockChange` it will add it to the evidence store and replace any PotentialAmnesiaEvidence of the same height and round. At this stage, an amnesia evidence with polc, it is ready to be submitted to the chin. If a node receives `AmnesiaEvidence` with an empty polc it will ignore it as each honest node will conduct their own trial period to be sure that time was given for any other honest nodes to respond. + +There can only be one `AmnesiaEvidence` and one `PotentialAmneisaEvidence` stored for each attack (i.e. for each height). + +When, `state.LastBlockHeight > PotentialAmnesiaEvidence.timestamp + ProofTrialPeriod`, nodes will upgrade the corresponding `PotentialAmnesiaEvidence` and attach an empty `ProofOfLockChange`. Then honest validators of the current validator set can begin proposing the block that contains the `AmnesiaEvidence`. + +*NOTE: Even before the evidence is proposed and committed, the off-chain process of gossiping valid evidence could be + enough for honest nodes to recognize the fork and halt.* + +Other validators will vote `nil` if: + +- The Amnesia Evidence is not valid +- The Amensia Evidence is not within their own trial period i.e. too soon. +- They don't have the Amnesia Evidence and it is has an empty polc (each validator needs to run their own trial period of the evidence) +- Is of an AmnesiaEvidence that has already been committed to the chain. + +Finally it is important to stress that the protocol of having a trial period addresses attacks where a validator voted again for a different block at a later round and time. In the event, however, that the validator voted for an earlier round after voting for a later round i.e. `VoteA.Timestamp < VoteB.Timestamp && VoteA.Round > VoteB.Round` then this action is inexcusable and can be punished immediately without the need of a trial period. In this case, PotentialAmnesiaEvidence will be instantly upgraded to AmnesiaEvidence. diff --git a/docs/architecture/adr-057-RPC.md b/docs/architecture/adr-057-RPC.md new file mode 100644 index 000000000..5e7c9f1dc --- /dev/null +++ b/docs/architecture/adr-057-RPC.md @@ -0,0 +1,90 @@ +# ADR 057: RPC + +## Changelog + +- 19-05-2020: created + +## Context + +Currently the RPC layer of Tendermint is using a variant of the JSON-RPC protocol. This ADR is meant to serve as a pro/con list for possible alternatives and JSON-RPC. + +There are currently two options being discussed: gRPC & JSON-RPC. + +### JSON-RPC + +JSON-RPC is a JSON-based RPC protocol. Tendermint has implemented its own variant of JSON-RPC which is not compatible with the [JSON-RPC 2.0 specification](https://www.jsonrpc.org/specification). + +**Pros:** + +- Easy to use & implement (by default) +- Well-known and well-understood by users and integrators +- Integrates reasonably well with web infrastructure (proxies, API gateways, service meshes, caches, etc) +- human readable encoding (by default) + +**Cons:** + +- No schema support +- RPC clients must be hand-written +- Streaming not built into protocol +- Underspecified types (e.g. numbers and timestamps) +- Tendermint has its own implementation (not standards compliant, maintenance overhead) + - High maintenance cost associated to this +- Stdlib `jsonrpc` package only supports JSON-RPC 1.0, no dominant package for JSON-RPC 2.0 +- Tooling around documentation/specification (e.g. Swagger) could be better +- JSON data is larger (offset by HTTP compression) +- Serializing is slow ([~100% marshal, ~400% unmarshal](https://github.com/alecthomas/go_serialization_benchmarks)); insignificant in absolute terms +- Specification was last updated in 2013 and is way behind Swagger/OpenAPI + +### gRPC + gRPC-gateway (REST + Swagger) + +gRPC is a high performant RPC framework. It has been battle tested by a large number of users and is heavily relied on and maintained by countless large corporations. + +**Pros:** + +- Efficient data retrieval for users, lite clients and other protocols +- Easily implemented in supported languages (Go, Dart, JS, TS, rust, Elixir, Haskell, ...) +- Defined schema with richer type system (Protocol Buffers) +- Can use common schemas and types across all protocols and data stores (RPC, ABCI, blocks, etc) +- Established conventions for forwards- and backwards-compatibility +- Bi-directional streaming +- Servers and clients are be autogenerated in many languages (e.g. Tendermint-rs) +- Auto-generated swagger documentation for REST API +- Backwards and forwards compatibility guarantees enforced at the protocol level. +- Can be used with different codecs (JSON, CBOR, ...) + +**Cons:** + +- Complex system involving cross-language schemas, code generation, and custom protocols +- Type system does not always map cleanly to native language type system; integration woes +- Many common types require Protobuf plugins (e.g. timestamps and duration) +- Generated code may be non-idiomatic and hard to use +- Migration will be disruptive and laborious + +## Decision + +> This section explains all of the details of the proposed solution, including implementation details. +> It should also describe affects / corollary items that may need to be changed as a part of this. +> If the proposed change will be large, please also indicate a way to do the change to maximize ease of review. +> (e.g. the optimal split of things to do between separate PR's) + +## Status + +> A decision may be "proposed" if it hasn't been agreed upon yet, or "accepted" once it is agreed upon. If a later ADR changes or reverses a decision, it may be marked as "deprecated" or "superseded" with a reference to its replacement. + +{Deprecated|Proposed|Accepted} + +## Consequences + +> This section describes the consequences, after applying the decision. All consequences should be summarized here, not just the "positive" ones. + +### Positive + +### Negative + +### Neutral + +## References + +> Are there any relevant PR comments, issues that led up to this, or articles referenced for why we made the given design choice? If so link them here! + +- {reference link} diff --git a/docs/architecture/adr-058-event-hashing.md b/docs/architecture/adr-058-event-hashing.md new file mode 100644 index 000000000..184b921d5 --- /dev/null +++ b/docs/architecture/adr-058-event-hashing.md @@ -0,0 +1,122 @@ +# ADR 058: Event hashing + +## Changelog + +- 2020-07-17: initial version +- 2020-07-27: fixes after Ismail and Ethan's comments +- 2020-07-27: declined + +## Context + +Before [PR#4845](https://github.com/tendermint/tendermint/pull/4845), +`Header#LastResultsHash` was a root of the Merkle tree built from `DeliverTx` +results. Only `Code`, `Data` fields were included because `Info` and `Log` +fields are non-deterministic. + +At some point, we've added events to `ResponseBeginBlock`, `ResponseEndBlock`, +and `ResponseDeliverTx` to give applications a way to attach some additional +information to blocks / transactions. + +Many applications seem to have started using them since. + +However, before [PR#4845](https://github.com/tendermint/tendermint/pull/4845) +there was no way to prove that certain events were a part of the result +(_unless the application developer includes them into the state tree_). + +Hence, [PR#4845](https://github.com/tendermint/tendermint/pull/4845) was +opened. In it, `GasWanted` along with `GasUsed` are included when hashing +`DeliverTx` results. Also, events from `BeginBlock`, `EndBlock` and `DeliverTx` +results are hashed into the `LastResultsHash` as follows: + +- Since we do not expect `BeginBlock` and `EndBlock` to contain many events, + these will be Protobuf encoded and included in the Merkle tree as leaves. +- `LastResultsHash` therefore is the root hash of a Merkle tree w/ 3 leafs: + proto-encoded `ResponseBeginBlock#Events`, root hash of a Merkle tree build + from `ResponseDeliverTx` responses (Log, Info and Codespace fields are + ignored), and proto-encoded `ResponseEndBlock#Events`. +- Order of events is unchanged - same as received from the ABCI application. + +[Spec PR](https://github.com/tendermint/spec/pull/97/files) + +While it's certainly good to be able to prove something, introducing new events +or removing such becomes difficult because it breaks the `LastResultsHash`. It +means that every time you add, remove or update an event, you'll need a +hard-fork. And that is undoubtedly bad for applications, which are evolving and +don't have a stable events set. + +## Decision + +As a middle ground approach, the proposal is to add the +`Block#LastResultsEvents` consensus parameter that is a list of all events that +are to be hashed in the header. + +``` +@ proto/tendermint/abci/types.proto:295 @ message BlockParams { + int64 max_bytes = 1; + // Note: must be greater or equal to -1 + int64 max_gas = 2; + // List of events, which will be hashed into the LastResultsHash + repeated string last_results_events = 3; +} +``` + +Initially the list is empty. The ABCI application can change it via `InitChain` +or `EndBlock`. + +Example: + +```go +func (app *MyApp) DeliverTx(req types.RequestDeliverTx) types.ResponseDeliverTx { + //... + events := []abci.Event{ + { + Type: "transfer", + Attributes: []abci.EventAttribute{ + {Key: []byte("sender"), Value: []byte("Bob"), Index: true}, + }, + }, + } + return types.ResponseDeliverTx{Code: code.CodeTypeOK, Events: events} +} +``` + +For "transfer" event to be hashed, the `LastResultsEvents` must contain a +string "transfer". + +## Status + +Declined + +**Until there's more stability/motivation/use-cases/demand, the decision is to +push this entirely application side and just have apps which want events to be +provable to insert them into their application-side merkle trees. Of course +this puts more pressure on their application state and makes event proving +application specific, but it might help built up a better sense of use-cases +and how this ought to ultimately be done by Tendermint.** + +## Consequences + +### Positive + +1. networks can perform parameter change proposals to update this list as new events are added +2. allows networks to avoid having to do hard-forks +3. events can still be added at-will to the application w/o breaking anything + +### Negative + +1. yet another consensus parameter +2. more things to track in the tendermint state + +## References + +- [ADR 021](./adr-021-abci-events.md) +- [Indexing transactions](../app-dev/indexing-transactions.md) + +## Appendix A. Alternative proposals + +The other proposal was to add `Hash bool` flag to the `Event`, similarly to +`Index bool` EventAttribute's field. When `true`, Tendermint would hash it into +the `LastResultsEvents`. The downside is that the logic is implicit and depends +largely on the node's operator, who decides what application code to run. The +above proposal makes it (the logic) explicit and easy to upgrade via +governance. diff --git a/docs/architecture/adr-059-evidence-composition-and-lifecycle.md b/docs/architecture/adr-059-evidence-composition-and-lifecycle.md new file mode 100644 index 000000000..e38b39089 --- /dev/null +++ b/docs/architecture/adr-059-evidence-composition-and-lifecycle.md @@ -0,0 +1,306 @@ +# ADR 059: Evidence Composition and Lifecycle + +## Changelog + +- 04/09/2020: Initial Draft (Unabridged) +- 07/09/2020: First Version +- 13/03/2021: Ammendment to accomodate forward lunatic attack +- 29/06/2021: Add information about ABCI specific fields + +## Scope + +This document is designed to collate together and surface some predicaments involving evidence in Tendermint: both its composition and lifecycle. It then aims to find a solution to these. The scope does not extend to the verification nor detection of certain types of evidence but concerns itself mainly with the general form of evidence and how it moves from inception to application. + +## Background + +For a long time `DuplicateVoteEvidence`, formed in the consensus reactor, was the only evidence Tendermint had. It was produced whenever two votes from the same validator in the same round +was observed and thus it was designed that each evidence was for a single validator. It was predicted that there may come more forms of evidence and thus `DuplicateVoteEvidence` was used as the model for the `Evidence` interface and also for the form of the evidence data sent to the application. It is important to note that Tendermint concerns itself just with the detection and reporting of evidence and it is the responsibility of the application to exercise punishment. + +```go +type Evidence interface { //existing + Height() int64 // height of the offense + Time() time.Time // time of the offense + Address() []byte // address of the offending validator + Bytes() []byte // bytes which comprise the evidence + Hash() []byte // hash of the evidence + Verify(chainID string, pubKey crypto.PubKey) error // verify the evidence + Equal(Evidence) bool // check equality of evidence + + ValidateBasic() error + String() string +} +``` + +```go +type DuplicateVoteEvidence struct { + VoteA *Vote + VoteB *Vote + + timestamp time.Time // taken from the block time +} +``` + +Tendermint has now introduced a new type of evidence to protect light clients from being attacked. This `LightClientAttackEvidence` (see [here](https://github.com/informalsystems/tendermint-rs/blob/31ca3e64ce90786c1734caf186e30595832297a4/docs/spec/lightclient/attacks/evidence-handling.md) for more information) is vastly different to `DuplicateVoteEvidence` in that it is physically a much different size containing a complete signed header and validator set. It is formed within the light client, not the consensus reactor and requires a lot more information from state to verify (`VerifyLightClientAttack(commonHeader, trustedHeader *SignedHeader, commonVals *ValidatorSet)` vs `VerifyDuplicateVote(chainID string, pubKey PubKey)`). Finally it batches validators together (a single piece of evidence that implicates multiple malicious validators at a height) as opposed to having individual evidence (each piece of evidence is per validator per height). This evidence stretches the existing mould that was used to accommodate new types of evidence and has thus caused us to reconsider how evidence should be formatted and processed. + +```go +type LightClientAttackEvidence struct { // proposed struct in spec + ConflictingBlock *LightBlock + CommonHeight int64 + Type AttackType // enum: {Lunatic|Equivocation|Amnesia} + + timestamp time.Time // taken from the block time at the common height +} +``` +*Note: These three attack types have been proven by the research team to be exhaustive* + +## Possible Approaches for Evidence Composition + +### Individual framework + +Evidence remains on a per validator basis. This causes the least disruption to the current processes but requires that we break `LightClientAttackEvidence` into several pieces of evidence for each malicious validator. This not only has performance consequences in that there are n times as many database operations and that the gossiping of evidence will require more bandwidth then necessary (by requiring a header for each piece) but it potentially impacts our ability to validate it. In batch form, the full node can run the same process the light client did to see that 1/3 validating power was present in both the common block and the conflicting block whereas this becomes more difficult to verify individually without opening the possibility that malicious validators forge evidence against innocent . Not only that, but `LightClientAttackEvidence` also deals with amnesia attacks which unfortunately have the characteristic where we know the set of validators involved but not the subset that were actually malicious (more to be said about this later). And finally splitting the evidence into individual pieces makes it difficult to understand the severity of the attack (i.e. the total voting power involved in the attack) + +#### An example of a possible implementation path + +We would ignore amnesia evidence (as individually it's hard to make) and revert to the initial split we had before where `DuplicateVoteEvidence` is also used for light client equivocation attacks and thus we only need `LunaticEvidence`. We would also most likely need to remove `Verify` from the interface as this isn't really something that can be used. + +``` go +type LunaticEvidence struct { // individual lunatic attack + header *Header + commonHeight int64 + vote *Vote + + timestamp time.Time // once again taken from the block time at the height of the common header +} +``` + +### Batch Framework + +The last approach of this category would be to consider batch only evidence. This works fine with `LightClientAttackEvidence` but would require alterations to `DuplicateVoteEvidence` which would most likely mean that the consensus would send conflicting votes to a buffer in the evidence module which would then wrap all the votes together per height before gossiping them to other nodes and trying to commit it on chain. At a glance this may improve IO and verification speed and perhaps more importantly grouping validators gives the application and Tendermint a better overview of the severity of the attack. + +However individual evidence has the advantage that it is easy to check if a node already has that evidence meaning we just need to check hashes to know that we've already verified this evidence before. Batching evidence would imply that each node may have a different combination of duplicate votes which may complicate things. + +#### An example of a possible implementation path + +`LightClientAttackEvidence` won't change but the evidence interface will need to look like the proposed one above and `DuplicateVoteEvidence` will need to change to encompass multiple double votes. A problem with batch evidence is that it needs to be unique to avoid people from submitting different permutations. + +## Decision + +The decision is to adopt a hybrid design. + +We allow individual and batch evidence to coexist together, meaning that verification is done depending on the evidence type and that the bulk of the work is done in the evidence pool itself (including forming the evidence to be sent to the application). + + +## Detailed Design + +Evidence has the following simple interface: + +```go +type Evidence interface { //proposed + Height() int64 // height of the offense + Bytes() []byte // bytes which comprise the evidence + Hash() []byte // hash of the evidence + ValidateBasic() error + String() string +} +``` + +The changing of the interface is backwards compatible as these methods are all present in the previous version of the interface. However, networks will need to upgrade to be able to process the new evidence as verification has changed. + +We have two concrete types of evidence that fulfil this interface + +```go +type LightClientAttackEvidence struct { + ConflictingBlock *LightBlock + CommonHeight int64 // the last height at which the primary provider and witness provider had the same header + + // abci specific information + ByzantineValidators []*Validator // validators in the validator set that misbehaved in creating the conflicting block + TotalVotingPower int64 // total voting power of the validator set at the common height + Timestamp time.Time // timestamp of the block at the common height +} +``` +where the `Hash()` is the hash of the header and commonHeight. + +Note: It was also discussed whether to include the commit hash which captures the validators that signed the header. However this would open the opportunity for someone to propose multiple permutations of the same evidence (through different commit signatures) hence it was omitted. Consequentially, when it comes to verifying evidence in a block, for `LightClientAttackEvidence` we can't just check the hashes because someone could have the same hash as us but a different commit where less than 1/3 validators voted which would be an invalid version of the evidence. (see `fastCheck` for more details) + +```go +type DuplicateVoteEvidence { + VoteA *Vote + VoteB *Vote + + // abci specific information + TotalVotingPower int64 + ValidatorPower int64 + Timestamp time.Time +} +``` +where the `Hash()` is the hash of the two votes + +For both of these types of evidence, `Bytes()` represents the proto-encoded byte array format of the evidence and `ValidateBasic` is +an initial consistency check to make sure the evidence has a valid structure. + +### The Evidence Pool + +`LightClientAttackEvidence` is generated in the light client and `DuplicateVoteEvidence` in consensus. Both are sent to the evidence pool through `AddEvidence(ev Evidence) error`. The evidence pool's primary purpose is to verify evidence. It also gossips evidence to other peers' evidence pool and serves it to consensus so it can be committed on chain and the relevant information can be sent to the application in order to exercise punishment. When evidence is added, the pool first runs `Has(ev Evidence)` to check if it has already received it (by comparing hashes) and then `Verify(ev Evidence) error`. Once verified the evidence pool stores it it's pending database. There are two databases: one for pending evidence that is not yet committed and another of the committed evidence (to avoid committing evidence twice) + +#### Verification + +`Verify()` does the following: + +- Use the hash to see if we already have this evidence in our committed database. + +- Use the height to check if the evidence hasn't expired. + +- If it has expired then use the height to find the block header and check if the time has also expired in which case we drop the evidence + +- Then proceed with switch statement for each of the two evidence: + +For `DuplicateVote`: + +- Check that height, round, type and validator address are the same + +- Check that the Block ID is different + +- Check the look up table for addresses to make sure there already isn't evidence against this validator + +- Fetch the validator set and confirm that the address is in the set at the height of the attack + +- Check that the chain ID and signature is valid. + +For `LightClientAttack` + +- Fetch the common signed header and val set from the common height and use skipping verification to verify the conflicting header + +- Fetch the trusted signed header at the same height as the conflicting header and compare with the conflicting header to work out which type of attack it is and in doing so return the malicious validators. NOTE: If the node doesn't have the signed header at the height of the conflicting header, it instead fetches the latest header it has and checks to see if it can prove the evidence based on a violation of header time. This is known as forward lunatic attack. + + - If equivocation, return the validators that signed for the commits of both the trusted and signed header + + - If lunatic, return the validators from the common val set that signed in the conflicting block + + - If amnesia, return no validators (since we can't know which validators are malicious). This also means that we don't currently send amnesia evidence to the application, although we will introduce more robust amnesia evidence handling in future Tendermint Core releases + +- Check that the hashes of the conflicting header and the trusted header are different + +- In the case of a forward lunatic attack, where the trusted header height is less than the conflicting header height, the node checks that the time of the trusted header is later than the time of conflicting header. This proves that the conflicting header breaks monotonically increasing time. If the node doesn't have a trusted header with a later time then it is unable to validate the evidence for now. + +- Lastly, for each validator, check the look up table to make sure there already isn't evidence against this validator + +After verification we persist the evidence with the key `height/hash` to the pending evidence database in the evidence pool. + +#### ABCI Evidence + +Both evidence structures contain data (such as timestamp) that are necessary to be passed to the application but do not strictly constitute evidence of misbehavior. As such, these fields are verified last. If any of these fields are invalid to a node i.e. they don't correspond with their state, nodes will reconstruct a new evidence struct from the existing fields and repopulate the abci specific fields with their own state data. + +#### Broadcasting and receiving evidence + +The evidence pool also runs a reactor that broadcasts the newly validated +evidence to all connected peers. + +Receiving evidence from other evidence reactors works in the same manner as receiving evidence from the consensus reactor or a light client. + + +#### Proposing evidence on the block + +When it comes to prevoting and precomitting a proposal that contains evidence, the full node will once again +call upon the evidence pool to verify the evidence using `CheckEvidence(ev []Evidence)`: + +This performs the following actions: + +1. Loops through all the evidence to check that nothing has been duplicated + +2. For each evidence, run `fastCheck(ev evidence)` which works similar to `Has` but instead for `LightClientAttackEvidence` if it has the +same hash it then goes on to check that the validators it has are all signers in the commit of the conflicting header. If it doesn't pass fast check (because it hasn't seen the evidence before) then it will have to verify the evidence. + +3. runs `Verify(ev Evidence)` - Note: this also saves the evidence to the db as mentioned before. + + +#### Updating application and pool + +The final part of the lifecycle is when the block is committed and the `BlockExecutor` then updates state. As part of this process, the `BlockExecutor` gets the evidence pool to create a simplified format for the evidence to be sent to the application. This happens in `ApplyBlock` where the executor calls `Update(Block, State) []abci.Evidence`. + +```go +abciResponses.BeginBlock.ByzantineValidators = evpool.Update(block, state) +``` + +Here is the format of the evidence that the application will receive. As seen above, this is stored as an array within `BeginBlock`. +The changes to the application are minimal (it is still formed one for each malicious validator) with the exception of using an enum instead of a string for the evidence type. + +```go +type Evidence struct { + // either LightClientAttackEvidence or DuplicateVoteEvidence as an enum (abci.EvidenceType) + Type EvidenceType `protobuf:"varint,1,opt,name=type,proto3,enum=tendermint.abci.EvidenceType" json:"type,omitempty"` + // The offending validator + Validator Validator `protobuf:"bytes,2,opt,name=validator,proto3" json:"validator"` + // The height when the offense occurred + Height int64 `protobuf:"varint,3,opt,name=height,proto3" json:"height,omitempty"` + // The corresponding time where the offense occurred + Time time.Time `protobuf:"bytes,4,opt,name=time,proto3,stdtime" json:"time"` + // Total voting power of the validator set in case the ABCI application does + // not store historical validators. + // https://github.com/tendermint/tendermint/issues/4581 + TotalVotingPower int64 `protobuf:"varint,5,opt,name=total_voting_power,json=totalVotingPower,proto3" json:"total_voting_power,omitempty"` +} +``` + + +This `Update()` function does the following: + +- Increments state which keeps track of both the current time and height used for measuring expiry + +- Marks evidence as committed and saves to db. This prevents validators from proposing committed evidence in the future + Note: the db just saves the height and the hash. There is no need to save the entire committed evidence + +- Forms ABCI evidence as such: (note for `DuplicateVoteEvidence` the validators array size is 1) + ```go + for _, val := range evInfo.Validators { + abciEv = append(abciEv, &abci.Evidence{ + Type: evType, // either DuplicateVote or LightClientAttack + Validator: val, // the offending validator (which includes the address, pubkey and power) + Height: evInfo.ev.Height(), // the height when the offense happened + Time: evInfo.time, // the time when the offense happened + TotalVotingPower: evInfo.totalVotingPower // the total voting power of the validator set + }) + } + ``` + +- Removes expired evidence from both pending and committed databases + +The ABCI evidence is then sent via the `BlockExecutor` to the application. + +#### Summary + +To summarize, we can see the lifecycle of evidence as such: + +![evidence_lifecycle](../imgs/evidence_lifecycle.png) + +Evidence is first detected and created in the light client and consensus reactor. It is verified and stored as `EvidenceInfo` and gossiped to the evidence pools in other nodes. The consensus reactor later communicates with the evidence pool to either retrieve evidence to be put into a block, or verify the evidence the consensus reactor has retrieved in a block. Lastly when a block is added to the chain, the block executor sends the committed evidence back to the evidence pool so a pointer to the evidence can be stored in the evidence pool and it can update it's height and time. Finally, it turns the committed evidence into ABCI evidence and through the block executor passes the evidence to the application so the application can handle it. + +## Status + +Implemented + +## Consequences + + + +### Positive + +- Evidence is better contained to the evidence pool / module +- LightClientAttack is kept together (easier for verification and bandwidth) +- Variations on commit sigs in LightClientAttack doesn't lead to multiple permutations and multiple evidence +- Address to evidence map prevents DOS attacks, where a single validator could DOS the network by flooding it with evidence submissions + +### Negative + +- Changes the `Evidence` interface and thus is a block breaking change +- Changes the ABCI `Evidence` and is thus a ABCI breaking change +- Unable to query evidence for address / time without evidence pool + +### Neutral + + +## References + + + +- [LightClientAttackEvidence](https://github.com/informalsystems/tendermint-rs/blob/31ca3e64ce90786c1734caf186e30595832297a4/docs/spec/lightclient/attacks/evidence-handling.md) diff --git a/docs/architecture/adr-060-go-api-stability.md b/docs/architecture/adr-060-go-api-stability.md new file mode 100644 index 000000000..d900733b7 --- /dev/null +++ b/docs/architecture/adr-060-go-api-stability.md @@ -0,0 +1,193 @@ +# ADR 060: Go API Stability + +## Changelog + +- 2020-09-08: Initial version. (@erikgrinaker) + +- 2020-09-09: Tweak accepted changes, add initial public API packages, add consequences. (@erikgrinaker) + +- 2020-09-17: Clarify initial public API. (@erikgrinaker) + +## Context + +With the release of Tendermint 1.0 we will adopt [semantic versioning](https://semver.org). One major implication is a guarantee that we will not make backwards-incompatible changes until Tendermint 2.0 (except in pre-release versions). In order to provide this guarantee for our Go API, we must clearly define which of our APIs are public, and what changes are considered backwards-compatible. + +Currently, we list packages that we consider public in our [README](https://github.com/tendermint/tendermint#versioning), but since we are still at version 0.x we do not provide any backwards compatiblity guarantees at all. + +### Glossary + +* **External project:** a different Git/VCS repository or code base. + +* **External package:** a different Go package, can be a child or sibling package in the same project. + +* **Internal code:** code not intended for use in external projects. + +* **Internal directory:** code under `internal/` which cannot be imported in external projects. + +* **Exported:** a Go identifier starting with an uppercase letter, which can therefore be accessed by an external package. + +* **Private:** a Go identifier starting with a lowercase letter, which therefore cannot be accessed by an external package unless via an exported field, variable, or function/method return value. + +* **Public API:** any Go identifier that can be imported or accessed by an external project, except test code in `_test.go` files. + +* **Private API:** any Go identifier that is not accessible via a public API, including all code in the internal directory. + +## Alternative Approaches + +- Split all public APIs out to separate Go modules in separate Git repositories, and consider all Tendermint code internal and not subject to API backwards compatibility at all. This was rejected, since it has been attempted by the Tendermint project earlier, resulting in too much dependency management overhead. + +- Simply document which APIs are public and which are private. This is the current approach, but users should not be expected to self-enforce this, the documentation is not always up-to-date, and external projects will often end up depending on internal code anyway. + +## Decision + +From Tendermint 1.0, all internal code (except private APIs) will be placed in a root-level [`internal` directory](https://golang.org/cmd/go/#hdr-Internal_Directories), which the Go compiler will block for use by external projects. All exported items outside of the `internal` directory are considered a public API and subject to backwards compatibility guarantees, except files ending in `_test.go`. + +The `crypto` package may be split out to a separate module in a separate repo. This is the main general-purpose package used by external projects, and is the only Tendermint dependency in e.g. IAVL which can cause some problems for projects depending on both IAVL and Tendermint. This will be decided after further discussion. + +The `tm-db` package will remain a separate module in a separate repo. The `crypto` package may possibly be split out, pending further discussion, as this is the main general-purpose package used by other projects. + +## Detailed Design + +### Public API + +When preparing our public API for 1.0, we should keep these principles in mind: + +- Limit the number of public APIs that we start out with - we can always add new APIs later, but we can't change or remove APIs once they're made public. + +- Before an API is made public, do a thorough review of the API to make sure it covers any future needs, can accomodate expected changes, and follows good API design practices. + +The following is the minimum set of public APIs that will be included in 1.0, in some form: + +- `abci` +- packages used for constructing nodes `config`, `libs/log`, and `version` +- Client APIs, i.e. `rpc/client`, `light`, and `privval`. +- `crypto` (possibly as a separate repo) + +We may offer additional APIs as well, following further discussions internally and with other stakeholders. However, public APIs for providing custom components (e.g. reactors and mempools) are not planned for 1.0, but may be added in a later 1.x version if this is something we want to offer. + +For comparison, the following are the number of Tendermint imports in the Cosmos SDK (excluding tests), which should be mostly satisfied by the planned APIs. + +``` + 1 github.com/tendermint/tendermint/abci/server + 73 github.com/tendermint/tendermint/abci/types + 2 github.com/tendermint/tendermint/cmd/tendermint/commands + 7 github.com/tendermint/tendermint/config + 68 github.com/tendermint/tendermint/crypto + 1 github.com/tendermint/tendermint/crypto/armor + 10 github.com/tendermint/tendermint/crypto/ed25519 + 2 github.com/tendermint/tendermint/crypto/encoding + 3 github.com/tendermint/tendermint/crypto/merkle + 3 github.com/tendermint/tendermint/crypto/sr25519 + 8 github.com/tendermint/tendermint/crypto/tmhash + 1 github.com/tendermint/tendermint/crypto/xsalsa20symmetric + 11 github.com/tendermint/tendermint/libs/bytes + 2 github.com/tendermint/tendermint/libs/bytes.HexBytes + 15 github.com/tendermint/tendermint/libs/cli + 2 github.com/tendermint/tendermint/libs/cli/flags + 2 github.com/tendermint/tendermint/libs/json + 30 github.com/tendermint/tendermint/libs/log + 1 github.com/tendermint/tendermint/libs/math + 11 github.com/tendermint/tendermint/libs/os + 4 github.com/tendermint/tendermint/libs/rand + 1 github.com/tendermint/tendermint/libs/strings + 5 github.com/tendermint/tendermint/light + 1 github.com/tendermint/tendermint/internal/mempool + 3 github.com/tendermint/tendermint/node + 5 github.com/tendermint/tendermint/internal/p2p + 4 github.com/tendermint/tendermint/privval + 10 github.com/tendermint/tendermint/proto/tendermint/crypto + 1 github.com/tendermint/tendermint/proto/tendermint/libs/bits + 24 github.com/tendermint/tendermint/proto/tendermint/types + 3 github.com/tendermint/tendermint/proto/tendermint/version + 2 github.com/tendermint/tendermint/proxy + 3 github.com/tendermint/tendermint/rpc/client + 1 github.com/tendermint/tendermint/rpc/client/http + 2 github.com/tendermint/tendermint/rpc/client/local + 3 github.com/tendermint/tendermint/rpc/core/types + 1 github.com/tendermint/tendermint/rpc/jsonrpc/server + 33 github.com/tendermint/tendermint/types + 2 github.com/tendermint/tendermint/types/time + 1 github.com/tendermint/tendermint/version +``` + +### Backwards-Compatible Changes + +In Go, [almost all API changes are backwards-incompatible](https://blog.golang.org/module-compatibility) and thus exported items in public APIs generally cannot be changed until Tendermint 2.0. The only backwards-compatible changes we can make to public APIs are: + +- Adding a package. + +- Adding a new identifier to the package scope (e.g. const, var, func, struct, interface, etc.). + +- Adding a new method to a struct. + +- Adding a new field to a struct, if the zero-value preserves any old behavior. + +- Changing the order of fields in a struct. + +- Adding a variadic parameter to a named function or struct method, if the function type itself is not assignable in any public APIs (e.g. a callback). + +- Adding a new method to an interface, or a variadic parameter to an interface method, _if the interface already has a private method_ (which prevents external packages from implementing it). + +- Widening a numeric type as long as it is a named type (e.g. `type Number int32` can change to `int64`, but not `int8` or `uint32`). + +Note that public APIs can expose private types (e.g. via an exported variable, field, or function/method return value), in which case the exported fields and methods on these private types are also part of the public API and covered by its backwards compatiblity guarantees. In general, private types should never be accessible via public APIs unless wrapped in an exported interface. + +Also note that if we accept, return, export, or embed types from a dependency, we assume the backwards compatibility responsibility for that dependency, and must make sure any dependency upgrades comply with the above constraints. + +We should run CI linters for minor version branches to enforce this, e.g. [apidiff](https://go.googlesource.com/exp/+/refs/heads/master/apidiff/README.md), [breakcheck](https://github.com/gbbr/breakcheck), and [apicombat](https://github.com/bradleyfalzon/apicompat). + +#### Accepted Breakage + +The above changes can still break programs in a few ways - these are _not_ considered backwards-incompatible changes, and users are advised to avoid this usage: + +- If a program uses unkeyed struct literals (e.g. `Foo{"bar", "baz"}`) and we add fields or change the field order, the program will no longer compile or may have logic errors. + +- If a program embeds two structs in a struct, and we add a new field or method to an embedded Tendermint struct which also exists in the other embedded struct, the program will no longer compile. + +- If a program compares two structs (e.g. with `==`), and we add a new field of an incomparable type (slice, map, func, or struct that contains these) to a Tendermint struct which is compared, the program will no longer compile. + +- If a program assigns a Tendermint function to an identifier, and we add a variadic parameter to the function signature, the program will no longer compile. + +### Strategies for API Evolution + +The API guarantees above can be fairly constraining, but are unavoidable given the Go language design. The following tricks can be employed where appropriate to allow us to make changes to the API: + +- We can add a new function or method with a different name that takes additional parameters, and have the old function call the new one. + +- Functions and methods can take an options struct instead of separate parameters, to allow adding new options - this is particularly suitable for functions that take many parameters and are expected to be extended, and especially for interfaces where we cannot add new methods with different parameters at all. + +- Interfaces can include a private method, e.g. `interface { private() }`, to make them unimplementable by external packages and thus allow us to add new methods to the interface without breaking other programs. Of course, this can't be used for interfaces that should be implementable externally. + +- We can use [interface upgrades](https://avtok.com/2014/11/05/interface-upgrades.html) to allow implementers of an existing interface to also implement a new interface, as long as the old interface can still be used - e.g. the new interface `BetterReader` may have a method `ReadBetter()`, and a function that takes a `Reader` interface as an input can check if the implementer also implements `BetterReader` and in that case call `ReadBetter()` instead of `Read()`. + +## Status + +Accepted + +## Consequences + +### Positive + +- Users can safely upgrade with less fear of applications breaking, and know whether an upgrade only includes bug fixes or also functional enhancements + +- External developers have a predictable and well-defined API to build on that will be supported for some time + +- Less synchronization between teams, since there is a clearer contract and timeline for changes and they happen less frequently + +- More documentation will remain accurate, since it's not chasing a moving target + +- Less time will be spent on code churn and more time spent on functional improvements, both for the community and for our teams + +### Negative + +- Many improvements, changes, and bug fixes will have to be postponed until the next major version, possibly for a year or more + +- The pace of development will slow down, since we must work within the existing API constraints, and spend more time planning public APIs + +- External developers may lose access to some currently exported APIs and functionality + +## References + +- [#4451: Place internal APIs under internal package](https://github.com/tendermint/tendermint/issues/4451) + +- [On Pluggability](https://docs.google.com/document/d/1G08LnwSyb6BAuCVSMF3EKn47CGdhZ5wPZYJQr4-bw58/edit?ts=5f609f11) diff --git a/docs/architecture/adr-061-p2p-refactor-scope.md b/docs/architecture/adr-061-p2p-refactor-scope.md new file mode 100644 index 000000000..7a9cb04be --- /dev/null +++ b/docs/architecture/adr-061-p2p-refactor-scope.md @@ -0,0 +1,109 @@ +# ADR 061: P2P Refactor Scope + +## Changelog + +- 2020-10-30: Initial version (@erikgrinaker) + +## Context + +The `p2p` package responsible for peer-to-peer networking is rather old and has a number of weaknesses, including tight coupling, leaky abstractions, lack of tests, DoS vulnerabilites, poor performance, custom protocols, and incorrect behavior. A refactor has been discussed for several years ([#2067](https://github.com/tendermint/tendermint/issues/2067)). + +Informal Systems are also building a Rust implementation of Tendermint, [Tendermint-rs](https://github.com/informalsystems/tendermint-rs), and plan to implement P2P networking support over the next year. As part of this work, they have requested adopting e.g. [QUIC](https://datatracker.ietf.org/doc/draft-ietf-quic-transport/) as a transport protocol instead of implementing the custom application-level `MConnection` stream multiplexing protocol that Tendermint currently uses. + +This ADR summarizes recent discussion with stakeholders on the scope of a P2P refactor. Specific designs and implementations will be submitted as separate ADRs. + +## Alternative Approaches + +There have been recurring proposals to adopt [LibP2P](https://libp2p.io) instead of maintaining our own P2P networking stack (see [#3696](https://github.com/tendermint/tendermint/issues/3696)). While this appears to be a good idea in principle, it would be a highly breaking protocol change, there are indications that we might have to fork and modify LibP2P, and there are concerns about the abstractions used. + +In discussions with Informal Systems we decided to begin with incremental improvements to the current P2P stack, add support for pluggable transports, and then gradually start experimenting with LibP2P as a transport layer. If this proves successful, we can consider adopting it for higher-level components at a later time. + +## Decision + +The P2P stack will be refactored and improved iteratively, in several phases: + +* **Phase 1:** code and API refactoring, maintaining protocol compatibility as far as possible. + +* **Phase 2:** additional transports and incremental protocol improvements. + +* **Phase 3:** disruptive protocol changes. + +The scope of phases 2 and 3 is still uncertain, and will be revisited once the preceding phases have been completed as we'll have a better sense of requirements and challenges. + +## Detailed Design + +Separate ADRs will be submitted for specific designs and changes in each phase, following research and prototyping. Below are objectives in order of priority. + +### Phase 1: Code and API Refactoring + +This phase will focus on improving the internal abstractions and implementations in the `p2p` package. As far as possible, it should not change the P2P protocol in a backwards-incompatible way. + +* Cleaner, decoupled abstractions for e.g. `Reactor`, `Switch`, and `Peer`. [#2067](https://github.com/tendermint/tendermint/issues/2067) [#5287](https://github.com/tendermint/tendermint/issues/5287) [#3833](https://github.com/tendermint/tendermint/issues/3833) + * Reactors should receive messages in separate goroutines or via buffered channels. [#2888](https://github.com/tendermint/tendermint/issues/2888) +* Improved peer lifecycle management. [#3679](https://github.com/tendermint/tendermint/issues/3679) [#3719](https://github.com/tendermint/tendermint/issues/3719) [#3653](https://github.com/tendermint/tendermint/issues/3653) [#3540](https://github.com/tendermint/tendermint/issues/3540) [#3183](https://github.com/tendermint/tendermint/issues/3183) [#3081](https://github.com/tendermint/tendermint/issues/3081) [#1356](https://github.com/tendermint/tendermint/issues/1356) + * Peer prioritization. [#2860](https://github.com/tendermint/tendermint/issues/2860) [#2041](https://github.com/tendermint/tendermint/issues/2041) +* Pluggable transports, with `MConnection` as one implementation. [#5587](https://github.com/tendermint/tendermint/issues/5587) [#2430](https://github.com/tendermint/tendermint/issues/2430) [#805](https://github.com/tendermint/tendermint/issues/805) +* Improved peer address handling. + * Address book refactor. [#4848](https://github.com/tendermint/tendermint/issues/4848) [#2661](https://github.com/tendermint/tendermint/issues/2661) + * Transport-agnostic peer addressing. [#5587](https://github.com/tendermint/tendermint/issues/5587) [#3782](https://github.com/tendermint/tendermint/issues/3782) [#3692](https://github.com/tendermint/tendermint/issues/3692) + * Improved detection and advertisement of own address. [#5588](https://github.com/tendermint/tendermint/issues/5588) [#4260](https://github.com/tendermint/tendermint/issues/4260) [#3716](https://github.com/tendermint/tendermint/issues/3716) [#1727](https://github.com/tendermint/tendermint/issues/1727) + * Support multiple IPs per peer. [#1521](https://github.com/tendermint/tendermint/issues/1521) [#2317](https://github.com/tendermint/tendermint/issues/2317) + +The refactor should attempt to address the following secondary objectives: testability, observability, performance, security, quality-of-service, backpressure, and DoS resilience. Much of this will be revisited as explicit objectives in phase 2. + +Ideally, the refactor should happen incrementally, with regular merges to `master` every few weeks. This will take more time overall, and cause frequent breaking changes to internal Go APIs, but it reduces the branch drift and gets the code tested sooner and more broadly. + +### Phase 2: Additional Transports and Protocol Improvements + +This phase will focus on protocol improvements and other breaking changes. The following are considered proposals that will need to be evaluated separately once the refactor is done. Additional proposals are likely to be added during phase 1. + +* QUIC transport. [#198](https://github.com/tendermint/spec/issues/198) +* Noise protocol for secret connection handshake. [#5589](https://github.com/tendermint/tendermint/issues/5589) [#3340](https://github.com/tendermint/tendermint/issues/3340) +* Peer ID in connection handshake. [#5590](https://github.com/tendermint/tendermint/issues/5590) +* Peer and service discovery (e.g. RPC nodes, state sync snapshots). [#5481](https://github.com/tendermint/tendermint/issues/5481) [#4583](https://github.com/tendermint/tendermint/issues/4583) +* Rate-limiting, backpressure, and QoS scheduling. [#4753](https://github.com/tendermint/tendermint/issues/4753) [#2338](https://github.com/tendermint/tendermint/issues/2338) +* Compression. [#2375](https://github.com/tendermint/tendermint/issues/2375) +* Improved metrics and tracing. [#3849](https://github.com/tendermint/tendermint/issues/3849) [#2600](https://github.com/tendermint/tendermint/issues/2600) +* Simplified P2P configuration options. + +### Phase 3: Disruptive Protocol Changes + +This phase covers speculative, wide-reaching proposals that are poorly defined and highly uncertain. They will be evaluated once the previous phases are done. + +* Adopt LibP2P. [#3696](https://github.com/tendermint/tendermint/issues/3696) +* Allow cross-reactor communication, possibly without channels. +* Dynamic channel advertisment, as reactors are enabled/disabled. [#4394](https://github.com/tendermint/tendermint/issues/4394) [#1148](https://github.com/tendermint/tendermint/issues/1148) +* Pubsub-style networking topology and pattern. +* Support multiple chain IDs in the same network. + +## Status + +Accepted + +## Consequences + +### Positive + +* Cleaner, simpler architecture that's easier to reason about and test, and thus hopefully less buggy. + +* Improved performance and robustness. + +* Reduced maintenance burden and increased interoperability by the possible adoption of standardized protocols such as QUIC and Noise. + +* Improved usability, with better observability, simpler configuration, and more automation (e.g. peer/service/address discovery, rate-limiting, and backpressure). + +### Negative + +* Maintaining our own P2P networking stack is resource-intensive. + +* Abstracting away the underlying transport may prevent usage of advanced transport features. + +* Breaking changes to APIs and protocols are disruptive to users. + +## References + +See issue links above. + +- [#2067: P2P Refactor](https://github.com/tendermint/tendermint/issues/2067) + +- [P2P refactor brainstorm document](https://docs.google.com/document/d/1FUTADZyLnwA9z7ndayuhAdAFRKujhh_y73D0ZFdKiOQ/edit?pli=1#) diff --git a/docs/architecture/adr-062-p2p-architecture.md b/docs/architecture/adr-062-p2p-architecture.md new file mode 100644 index 000000000..bf8e0f01d --- /dev/null +++ b/docs/architecture/adr-062-p2p-architecture.md @@ -0,0 +1,615 @@ +# ADR 062: P2P Architecture and Abstractions + +## Changelog + +- 2020-11-09: Initial version (@erikgrinaker) + +- 2020-11-13: Remove stream IDs, move peer errors onto channel, note on moving PEX into core (@erikgrinaker) + +- 2020-11-16: Notes on recommended reactor implementation patterns, approve ADR (@erikgrinaker) + +- 2021-02-04: Update with new P2P core and Transport API changes (@erikgrinaker). + +## Context + +In [ADR 061](adr-061-p2p-refactor-scope.md) we decided to refactor the peer-to-peer (P2P) networking stack. The first phase is to redesign and refactor the internal P2P architecture, while retaining protocol compatibility as far as possible. + +## Alternative Approaches + +Several variations of the proposed design were considered, including e.g. calling interface methods instead of passing messages (like the current architecture), merging channels with streams, exposing the internal peer data structure to reactors, being message format-agnostic via arbitrary codecs, and so on. This design was chosen because it has very loose coupling, is simpler to reason about and more convenient to use, avoids race conditions and lock contention for internal data structures, gives reactors better control of message ordering and processing semantics, and allows for QoS scheduling and backpressure in a very natural way. + +[multiaddr](https://github.com/multiformats/multiaddr) was considered as a transport-agnostic peer address format over regular URLs, but it does not appear to have very widespread adoption, and advanced features like protocol encapsulation and tunneling do not appear to be immediately useful to us. + +There were also proposals to use LibP2P instead of maintaining our own P2P stack, which were rejected (for now) in [ADR 061](adr-061-p2p-refactor-scope.md). + +The initial version of this ADR had a byte-oriented multi-stream transport API, but this had to be abandoned/postponed to maintain backwards-compatibility with the existing MConnection protocol which is message-oriented. See the rejected RFC in [tendermint/spec#227](https://github.com/tendermint/spec/pull/227) for details. + +## Decision + +The P2P stack will be redesigned as a message-oriented architecture, primarily relying on Go channels for communication and scheduling. It will use a message-oriented transport to binary messages with individual peers, bidirectional peer-addressable channels to send and receive Protobuf messages, a router to route messages between reactors and peers, and a peer manager to manage peer lifecycle information. Message passing is asynchronous with at-most-once delivery. + +## Detailed Design + +This ADR is primarily concerned with the architecture and interfaces of the P2P stack, not implementation details. The interfaces described here should therefore be considered a rough architecture outline, not a complete and final design. + +Primary design objectives have been: + +* Loose coupling between components, for a simpler, more robust, and test-friendly architecture. +* Pluggable transports (not necessarily networked). +* Better scheduling of messages, with improved prioritization, backpressure, and performance. +* Centralized peer lifecycle and connection management. +* Better peer address detection, advertisement, and exchange. +* Wire-level backwards compatibility with current P2P network protocols, except where it proves too obstructive. + +The main abstractions in the new stack are: + +* `Transport`: An arbitrary mechanism to exchange binary messages with a peer across a `Connection`. +* `Channel`: A bidirectional channel to asynchronously exchange Protobuf messages with peers using node ID addressing. +* `Router`: Maintains transport connections to relevant peers and routes channel messages. +* `PeerManager`: Manages peer lifecycle information, e.g. deciding which peers to dial and when, using a `peerStore` for storage. +* Reactor: A design pattern loosely defined as "something which listens on a channel and reacts to messages". + +These abstractions are illustrated in the following diagram (representing the internals of node A) and described in detail below. + +![P2P Architecture Diagram](img/adr-062-architecture.svg) + +### Transports + +Transports are arbitrary mechanisms for exchanging binary messages with a peer. For example, a gRPC transport would connect to a peer over TCP/IP and send data using the gRPC protocol, while an in-memory transport might communicate with a peer running in another goroutine using internal Go channels. Note that transports don't have a notion of a "peer" or "node" as such - instead, they establish connections between arbitrary endpoint addresses (e.g. IP address and port number), to decouple them from the rest of the P2P stack. + +Transports must satisfy the following requirements: + +* Be connection-oriented, and support both listening for inbound connections and making outbound connections using endpoint addresses. + +* Support sending binary messages with distinct channel IDs (although channels and channel IDs are a higher-level application protocol concept explained in the Router section, they are threaded through the transport layer as well for backwards compatibilty with the existing MConnection protocol). + +* Exchange the MConnection `NodeInfo` and public key via a node handshake, and possibly encrypt or sign the traffic as appropriate. + +The initial transport is a port of the current MConnection protocol currently used by Tendermint, and should be backwards-compatible at the wire level. An in-memory transport for testing has also been implemented. There are plans to explore a QUIC transport that may replace the MConnection protocol. + +The `Transport` interface is as follows: + +```go +// Transport is a connection-oriented mechanism for exchanging data with a peer. +type Transport interface { + // Protocols returns the protocols supported by the transport. The Router + // uses this to pick a transport for an Endpoint. + Protocols() []Protocol + + // Endpoints returns the local endpoints the transport is listening on, if any. + // How to listen is transport-dependent, e.g. MConnTransport uses Listen() while + // MemoryTransport starts listening via MemoryNetwork.CreateTransport(). + Endpoints() []Endpoint + + // Accept waits for the next inbound connection on a listening endpoint, blocking + // until either a connection is available or the transport is closed. On closure, + // io.EOF is returned and further Accept calls are futile. + Accept() (Connection, error) + + // Dial creates an outbound connection to an endpoint. + Dial(context.Context, Endpoint) (Connection, error) + + // Close stops accepting new connections, but does not close active connections. + Close() error +} +``` + +How the transport configures listening is transport-dependent, and not covered by the interface. This typically happens during transport construction, where a single instance of the transport is created and set to listen on an appropriate network interface before being passed to the router. + +#### Endpoints + +`Endpoint` represents a transport endpoint (e.g. an IP address and port). A connection always has two endpoints: one at the local node and one at the remote peer. Outbound connections to remote endpoints are made via `Dial()`, and inbound connections to listening endpoints are returned via `Accept()`. + +The `Endpoint` struct is: + +```go +// Endpoint represents a transport connection endpoint, either local or remote. +// +// Endpoints are not necessarily networked (see e.g. MemoryTransport) but all +// networked endpoints must use IP as the underlying transport protocol to allow +// e.g. IP address filtering. Either IP or Path (or both) must be set. +type Endpoint struct { + // Protocol specifies the transport protocol. + Protocol Protocol + + // IP is an IP address (v4 or v6) to connect to. If set, this defines the + // endpoint as a networked endpoint. + IP net.IP + + // Port is a network port (either TCP or UDP). If 0, a default port may be + // used depending on the protocol. + Port uint16 + + // Path is an optional transport-specific path or identifier. + Path string +} + +// Protocol identifies a transport protocol. +type Protocol string +``` + +Endpoints are arbitrary transport-specific addresses, but if they are networked they must use IP addresses and thus rely on IP as a fundamental packet routing protocol. This enables policies for address discovery, advertisement, and exchange - for example, a private `192.168.0.0/24` IP address should only be advertised to peers on that IP network, while the public address `8.8.8.8` may be advertised to all peers. Similarly, any port numbers if given must represent TCP and/or UDP port numbers, in order to use [UPnP](https://en.wikipedia.org/wiki/Universal_Plug_and_Play) to autoconfigure e.g. NAT gateways. + +Non-networked endpoints (without an IP address) are considered local, and will only be advertised to other peers connecting via the same protocol. For example, the in-memory transport used for testing uses `Endpoint{Protocol: "memory", Path: "foo"}` as an address for the node "foo", and this should only be advertised to other nodes using `Protocol: "memory"`. + +#### Connections + +A connection represents an established transport connection between two endpoints (i.e. two nodes), which can be used to exchange binary messages with logical channel IDs (corresponding to the higher-level channel IDs used in the router). Connections are set up either via `Transport.Dial()` (outbound) or `Transport.Accept()` (inbound). + +Once a connection is esablished, `Transport.Handshake()` must be called to perform a node handshake, exchanging node info and public keys to verify node identities. Node handshakes should not really be part of the transport layer (it's an application protocol concern), this exists for backwards-compatibility with the existing MConnection protocol which conflates the two. `NodeInfo` is part of the existing MConnection protocol, but does not appear to be documented in the specification -- refer to the Go codebase for details. + +The `Connection` interface is shown below. It omits certain additions that are currently implemented for backwards compatibility with the legacy P2P stack and are planned to be removed before the final release. + +```go +// Connection represents an established connection between two endpoints. +type Connection interface { + // Handshake executes a node handshake with the remote peer. It must be + // called once the connection is established, and returns the remote peer's + // node info and public key. The caller is responsible for validation. + Handshake(context.Context, NodeInfo, crypto.PrivKey) (NodeInfo, crypto.PubKey, error) + + // ReceiveMessage returns the next message received on the connection, + // blocking until one is available. Returns io.EOF if closed. + ReceiveMessage() (ChannelID, []byte, error) + + // SendMessage sends a message on the connection. Returns io.EOF if closed. + SendMessage(ChannelID, []byte) error + + // LocalEndpoint returns the local endpoint for the connection. + LocalEndpoint() Endpoint + + // RemoteEndpoint returns the remote endpoint for the connection. + RemoteEndpoint() Endpoint + + // Close closes the connection. + Close() error +} +``` + +This ADR initially proposed a byte-oriented multi-stream connection API that follows more typical networking API conventions (using e.g. `io.Reader` and `io.Writer` interfaces which easily compose with other libraries). This would also allow moving the responsibility for message framing, node handshakes, and traffic scheduling to the common router instead of reimplementing this across transports, and would allow making better use of multi-stream protocols such as QUIC. However, this would require minor breaking changes to the MConnection protocol which were rejected, see [tendermint/spec#227](https://github.com/tendermint/spec/pull/227) for details. This should be revisited when starting work on a QUIC transport. + +### Peer Management + +Peers are other Tendermint nodes. Each peer is identified by a unique `NodeID` (tied to the node's private key). + +#### Peer Addresses + +Nodes have one or more `NodeAddress` addresses expressed as URLs that they can be reached at. Examples of node addresses might be e.g.: + +* `mconn://nodeid@host.domain.com:25567/path` +* `memory:nodeid` + +Addresses are resolved into one or more transport endpoints, e.g. by resolving DNS hostnames into IP addresses. Peers should always be expressed as address URLs rather than endpoints (which are a lower-level transport construct). + +```go +// NodeID is a hex-encoded crypto.Address. It must be lowercased +// (for uniqueness) and of length 40. +type NodeID string + +// NodeAddress is a node address URL. It differs from a transport Endpoint in +// that it contains the node's ID, and that the address hostname may be resolved +// into multiple IP addresses (and thus multiple endpoints). +// +// If the URL is opaque, i.e. of the form "scheme:opaque", then the opaque part +// is expected to contain a node ID. +type NodeAddress struct { + NodeID NodeID + Protocol Protocol + Hostname string + Port uint16 + Path string +} + +// ParseNodeAddress parses a node address URL into a NodeAddress, normalizing +// and validating it. +func ParseNodeAddress(urlString string) (NodeAddress, error) + +// Resolve resolves a NodeAddress into a set of Endpoints, e.g. by expanding +// out a DNS hostname to IP addresses. +func (a NodeAddress) Resolve(ctx context.Context) ([]Endpoint, error) +``` + +#### Peer Manager + +The P2P stack needs to track a lot of internal state about peers, such as their addresses, connection state, priorities, availability, failures, retries, and so on. This responsibility has been separated out to a `PeerManager`, which track this state for the `Router` (but does not maintain the actual transport connections themselves, which is the router's responsibility). + +The `PeerManager` is a synchronous state machine, where all state transitions are serialized (implemented as synchronous method calls holding an exclusive mutex lock). Most peer state is intentionally kept internal, stored in a `peerStore` database that persists it as appropriate, and the external interfaces pass the minimum amount of information necessary in order to avoid shared state between router goroutines. This design significantly simplifies the model, making it much easier to reason about and test than if it was baked into the asynchronous ball of concurrency that the P2P networking core must necessarily be. As peer lifecycle events are expected to be relatively infrequent, this should not significantly impact performance either. + +The `Router` uses the `PeerManager` to request which peers to dial and evict, and reports in with peer lifecycle events such as connections, disconnections, and failures as they occur. The manager can reject these events (e.g. reject an inbound connection) by returning errors. This happens as follows: + +* Outbound connections, via `Transport.Dial`: + * `DialNext()`: returns a peer address to dial, or blocks until one is available. + * `DialFailed()`: reports a peer dial failure. + * `Dialed()`: reports a peer dial success. + * `Ready()`: reports the peer as routed and ready. + * `Disconnected()`: reports a peer disconnection. + +* Inbound connections, via `Transport.Accept`: + * `Accepted()`: reports an inbound peer connection. + * `Ready()`: reports the peer as routed and ready. + * `Disconnected()`: reports a peer disconnection. + +* Evictions, via `Connection.Close`: + * `EvictNext()`: returns a peer to disconnect, or blocks until one is available. + * `Disconnected()`: reports a peer disconnection. + +These calls have the following interface: + +```go +// DialNext returns a peer address to dial, blocking until one is available. +func (m *PeerManager) DialNext(ctx context.Context) (NodeAddress, error) + +// DialFailed reports a dial failure for the given address. +func (m *PeerManager) DialFailed(address NodeAddress) error + +// Dialed reports a successful outbound connection to the given address. +func (m *PeerManager) Dialed(address NodeAddress) error + +// Accepted reports a successful inbound connection from the given node. +func (m *PeerManager) Accepted(peerID NodeID) error + +// Ready reports the peer as fully routed and ready for use. +func (m *PeerManager) Ready(peerID NodeID) error + +// EvictNext returns a peer ID to disconnect, blocking until one is available. +func (m *PeerManager) EvictNext(ctx context.Context) (NodeID, error) + +// Disconnected reports a peer disconnection. +func (m *PeerManager) Disconnected(peerID NodeID) error +``` + +Internally, the `PeerManager` uses a numeric peer score to prioritize peers, e.g. when deciding which peers to dial next. The scoring policy has not yet been implemented, but should take into account e.g. node configuration such a `persistent_peers`, uptime and connection failures, performance, and so on. The manager will also attempt to automatically upgrade to better-scored peers by evicting lower-scored peers when a better one becomes available (e.g. when a persistent peer comes back online after an outage). + +The `PeerManager` should also have an API for reporting peer behavior from reactors that affects its score (e.g. signing a block increases the score, double-voting decreases it or even bans the peer), but this has not yet been designed and implemented. + +Additionally, the `PeerManager` provides `PeerUpdates` subscriptions that will receive `PeerUpdate` events whenever significant peer state changes happen. Reactors can use these e.g. to know when peers are connected or disconnected, and take appropriate action. This is currently fairly minimal: + +```go +// Subscribe subscribes to peer updates. The caller must consume the peer updates +// in a timely fashion and close the subscription when done, to avoid stalling the +// PeerManager as delivery is semi-synchronous, guaranteed, and ordered. +func (m *PeerManager) Subscribe() *PeerUpdates + +// PeerUpdate is a peer update event sent via PeerUpdates. +type PeerUpdate struct { + NodeID NodeID + Status PeerStatus +} + +// PeerStatus is a peer status. +type PeerStatus string + +const ( + PeerStatusUp PeerStatus = "up" // Connected and ready. + PeerStatusDown PeerStatus = "down" // Disconnected. +) + +// PeerUpdates is a real-time peer update subscription. +type PeerUpdates struct { ... } + +// Updates returns a channel for consuming peer updates. +func (pu *PeerUpdates) Updates() <-chan PeerUpdate + +// Close closes the peer updates subscription. +func (pu *PeerUpdates) Close() +``` + +The `PeerManager` will also be responsible for providing peer information to the PEX reactor that can be gossipped to other nodes. This requires an improved system for peer address detection and advertisement, that e.g. reliably detects peer and self addresses and only gossips private network addresses to other peers on the same network, but this system has not yet been fully designed and implemented. + +### Channels + +While low-level data exchange happens via the `Transport`, the high-level API is based on a bidirectional `Channel` that can send and receive Protobuf messages addressed by `NodeID`. A channel is identified by an arbitrary `ChannelID` identifier, and can exchange Protobuf messages of one specific type (since the type to unmarshal into must be predefined). Message delivery is asynchronous and at-most-once. + +The channel can also be used to report peer errors, e.g. when receiving an invalid or malignant message. This may cause the peer to be disconnected or banned depending on `PeerManager` policy, but should probably be replaced by a broader peer behavior API that can also report good behavior. + +A `Channel` has this interface: + +```go +// ChannelID is an arbitrary channel ID. +type ChannelID uint16 + +// Channel is a bidirectional channel to exchange Protobuf messages with peers. +type Channel struct { + ID ChannelID // Channel ID. + In <-chan Envelope // Inbound messages (peers to reactors). + Out chan<- Envelope // outbound messages (reactors to peers) + Error chan<- PeerError // Peer error reporting. + messageType proto.Message // Channel's message type, for e.g. unmarshaling. +} + +// Close closes the channel, also closing Out and Error. +func (c *Channel) Close() error + +// Envelope specifies the message receiver and sender. +type Envelope struct { + From NodeID // Sender (empty if outbound). + To NodeID // Receiver (empty if inbound). + Broadcast bool // Send to all connected peers, ignoring To. + Message proto.Message // Message payload. +} + +// PeerError is a peer error reported via the Error channel. +type PeerError struct { + NodeID NodeID + Err error +} +``` + +A channel can reach any connected peer, and will automatically (un)marshal the Protobuf messages. Message scheduling and queueing is a `Router` implementation concern, and can use any number of algorithms such as FIFO, round-robin, priority queues, etc. Since message delivery is not guaranteed, both inbound and outbound messages may be dropped, buffered, reordered, or blocked as appropriate. + +Since a channel can only exchange messages of a single type, it is often useful to use a wrapper message type with e.g. a Protobuf `oneof` field that specifies a set of inner message types that it can contain. The channel can automatically perform this (un)wrapping if the outer message type implements the `Wrapper` interface (see [Reactor Example](#reactor-example) for an example): + +```go +// Wrapper is a Protobuf message that can contain a variety of inner messages. +// If a Channel's message type implements Wrapper, the channel will +// automatically (un)wrap passed messages using the container type, such that +// the channel can transparently support multiple message types. +type Wrapper interface { + proto.Message + + // Wrap will take a message and wrap it in this one. + Wrap(proto.Message) error + + // Unwrap will unwrap the inner message contained in this message. + Unwrap() (proto.Message, error) +} +``` + +### Routers + +The router exeutes P2P networking for a node, taking instructions from and reporting events to the `PeerManager`, maintaining transport connections to peers, and routing messages between channels and peers. + +Practically all concurrency in the P2P stack has been moved into the router and reactors, while as many other responsibilities as possible have been moved into separate components such as the `Transport` and `PeerManager` that can remain largely synchronous. Limiting concurrency to a single core component makes it much easier to reason about since there is only a single concurrency structure, while the remaining components can be serial, simple, and easily testable. + +The `Router` has a very minimal API, since it is mostly driven by `PeerManager` and `Transport` events: + +```go +// Router maintains peer transport connections and routes messages between +// peers and channels. +type Router struct { + // Some details have been omitted below. + + logger log.Logger + options RouterOptions + nodeInfo NodeInfo + privKey crypto.PrivKey + peerManager *PeerManager + transports []Transport + + peerMtx sync.RWMutex + peerQueues map[NodeID]queue + + channelMtx sync.RWMutex + channelQueues map[ChannelID]queue +} + +// OpenChannel opens a new channel for the given message type. The caller must +// close the channel when done, before stopping the Router. messageType is the +// type of message passed through the channel. +func (r *Router) OpenChannel(id ChannelID, messageType proto.Message) (*Channel, error) + +// Start starts the router, connecting to peers and routing messages. +func (r *Router) Start() error + +// Stop stops the router, disconnecting from all peers and stopping message routing. +func (r *Router) Stop() error +``` + +All Go channel sends in the `Router` and reactors are blocking (the router also selects on signal channels for closure and shutdown). The responsibility for message scheduling, prioritization, backpressure, and load shedding is centralized in a core `queue` interface that is used at contention points (i.e. from all peers to a single channel, and from all channels to a single peer): + +```go +// queue does QoS scheduling for Envelopes, enqueueing and dequeueing according +// to some policy. Queues are used at contention points, i.e.: +// - Receiving inbound messages to a single channel from all peers. +// - Sending outbound messages to a single peer from all channels. +type queue interface { + // enqueue returns a channel for submitting envelopes. + enqueue() chan<- Envelope + + // dequeue returns a channel ordered according to some queueing policy. + dequeue() <-chan Envelope + + // close closes the queue. After this call enqueue() will block, so the + // caller must select on closed() as well to avoid blocking forever. The + // enqueue() and dequeue() channels will not be closed. + close() + + // closed returns a channel that's closed when the scheduler is closed. + closed() <-chan struct{} +} +``` + +The current implementation is `fifoQueue`, which is a simple unbuffered lossless queue that passes messages in the order they were received and blocks until the message is delivered (i.e. it is a Go channel). The router will need a more sophisticated queueing policy, but this has not yet been implemented. + +The internal `Router` goroutine structure and design is described in the `Router` GoDoc, which is included below for reference: + +```go +// On startup, three main goroutines are spawned to maintain peer connections: +// +// dialPeers(): in a loop, calls PeerManager.DialNext() to get the next peer +// address to dial and spawns a goroutine that dials the peer, handshakes +// with it, and begins to route messages if successful. +// +// acceptPeers(): in a loop, waits for an inbound connection via +// Transport.Accept() and spawns a goroutine that handshakes with it and +// begins to route messages if successful. +// +// evictPeers(): in a loop, calls PeerManager.EvictNext() to get the next +// peer to evict, and disconnects it by closing its message queue. +// +// When a peer is connected, an outbound peer message queue is registered in +// peerQueues, and routePeer() is called to spawn off two additional goroutines: +// +// sendPeer(): waits for an outbound message from the peerQueues queue, +// marshals it, and passes it to the peer transport which delivers it. +// +// receivePeer(): waits for an inbound message from the peer transport, +// unmarshals it, and passes it to the appropriate inbound channel queue +// in channelQueues. +// +// When a reactor opens a channel via OpenChannel, an inbound channel message +// queue is registered in channelQueues, and a channel goroutine is spawned: +// +// routeChannel(): waits for an outbound message from the channel, looks +// up the recipient peer's outbound message queue in peerQueues, and submits +// the message to it. +// +// All channel sends in the router are blocking. It is the responsibility of the +// queue interface in peerQueues and channelQueues to prioritize and drop +// messages as appropriate during contention to prevent stalls and ensure good +// quality of service. +``` + +### Reactor Example + +While reactors are a first-class concept in the current P2P stack (i.e. there is an explicit `p2p.Reactor` interface), they will simply be a design pattern in the new stack, loosely defined as "something which listens on a channel and reacts to messages". + +Since reactors have very few formal constraints, they can be implemented in a variety of ways. There is currently no recommended pattern for implementing reactors, to avoid overspecification and scope creep in this ADR. However, prototyping and developing a reactor pattern should be done early during implementation, to make sure reactors built using the `Channel` interface can satisfy the needs for convenience, deterministic tests, and reliability. + +Below is a trivial example of a simple echo reactor implemented as a function. The reactor will exchange the following Protobuf messages: + +```protobuf +message EchoMessage { + oneof inner { + PingMessage ping = 1; + PongMessage pong = 2; + } +} + +message PingMessage { + string content = 1; +} + +message PongMessage { + string content = 1; +} +``` + +Implementing the `Wrapper` interface for `EchoMessage` allows transparently passing `PingMessage` and `PongMessage` through the channel, where it will automatically be (un)wrapped in an `EchoMessage`: + +```go +func (m *EchoMessage) Wrap(inner proto.Message) error { + switch inner := inner.(type) { + case *PingMessage: + m.Inner = &EchoMessage_PingMessage{Ping: inner} + case *PongMessage: + m.Inner = &EchoMessage_PongMessage{Pong: inner} + default: + return fmt.Errorf("unknown message %T", inner) + } + return nil +} + +func (m *EchoMessage) Unwrap() (proto.Message, error) { + switch inner := m.Inner.(type) { + case *EchoMessage_PingMessage: + return inner.Ping, nil + case *EchoMessage_PongMessage: + return inner.Pong, nil + default: + return nil, fmt.Errorf("unknown message %T", inner) + } +} +``` + +The reactor itself would be implemented e.g. like this: + +```go +// RunEchoReactor wires up an echo reactor to a router and runs it. +func RunEchoReactor(router *p2p.Router, peerManager *p2p.PeerManager) error { + channel, err := router.OpenChannel(1, &EchoMessage{}) + if err != nil { + return err + } + defer channel.Close() + peerUpdates := peerManager.Subscribe() + defer peerUpdates.Close() + + return EchoReactor(context.Background(), channel, peerUpdates) +} + +// EchoReactor provides an echo service, pinging all known peers until the given +// context is canceled. +func EchoReactor(ctx context.Context, channel *p2p.Channel, peerUpdates *p2p.PeerUpdates) error { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + for { + select { + // Send ping message to all known peers every 5 seconds. + case <-ticker.C: + channel.Out <- Envelope{ + Broadcast: true, + Message: &PingMessage{Content: "👋"}, + } + + // When we receive a message from a peer, either respond to ping, output + // pong, or report peer error on unknown message type. + case envelope := <-channel.In: + switch msg := envelope.Message.(type) { + case *PingMessage: + channel.Out <- Envelope{ + To: envelope.From, + Message: &PongMessage{Content: msg.Content}, + } + + case *PongMessage: + fmt.Printf("%q replied with %q\n", envelope.From, msg.Content) + + default: + channel.Error <- PeerError{ + PeerID: envelope.From, + Err: fmt.Errorf("unexpected message %T", msg), + } + } + + // Output info about any peer status changes. + case peerUpdate := <-peerUpdates: + fmt.Printf("Peer %q changed status to %q", peerUpdate.PeerID, peerUpdate.Status) + + // Exit when context is canceled. + case <-ctx.Done(): + return nil + } + } +} +``` + +## Status + +Partially implemented ([#5670](https://github.com/tendermint/tendermint/issues/5670)) + +## Consequences + +### Positive + +* Reduced coupling and simplified interfaces should lead to better understandability, increased reliability, and more testing. + +* Using message passing via Go channels gives better control of backpressure and quality-of-service scheduling. + +* Peer lifecycle and connection management is centralized in a single entity, making it easier to reason about. + +* Detection, advertisement, and exchange of node addresses will be improved. + +* Additional transports (e.g. QUIC) can be implemented and used in parallel with the existing MConn protocol. + +* The P2P protocol will not be broken in the initial version, if possible. + +### Negative + +* Fully implementing the new design as indended is likely to require breaking changes to the P2P protocol at some point, although the initial implementation shouldn't. + +* Gradually migrating the existing stack and maintaining backwards-compatibility will be more labor-intensive than simply replacing the entire stack. + +* A complete overhaul of P2P internals is likely to cause temporary performance regressions and bugs as the implementation matures. + +* Hiding peer management information inside the `PeerManager` may prevent certain functionality or require additional deliberate interfaces for information exchange, as a tradeoff to simplify the design, reduce coupling, and avoid race conditions and lock contention. + +### Neutral + +* Implementation details around e.g. peer management, message scheduling, and peer and endpoint advertisement are not yet determined. + +## References + +* [ADR 061: P2P Refactor Scope](adr-061-p2p-refactor-scope.md) +* [#5670 p2p: internal refactor and architecture redesign](https://github.com/tendermint/tendermint/issues/5670) diff --git a/docs/architecture/adr-063-privval-grpc.md b/docs/architecture/adr-063-privval-grpc.md new file mode 100644 index 000000000..53fdb129c --- /dev/null +++ b/docs/architecture/adr-063-privval-grpc.md @@ -0,0 +1,109 @@ +# ADR 063: Privval gRPC + +## Changelog + +- 23/11/2020: Initial Version (@marbar3778) + +## Context + +Validators use remote signers to help secure their keys. This system is Tendermint's recommended way to secure validators, but the path to integration with Tendermint's private validator client is plagued with custom protocols. + +Tendermint uses its own custom secure connection protocol (`SecretConnection`) and a raw tcp/unix socket connection protocol. The secure connection protocol until recently was exposed to man in the middle attacks and can take longer to integrate if not using Golang. The raw tcp connection protocol is less custom, but has been causing minute issues with users. + +Migrating Tendermint's private validator client to a widely adopted protocol, gRPC, will ease the current maintenance and integration burden experienced with the current protocol. + +## Decision + +After discussing with multiple stake holders, [gRPC](https://grpc.io/) was decided on to replace the current private validator protocol. gRPC is a widely adopted protocol in the micro-service and cloud infrastructure world. gRPC uses [protocol-buffers](https://developers.google.com/protocol-buffers) to describe its services, providing a language agnostic implementation. Tendermint uses protobuf for on disk and over the wire encoding already making the integration with gRPC simpler. + +## Alternative Approaches + +- JSON-RPC: We did not consider JSON-RPC because Tendermint uses protobuf extensively making gRPC a natural choice. + +## Detailed Design + +With the recent integration of [Protobuf](https://developers.google.com/protocol-buffers) into Tendermint the needed changes to migrate from the current private validator protocol to gRPC is not large. + +The [service definition](https://grpc.io/docs/what-is-grpc/core-concepts/#service-definition) for gRPC will be defined as: + +```proto + service PrivValidatorAPI { + rpc GetPubKey(tendermint.proto.privval.PubKeyRequest) returns (tendermint.proto.privval.PubKeyResponse); + rpc SignVote(tendermint.proto.privval.SignVoteRequest) returns (tendermint.proto.privval.SignedVoteResponse); + rpc SignProposal(tendermint.proto.privval.SignProposalRequest) returns (tendermint.proto.privval.SignedProposalResponse); + + message PubKeyRequest { + string chain_id = 1; + } + + // PubKeyResponse is a response message containing the public key. + message PubKeyResponse { + tendermint.crypto.PublicKey pub_key = 1 [(gogoproto.nullable) = false]; + } + + // SignVoteRequest is a request to sign a vote + message SignVoteRequest { + tendermint.types.Vote vote = 1; + string chain_id = 2; + } + + // SignedVoteResponse is a response containing a signed vote or an error + message SignedVoteResponse { + tendermint.types.Vote vote = 1 [(gogoproto.nullable) = false]; + } + + // SignProposalRequest is a request to sign a proposal + message SignProposalRequest { + tendermint.types.Proposal proposal = 1; + string chain_id = 2; + } + + // SignedProposalResponse is response containing a signed proposal or an error + message SignedProposalResponse { + tendermint.types.Proposal proposal = 1 [(gogoproto.nullable) = false]; + } +} +``` + +> Note: Remote Singer errors are removed in favor of [grpc status error codes](https://grpc.io/docs/guides/error/). + +In previous versions of the remote signer, Tendermint acted as the server and the remote signer as the client. In this process the client established a long lived connection providing a way for the server to make requests to the client. In the new version it has been simplified. Tendermint is the client and the remote signer is the server. This follows client and server architecture and simplifies the previous protocol. + +#### Keep Alive + +If you have worked on the private validator system you will see that we are removing the `PingRequest` and `PingResponse` messages. These messages were used to create functionality which kept the connection alive. With gRPC there is a [keep alive feature](https://github.com/grpc/grpc/blob/master/doc/keepalive.md) that will be added along side the integration to provide the same functionality. + +#### Metrics + +Remote signers are crucial to operating secure and consistently up Validators. In the past there were no metrics to tell the operator if something is wrong other than the node not signing. Integrating metrics into the client and provided server will be done with [prometheus](https://github.com/grpc-ecosystem/go-grpc-prometheus). This will be integrated into node's prometheus export for node operators. + +#### Security + +[TLS](https://en.wikipedia.org/wiki/Transport_Layer_Security) is widely adopted with the use of gRPC. There are various forms of TLS (one-way & two-way). One way is the client identifying who the server is, while two way is both parties identifying the other. For Tendermint's use case having both parties identifying each other provides adds an extra layer of security. This requires users to generate both client and server certificates for a TLS connection. + +An insecure option will be provided for users who do not wish to secure the connection. + +#### Upgrade Path + +This is a largely breaking change for validator operators. The optimal upgrade path would be to release gRPC in a minor release, allow key management systems to migrate to the new protocol. In the next major release the current system (raw tcp/unix) is removed. This allows users to migrate to the new system and not have to coordinate upgrading the key management system alongside a network upgrade. + +The upgrade of [tmkms](https://github.com/iqlusioninc/tmkms) will be coordinated with Iqlusion. They will be able to make the necessary upgrades to allow users to migrate to gRPC from the current protocol. + +## Status + + +Implemented + +### Positive + +- Use an adopted standard for secure communication. (TLS) +- Use an adopted communication protocol. (gRPC) +- Requests are multiplexed onto the tcp connection. (http/2) +- Language agnostic service definition. + +### Negative + +- Users will need to generate certificates to use TLS. (Added step) +- Users will need to find a supported gRPC supported key management system + +### Neutral diff --git a/docs/architecture/adr-064-batch-verification.md b/docs/architecture/adr-064-batch-verification.md new file mode 100644 index 000000000..13bba25e4 --- /dev/null +++ b/docs/architecture/adr-064-batch-verification.md @@ -0,0 +1,90 @@ +# ADR 064: Batch Verification + +## Changelog + +- January 28, 2021: Created (@marbar3778) + +## Context + +Tendermint uses public private key cryptography for validator signing. When a block is proposed and voted on validators sign a message representing acceptance of a block, rejection is signaled via a nil vote. These signatures are also used to verify previous blocks are correct if a node is syncing. Currently, Tendermint requires each signature to be verified individually, this leads to a slow down of block times. + +Batch Verification is the process of taking many messages, keys, and signatures adding them together and verifying them all at once. The public key can be the same in which case it would mean a single user is signing many messages. In our case each public key is unique, each validator has their own and contribute a unique message. The algorithm can vary from curve to curve but the performance benefit, over single verifying messages, public keys and signatures is shared. + +## Alternative Approaches + +- Signature aggregation + - Signature aggregation is an alternative to batch verification. Signature aggregation leads to fast verification and smaller block sizes. At the time of writing this ADR there is on going work to enable signature aggregation in Tendermint. The reason why we have opted to not introduce it at this time is because every validator signs a unique message. + Signing a unique message prevents aggregation before verification. For example if we were to implement signature aggregation with BLS, there could be a potential slow down of 10x-100x in verification speeds. + +## Decision + +Adopt Batch Verification. + +## Detailed Design + +A new interface will be introduced. This interface will have three methods `NewBatchVerifier`, `Add` and `VerifyBatch`. + +```go +type BatchVerifier interface { + Add(key crypto.Pubkey, signature, message []byte) error // Add appends an entry into the BatchVerifier. + Verify() bool // Verify verifies all the entries in the BatchVerifier. If the verification fails it is unknown which entry failed and each entry will need to be verified individually. +} +``` + +- `NewBatchVerifier` creates a new verifier. This verifier will be populated with entries to be verified. +- `Add` adds an entry to the Verifier. Add accepts a public key and two slice of bytes (signature and message). +- `Verify` verifies all the entires. At the end of Verify if the underlying API does not reset the Verifier to its initial state (empty), it should be done here. This prevents accidentally reusing the verifier with entries from a previous verification. + +Above there is mention of an entry. An entry can be constructed in many ways depending on the needs of the underlying curve. A simple approach would be: + +```go +type entry struct { + pubKey crypto.Pubkey + signature []byte + message []byte +} +``` + +The main reason this approach is being taken is to prevent simple mistakes. Some APIs allow the user to create three slices and pass them to the `VerifyBatch` function but this relies on the user to safely generate all the slices (see example below). We would like to minimize the possibility of making a mistake. + +```go +func Verify(keys []crypto.Pubkey, signatures, messages[][]byte) bool +``` + +This change will not affect any users in anyway other than faster verification times. + +This new api will be used for verification in both consensus and block syncing. Within the current Verify functions there will be a check to see if the key types supports the BatchVerification API. If it does it will execute batch verification, if not single signature verification will be used. + +#### Consensus + + The process within consensus will be to wait for 2/3+ of the votes to be received, once they are received `Verify()` will be called to batch verify all the messages. The messages that come in after 2/3+ has been verified will be individually verified. + +#### Block Sync & Light Client + + The process for block sync & light client verification will be to verify only 2/3+ in a batch style. Since these processes are not participating in consensus there is no need to wait for more messages. + +If batch verifications fails for any reason, it will not be known which entry caused the failure. Verification will need to revert to single signature verification. + +Starting out, only ed25519 will support batch verification. + +## Status + +Implemented + +### Positive + +- Faster verification times, if the curve supports it + +### Negative + +- No way to see which key failed verification + - A failure means reverting back to single signature verification. + +### Neutral + +## References + +[Ed25519 Library](https://github.com/hdevalence/ed25519consensus) +[Ed25519 spec](https://ed25519.cr.yp.to/) +[Signature Aggregation for votes](https://github.com/tendermint/tendermint/issues/1319) +[Proposer-based timestamps](https://github.com/tendermint/tendermint/issues/2840) diff --git a/docs/architecture/adr-065-custom-event-indexing.md b/docs/architecture/adr-065-custom-event-indexing.md new file mode 100644 index 000000000..83a96de48 --- /dev/null +++ b/docs/architecture/adr-065-custom-event-indexing.md @@ -0,0 +1,425 @@ +# ADR 065: Custom Event Indexing + +- [ADR 065: Custom Event Indexing](#adr-065-custom-event-indexing) + - [Changelog](#changelog) + - [Status](#status) + - [Context](#context) + - [Alternative Approaches](#alternative-approaches) + - [Decision](#decision) + - [Detailed Design](#detailed-design) + - [EventSink](#eventsink) + - [Supported Sinks](#supported-sinks) + - [`KVEventSink`](#kveventsink) + - [`PSQLEventSink`](#psqleventsink) + - [Configuration](#configuration) + - [Future Improvements](#future-improvements) + - [Consequences](#consequences) + - [Positive](#positive) + - [Negative](#negative) + - [Neutral](#neutral) + - [References](#references) + +## Changelog + +- April 1, 2021: Initial Draft (@alexanderbez) +- April 28, 2021: Specify search capabilities are only supported through the KV indexer (@marbar3778) +- May 19, 2021: Update the SQL schema and the eventsink interface (@jayt106) +- Aug 30, 2021: Update the SQL schema and the psql implementation (@creachadair) +- Oct 5, 2021: Clarify goals and implementation changes (@creachadair) + +## Status + +Accepted + +## Context + +Currently, Tendermint Core supports block and transaction event indexing through +the `tx_index.indexer` configuration. Events are captured in transactions and +are indexed via a `TxIndexer` type. Events are captured in blocks, specifically +from `BeginBlock` and `EndBlock` application responses, and are indexed via a +`BlockIndexer` type. Both of these types are managed by a single `IndexerService` +which is responsible for consuming events and sending those events off to be +indexed by the respective type. + +In addition to indexing, Tendermint Core also supports the ability to query for +both indexed transaction and block events via Tendermint's RPC layer. The ability +to query for these indexed events facilitates a great multitude of upstream client +and application capabilities, e.g. block explorers, IBC relayers, and auxiliary +data availability and indexing services. + +Currently, Tendermint only supports indexing via a `kv` indexer, which is supported +by an underlying embedded key/value store database. The `kv` indexer implements +its own indexing and query mechanisms. While the former is somewhat trivial, +providing a rich and flexible query layer is not as trivial and has caused many +issues and UX concerns for upstream clients and applications. + +The fragile nature of the proprietary `kv` query engine and the potential +performance and scaling issues that arise when a large number of consumers are +introduced, motivate the need for a more robust and flexible indexing and query +solution. + +## Alternative Approaches + +With regards to alternative approaches to a more robust solution, the only serious +contender that was considered was to transition to using [SQLite](https://www.sqlite.org/index.html). + +While the approach would work, it locks us into a specific query language and +storage layer, so in some ways it's only a bit better than our current approach. +In addition, the implementation would require the introduction of CGO into the +Tendermint Core stack, whereas right now CGO is only introduced depending on +the database used. + +## Decision + +We will adopt a similar approach to that of the Cosmos SDK's `KVStore` state +listening described in [ADR-038](https://github.com/cosmos/cosmos-sdk/blob/master/docs/architecture/adr-038-state-listening.md). + +We will implement the following changes: + +- Introduce a new interface, `EventSink`, that all data sinks must implement. +- Augment the existing `tx_index.indexer` configuration to now accept a series + of one or more indexer types, i.e., sinks. +- Combine the current `TxIndexer` and `BlockIndexer` into a single `KVEventSink` + that implements the `EventSink` interface. +- Introduce an additional `EventSink` implementation that is backed by + [PostgreSQL](https://www.postgresql.org/). + - Implement the necessary schemas to support both block and transaction event indexing. +- Update `IndexerService` to use a series of `EventSinks`. + +In addition: + +- The Postgres indexer implementation will _not_ implement the proprietary `kv` + query language. Users wishing to write queries against the Postgres indexer + will connect to the underlying DBMS directly and use SQL queries based on the + indexing schema. + + Future custom indexer implementations will not be required to support the + proprietary query language either. + +- For now, the existing `kv` indexer will be left in place with its current + query support, but will be marked as deprecated in a subsequent release, and + the documentation will be updated to encourage users who need to query the + event index to migrate to the Postgres indexer. + +- In the future we may remove the `kv` indexer entirely, or replace it with a + different implementation; that decision is deferred as future work. + +- In the future, we may remove the index query endpoints from the RPC service + entirely; that decision is deferred as future work, but recommended. + + +## Detailed Design + +### EventSink + +We introduce the `EventSink` interface type that all supported sinks must implement. +The interface is defined as follows: + +```go +type EventSink interface { + IndexBlockEvents(types.EventDataNewBlockHeader) error + IndexTxEvents([]*abci.TxResult) error + + SearchBlockEvents(context.Context, *query.Query) ([]int64, error) + SearchTxEvents(context.Context, *query.Query) ([]*abci.TxResult, error) + + GetTxByHash([]byte) (*abci.TxResult, error) + HasBlock(int64) (bool, error) + + Type() EventSinkType + Stop() error +} +``` + +The `IndexerService` will accept a list of one or more `EventSink` types. During +the `OnStart` method it will call the appropriate APIs on each `EventSink` to +index both block and transaction events. + +### Supported Sinks + +We will initially support two `EventSink` types out of the box. + +#### `KVEventSink` + +This type of `EventSink` is a combination of the `TxIndexer` and `BlockIndexer` +indexers, both of which are backed by a single embedded key/value database. + +A bulk of the existing business logic will remain the same, but the existing APIs +mapped to the new `EventSink` API. Both types will be removed in favor of a single +`KVEventSink` type. + +The `KVEventSink` will be the only `EventSink` enabled by default, so from a UX +perspective, operators should not notice a difference apart from a configuration +change. + +We omit `EventSink` implementation details as it should be fairly straightforward +to map the existing business logic to the new APIs. + +#### `PSQLEventSink` + +This type of `EventSink` indexes block and transaction events into a [PostgreSQL](https://www.postgresql.org/). +database. We define and automatically migrate the following schema when the +`IndexerService` starts. + +The postgres eventsink will not support `tx_search`, `block_search`, `GetTxByHash` and `HasBlock`. + +```sql +-- Table Definition ---------------------------------------------- + +-- The blocks table records metadata about each block. +-- The block record does not include its events or transactions (see tx_results). +CREATE TABLE blocks ( + rowid BIGSERIAL PRIMARY KEY, + + height BIGINT NOT NULL, + chain_id VARCHAR NOT NULL, + + -- When this block header was logged into the sink, in UTC. + created_at TIMESTAMPTZ NOT NULL, + + UNIQUE (height, chain_id) +); + +-- Index blocks by height and chain, since we need to resolve block IDs when +-- indexing transaction records and transaction events. +CREATE INDEX idx_blocks_height_chain ON blocks(height, chain_id); + +-- The tx_results table records metadata about transaction results. Note that +-- the events from a transaction are stored separately. +CREATE TABLE tx_results ( + rowid BIGSERIAL PRIMARY KEY, + + -- The block to which this transaction belongs. + block_id BIGINT NOT NULL REFERENCES blocks(rowid), + -- The sequential index of the transaction within the block. + index INTEGER NOT NULL, + -- When this result record was logged into the sink, in UTC. + created_at TIMESTAMPTZ NOT NULL, + -- The hex-encoded hash of the transaction. + tx_hash VARCHAR NOT NULL, + -- The protobuf wire encoding of the TxResult message. + tx_result BYTEA NOT NULL, + + UNIQUE (block_id, index) +); + +-- The events table records events. All events (both block and transaction) are +-- associated with a block ID; transaction events also have a transaction ID. +CREATE TABLE events ( + rowid BIGSERIAL PRIMARY KEY, + + -- The block and transaction this event belongs to. + -- If tx_id is NULL, this is a block event. + block_id BIGINT NOT NULL REFERENCES blocks(rowid), + tx_id BIGINT NULL REFERENCES tx_results(rowid), + + -- The application-defined type label for the event. + type VARCHAR NOT NULL +); + +-- The attributes table records event attributes. +CREATE TABLE attributes ( + event_id BIGINT NOT NULL REFERENCES events(rowid), + key VARCHAR NOT NULL, -- bare key + composite_key VARCHAR NOT NULL, -- composed type.key + value VARCHAR NULL, + + UNIQUE (event_id, key) +); + +-- A joined view of events and their attributes. Events that do not have any +-- attributes are represented as a single row with empty key and value fields. +CREATE VIEW event_attributes AS + SELECT block_id, tx_id, type, key, composite_key, value + FROM events LEFT JOIN attributes ON (events.rowid = attributes.event_id); + +-- A joined view of all block events (those having tx_id NULL). +CREATE VIEW block_events AS + SELECT blocks.rowid as block_id, height, chain_id, type, key, composite_key, value + FROM blocks JOIN event_attributes ON (blocks.rowid = event_attributes.block_id) + WHERE event_attributes.tx_id IS NULL; + +-- A joined view of all transaction events. +CREATE VIEW tx_events AS + SELECT height, index, chain_id, type, key, composite_key, value, tx_results.created_at + FROM blocks JOIN tx_results ON (blocks.rowid = tx_results.block_id) + JOIN event_attributes ON (tx_results.rowid = event_attributes.tx_id) + WHERE event_attributes.tx_id IS NOT NULL; +``` + +The `PSQLEventSink` will implement the `EventSink` interface as follows +(some details omitted for brevity): + +```go +func NewEventSink(connStr, chainID string) (*EventSink, error) { + db, err := sql.Open(driverName, connStr) + // ... + + return &EventSink{ + store: db, + chainID: chainID, + }, nil +} + +func (es *EventSink) IndexBlockEvents(h types.EventDataNewBlockHeader) error { + ts := time.Now().UTC() + + return runInTransaction(es.store, func(tx *sql.Tx) error { + // Add the block to the blocks table and report back its row ID for use + // in indexing the events for the block. + blockID, err := queryWithID(tx, ` +INSERT INTO blocks (height, chain_id, created_at) + VALUES ($1, $2, $3) + ON CONFLICT DO NOTHING + RETURNING rowid; +`, h.Header.Height, es.chainID, ts) + // ... + + // Insert the special block meta-event for height. + if err := insertEvents(tx, blockID, 0, []abci.Event{ + makeIndexedEvent(types.BlockHeightKey, fmt.Sprint(h.Header.Height)), + }); err != nil { + return fmt.Errorf("block meta-events: %w", err) + } + // Insert all the block events. Order is important here, + if err := insertEvents(tx, blockID, 0, h.ResultBeginBlock.Events); err != nil { + return fmt.Errorf("begin-block events: %w", err) + } + if err := insertEvents(tx, blockID, 0, h.ResultEndBlock.Events); err != nil { + return fmt.Errorf("end-block events: %w", err) + } + return nil + }) +} + +func (es *EventSink) IndexTxEvents(txrs []*abci.TxResult) error { + ts := time.Now().UTC() + + for _, txr := range txrs { + // Encode the result message in protobuf wire format for indexing. + resultData, err := proto.Marshal(txr) + // ... + + // Index the hash of the underlying transaction as a hex string. + txHash := fmt.Sprintf("%X", types.Tx(txr.Tx).Hash()) + + if err := runInTransaction(es.store, func(tx *sql.Tx) error { + // Find the block associated with this transaction. + blockID, err := queryWithID(tx, ` +SELECT rowid FROM blocks WHERE height = $1 AND chain_id = $2; +`, txr.Height, es.chainID) + // ... + + // Insert a record for this tx_result and capture its ID for indexing events. + txID, err := queryWithID(tx, ` +INSERT INTO tx_results (block_id, index, created_at, tx_hash, tx_result) + VALUES ($1, $2, $3, $4, $5) + ON CONFLICT DO NOTHING + RETURNING rowid; +`, blockID, txr.Index, ts, txHash, resultData) + // ... + + // Insert the special transaction meta-events for hash and height. + if err := insertEvents(tx, blockID, txID, []abci.Event{ + makeIndexedEvent(types.TxHashKey, txHash), + makeIndexedEvent(types.TxHeightKey, fmt.Sprint(txr.Height)), + }); err != nil { + return fmt.Errorf("indexing transaction meta-events: %w", err) + } + // Index any events packaged with the transaction. + if err := insertEvents(tx, blockID, txID, txr.Result.Events); err != nil { + return fmt.Errorf("indexing transaction events: %w", err) + } + return nil + + }); err != nil { + return err + } + } + return nil +} + +// SearchBlockEvents is not implemented by this sink, and reports an error for all queries. +func (es *EventSink) SearchBlockEvents(ctx context.Context, q *query.Query) ([]int64, error) + +// SearchTxEvents is not implemented by this sink, and reports an error for all queries. +func (es *EventSink) SearchTxEvents(ctx context.Context, q *query.Query) ([]*abci.TxResult, error) + +// GetTxByHash is not implemented by this sink, and reports an error for all queries. +func (es *EventSink) GetTxByHash(hash []byte) (*abci.TxResult, error) + +// HasBlock is not implemented by this sink, and reports an error for all queries. +func (es *EventSink) HasBlock(h int64) (bool, error) +``` + +### Configuration + +The current `tx_index.indexer` configuration would be changed to accept a list +of supported `EventSink` types instead of a single value. + +Example: + +```toml +[tx_index] + +indexer = [ + "kv", + "psql" +] +``` + +If the `indexer` list contains the `null` indexer, then no indexers will be used +regardless of what other values may exist. + +Additional configuration parameters might be required depending on what event +sinks are supplied to `tx_index.indexer`. The `psql` will require an additional +connection configuration. + +```toml +[tx_index] + +indexer = [ + "kv", + "psql" +] + +pqsql_conn = "postgresql://:@:/?" +``` + +Any invalid or misconfigured `tx_index` configuration should yield an error as +early as possible. + +## Future Improvements + +Although not technically required to maintain feature parity with the current +existing Tendermint indexer, it would be beneficial for operators to have a method +of performing a "re-index". Specifically, Tendermint operators could invoke an +RPC method that allows the Tendermint node to perform a re-indexing of all block +and transaction events between two given heights, H1 and H2, +so long as the block store contains the blocks and transaction results for all +the heights specified in a given range. + +## Consequences + +### Positive + +- A more robust and flexible indexing and query engine for indexing and search + block and transaction events. +- The ability to not have to support a custom indexing and query engine beyond + the legacy `kv` type. +- The ability to offload/proxy indexing and querying to the underling sink. +- Scalability and reliability that essentially comes "for free" from the underlying + sink, if it supports it. + +### Negative + +- The need to support multiple and potentially a growing set of custom `EventSink` + types. + +### Neutral + +## References + +- [Cosmos SDK ADR-038](https://github.com/cosmos/cosmos-sdk/blob/master/docs/architecture/adr-038-state-listening.md) +- [PostgreSQL](https://www.postgresql.org/) +- [SQLite](https://www.sqlite.org/index.html) diff --git a/docs/architecture/adr-066-e2e-testing.md b/docs/architecture/adr-066-e2e-testing.md new file mode 100644 index 000000000..528e25238 --- /dev/null +++ b/docs/architecture/adr-066-e2e-testing.md @@ -0,0 +1,140 @@ +# ADR 66: End-to-End Testing + +## Changelog + +- 2020-09-07: Initial draft (@erikgrinaker) +- 2020-09-08: Minor improvements (@erikgrinaker) +- 2021-04-12: Renamed from RFC 001 (@tessr) + +## Authors + +- Erik Grinaker (@erikgrinaker) + +## Context + +The current set of end-to-end tests under `test/` are very limited, mostly focusing on P2P testing in a standard configuration. They do not test various configurations (e.g. fast sync reactor versions, state sync, block pruning, genesis vs InitChain setup), nor do they test various network topologies (e.g. sentry node architecture). This leads to poor test coverage, which has caused several serious bugs to go unnoticed. + +We need an end-to-end test suite that can run a large number of combinations of configuration options, genesis settings, network topologies, ABCI interactions, and failure scenarios and check that the network is still functional. This ADR outlines the basic requirements and design for such a system. + +This ADR will not cover comprehensive chaos testing, only a few simple scenarios (e.g. abrupt process termination and network partitioning). Chaos testing of the core consensus algorithm should be implemented e.g. via Jepsen tests or a similar framework, or alternatively be added to these end-to-end tests at a later time. Similarly, malicious or adversarial behavior is out of scope for the first implementation, but may be added later. + +## Proposal + +### Functional Coverage + +The following lists the functionality we would like to test: + +#### Environments + +- **Topology:** single node, 4 nodes (seeds and persistent), sentry architecture, NAT (UPnP) +- **Networking:** IPv4, IPv6 +- **ABCI connection:** UNIX socket, TCP, gRPC +- **PrivVal:** file, UNIX socket, TCP + +#### Node/App Configurations + +- **Database:** goleveldb, cleveldb, boltdb, rocksdb, badgerdb +- **Fast sync:** disabled, v0, v2 +- **State sync:** disabled, enabled +- **Block pruning:** none, keep 20, keep 1, keep random +- **Role:** validator, full node +- **App persistence:** enabled, disabled +- **Node modes:** validator, full, light, seed + +#### Geneses + +- **Validators:** none (InitChain), given +- **Initial height:** 1, 1000 +- **App state:** none, given + +#### Behaviors + +- **Recovery:** stop/start, power cycling, validator outage, network partition, total network loss +- **Validators:** add, remove, change power +- **Evidence:** injection of DuplicateVoteEvidence and LightClientAttackEvidence + +### Functional Combinations + +Running separate tests for all combinations of the above functionality is not feasible, as there are millions of them. However, the functionality can be grouped into three broad classes: + +- **Global:** affects the entire network, needing a separate testnet for each combination (e.g. topology, network protocol, genesis settings) + +- **Local:** affects a single node, and can be varied per node in a testnet (e.g. ABCI/privval connections, database backend, block pruning) + +- **Temporal:** can be run after each other in the same testnet (e.g. recovery and validator changes) + +Thus, we can run separate testnets for all combinations of global options (on the order of 100). In each testnet, we run nodes with randomly generated node configurations optimized for broad coverage (i.e. if one node is using GoLevelDB, then no other node should use it if possible). And in each testnet, we sequentially and randomly pick nodes to stop/start, power cycle, add/remove, disconnect, and so on. + +All of the settings should be specified in a testnet configuration (or alternatively the seed that generated it) such that it can be retrieved from CI and debugged locally. + +A custom ABCI application will have to be built that can exhibit the necessary behavior (e.g. make validator changes, prune blocks, enable/disable persistence, and so on). + +### Test Stages + +Given a test configuration, the test runner has the following stages: + +- **Setup:** configures the Docker containers and networks, but does not start them. + +- **Initialization:** starts the Docker containers, performs fast sync/state sync. Accomodates for different start heights. + +- **Perturbation:** adds/removes validators, restarts nodes, perturbs networking, etc - liveness and readiness checked between each operation. + +- **Testing:** runs RPC tests independently against all network nodes, making sure data matches expectations and invariants hold. + +### Tests + +The general approach will be to put the network through a sequence of operations (see stages above), check basic liveness and readiness after each operation, and then once the network stabilizes run an RPC test suite against each node in the network. + +The test suite will do black-box testing against a single node's RPC service. We will be testing the behavior of the network as a whole, e.g. that a fast synced node correctly catches up to the chain head and serves basic block data via RPC. Thus the tests will not send e.g. P2P messages or examine the node database, as these are considered internal implementation details - if the network behaves correctly, presumably the internal components function correctly. Comprehensive component testing (e.g. each and every RPC method parameter) should be done via unit/integration tests. + +The tests must take into account the node configuration (e.g. some nodes may be pruned, others may not be validators), and should somehow be provided access to expected data (i.e. complete block headers for the entire chain). + +The test suite should use the Tendermint RPC client and the Tendermint light client, to exercise the client code as well. + +### Implementation Considerations + +The testnets should run in Docker Compose, both locally and in CI. This makes it easier to reproduce test failures locally. Supporting multiple test-runners (e.g. on VMs or Kubernetes) is out of scope. The same image should be used for all tests, with configuration passed via a mounted volume. + +There does not appear to be any off-the-shelf solutions that would do this for us, so we will have to roll our own on top of Docker Compose. This gives us more flexibility, but is estimated to be a few weeks of work. + +Testnets should be configured via a YAML file. These are used as inputs for the test runner, which e.g. generates Docker Compose configurations from them. An additional layer on top should generate these testnet configurations from a YAML file that specifies all the option combinations to test. + +Comprehensive testnets should run against master nightly. However, a small subset of representative testnets should run for each pull request, e.g. a four-node IPv4 network with state sync and fast sync. + +Tests should be written using the standard Go test framework (and e.g. Testify), with a helper function to fetch info from the test configuration. The test runner will run the tests separately for each network node, and the test must vary its expectations based on the node's configuration. + +It should be possible to launch a specific testnet and run individual test cases from the IDE or local terminal against a it. + +If possible, the existing `testnet` command should be extended to set up the network topologies needed by the end-to-end tests. + +## Status + +Implemented + +## Consequences + +### Positive + +- Comprehensive end-to-end test coverage of basic Tendermint functionality, exercising common code paths in the same way that users would + +- Test environments can easily be reproduced locally and debugged via standard tooling + +### Negative + +- Limited coverage of consensus correctness testing (e.g. Jepsen) + +- No coverage of malicious or adversarial behavior + +- Have to roll our own test framework, which takes engineering resources + +- Possibly slower CI times, depending on which tests are run + +- Operational costs and overhead, e.g. infrastructure costs and system maintenance + +### Neutral + +- No support for alternative infrastructure platforms, e.g. Kubernetes or VMs + +## References + +- [#5291: new end-to-end test suite](https://github.com/tendermint/tendermint/issues/5291) diff --git a/docs/architecture/adr-067-mempool-refactor.md b/docs/architecture/adr-067-mempool-refactor.md new file mode 100644 index 000000000..7b881937e --- /dev/null +++ b/docs/architecture/adr-067-mempool-refactor.md @@ -0,0 +1,303 @@ +# ADR 067: Mempool Refactor + +- [ADR 067: Mempool Refactor](#adr-067-mempool-refactor) + - [Changelog](#changelog) + - [Status](#status) + - [Context](#context) + - [Current Design](#current-design) + - [Alternative Approaches](#alternative-approaches) + - [Prior Art](#prior-art) + - [Ethereum](#ethereum) + - [Diem](#diem) + - [Decision](#decision) + - [Detailed Design](#detailed-design) + - [CheckTx](#checktx) + - [Mempool](#mempool) + - [Eviction](#eviction) + - [Gossiping](#gossiping) + - [Performance](#performance) + - [Future Improvements](#future-improvements) + - [Consequences](#consequences) + - [Positive](#positive) + - [Negative](#negative) + - [Neutral](#neutral) + - [References](#references) + +## Changelog + +- April 19, 2021: Initial Draft (@alexanderbez) + +## Status + +Accepted + +## Context + +Tendermint Core has a reactor and data structure, mempool, that facilitates the +ephemeral storage of uncommitted transactions. Honest nodes participating in a +Tendermint network gossip these uncommitted transactions to each other if they +pass the application's `CheckTx`. In addition, block proposers select from the +mempool a subset of uncommitted transactions to include in the next block. + +Currently, the mempool in Tendermint Core is designed as a FIFO queue. In other +words, transactions are included in blocks as they are received by a node. There +currently is no explicit and prioritized ordering of these uncommitted transactions. +This presents a few technical and UX challenges for operators and applications. + +Namely, validators are not able to prioritize transactions by their fees or any +incentive aligned mechanism. In addition, the lack of prioritization also leads +to cascading effects in terms of DoS and various attack vectors on networks, +e.g. [cosmos/cosmos-sdk#8224](https://github.com/cosmos/cosmos-sdk/discussions/8224). + +Thus, Tendermint Core needs the ability for an application and its users to +prioritize transactions in a flexible and performant manner. Specifically, we're +aiming to either improve, maintain or add the following properties in the +Tendermint mempool: + +- Allow application-determined transaction priority. +- Allow efficient concurrent reads and writes. +- Allow block proposers to reap transactions efficiently by priority. +- Maintain a fixed mempool capacity by transaction size and evict lower priority + transactions to make room for higher priority transactions. +- Allow transactions to be gossiped by priority efficiently. +- Allow operators to specify a maximum TTL for transactions in the mempool before + they're automatically evicted if not selected for a block proposal in time. +- Ensure the design allows for future extensions, such as replace-by-priority and + allowing multiple pending transactions per sender, to be incorporated easily. + +Note, not all of these properties will be addressed by the proposed changes in +this ADR. However, this proposal will ensure that any unaddressed properties +can be addressed in an easy and extensible manner in the future. + +### Current Design + +![mempool](./img/mempool-v0.jpeg) + +At the core of the `v0` mempool reactor is a concurrent linked-list. This is the +primary data structure that contains `Tx` objects that have passed `CheckTx`. +When a node receives a transaction from another peer, it executes `CheckTx`, which +obtains a read-lock on the `*CListMempool`. If the transaction passes `CheckTx` +locally on the node, it is added to the `*CList` by obtaining a write-lock. It +is also added to the `cache` and `txsMap`, both of which obtain their own respective +write-locks and map a reference from the transaction hash to the `Tx` itself. + +Transactions are continuously gossiped to peers whenever a new transaction is added +to a local node's `*CList`, where the node at the front of the `*CList` is selected. +Another transaction will not be gossiped until the `*CList` notifies the reader +that there are more transactions to gossip. + +When a proposer attempts to propose a block, they will execute `ReapMaxBytesMaxGas` +on the reactor's `*CListMempool`. This call obtains a read-lock on the `*CListMempool` +and selects as many transactions as possible starting from the front of the `*CList` +moving to the back of the list. + +When a block is finally committed, a caller invokes `Update` on the reactor's +`*CListMempool` with all the selected transactions. Note, the caller must also +explicitly obtain a write-lock on the reactor's `*CListMempool`. This call +will remove all the supplied transactions from the `txsMap` and the `*CList`, both +of which obtain their own respective write-locks. In addition, the transaction +may also be removed from the `cache` which obtains it's own write-lock. + +## Alternative Approaches + +When considering which approach to take for a priority-based flexible and +performant mempool, there are two core candidates. The first candidate is less +invasive in the required set of protocol and implementation changes, which +simply extends the existing `CheckTx` ABCI method. The second candidate essentially +involves the introduction of new ABCI method(s) and would require a higher degree +of complexity in protocol and implementation changes, some of which may either +overlap or conflict with the upcoming introduction of [ABCI++](https://github.com/tendermint/tendermint/blob/master/docs/rfc/rfc-013-abci%2B%2B.md). + +For more information on the various approaches and proposals, please see the +[mempool discussion](https://github.com/tendermint/tendermint/discussions/6295). + +## Prior Art + +### Ethereum + +The Ethereum mempool, specifically [Geth](https://github.com/ethereum/go-ethereum), +contains a mempool, `*TxPool`, that contains various mappings indexed by account, +such as a `pending` which contains all processable transactions for accounts +prioritized by nonce. It also contains a `queue` which is the exact same mapping +except it contains not currently processable transactions. The mempool also +contains a `priced` index of type `*txPricedList` that is a priority queue based +on transaction price. + +### Diem + +The [Diem mempool](https://github.com/diem/diem/blob/master/mempool/README.md#implementation-details) +contains a similar approach to the one we propose. Specifically, the Diem mempool +contains a mapping from `Account:[]Tx`. On top of this primary mapping from account +to a list of transactions, are various indexes used to perform certain actions. + +The main index, `PriorityIndex`. is an ordered queue of transactions that are +“consensus-ready” (i.e., they have a sequence number which is sequential to the +current sequence number for the account). This queue is ordered by gas price so +that if a client is willing to pay more (than other clients) per unit of +execution, then they can enter consensus earlier. + +## Decision + +To incorporate a priority-based flexible and performant mempool in Tendermint Core, +we will introduce new fields, `priority` and `sender`, into the `ResponseCheckTx` +type. + +We will introduce a new versioned mempool reactor, `v1` and assume an implicit +version of the current mempool reactor as `v0`. In the new `v1` mempool reactor, +we largely keep the functionality the same as `v0` except we augment the underlying +data structures. Specifically, we keep a mapping of senders to transaction objects. +On top of this mapping, we index transactions to provide the ability to efficiently +gossip and reap transactions by priority. + +## Detailed Design + +### CheckTx + +We introduce the following new fields into the `ResponseCheckTx` type: + +```diff +message ResponseCheckTx { + uint32 code = 1; + bytes data = 2; + string log = 3; // nondeterministic + string info = 4; // nondeterministic + int64 gas_wanted = 5 [json_name = "gas_wanted"]; + int64 gas_used = 6 [json_name = "gas_used"]; + repeated Event events = 7 [(gogoproto.nullable) = false, (gogoproto.jsontag) = "events,omitempty"]; + string codespace = 8; ++ int64 priority = 9; ++ string sender = 10; +} +``` + +It is entirely up the application in determining how these fields are populated +and with what values, e.g. the `sender` could be the signer and fee payer +of the transaction, the `priority` could be the cumulative sum of the fee(s). + +Only `sender` is required, while `priority` can be omitted which would result in +using the default value of zero. + +### Mempool + +The existing concurrent-safe linked-list will be replaced by a thread-safe map +of ``, i.e a mapping from `sender` to a single `*Tx` object, where +each `*Tx` is the next valid and processable transaction from the given `sender`. + +On top of this mapping, we index all transactions by priority using a thread-safe +priority queue, i.e. a [max heap](https://en.wikipedia.org/wiki/Min-max_heap). +When a proposer is ready to select transactions for the next block proposal, +transactions are selected from this priority index by highest priority order. +When a transaction is selected and reaped, it is removed from this index and +from the `` mapping. + +We define `Tx` as the following data structure: + +```go +type Tx struct { + // Tx represents the raw binary transaction data. + Tx []byte + + // Priority defines the transaction's priority as specified by the application + // in the ResponseCheckTx response. + Priority int64 + + // Sender defines the transaction's sender as specified by the application in + // the ResponseCheckTx response. + Sender string + + // Index defines the current index in the priority queue index. Note, if + // multiple Tx indexes are needed, this field will be removed and each Tx + // index will have its own wrapped Tx type. + Index int +} +``` + +### Eviction + +Upon successfully executing `CheckTx` for a new `Tx` and the mempool is currently +full, we must check if there exists a `Tx` of lower priority that can be evicted +to make room for the new `Tx` with higher priority and with sufficient size +capacity left. + +If such a `Tx` exists, we find it by obtaining a read lock and sorting the +priority queue index. Once sorted, we find the first `Tx` with lower priority and +size such that the new `Tx` would fit within the mempool's size limit. We then +remove this `Tx` from the priority queue index as well as the `` +mapping. + +This will require additional `O(n)` space and `O(n*log(n))` runtime complexity. Note that the space complexity does not depend on the size of the tx. + +### Gossiping + +We keep the existing thread-safe linked list as an additional index. Using this +index, we can efficiently gossip transactions in the same manner as they are +gossiped now (FIFO). + +Gossiping transactions will not require locking any other indexes. + +### Performance + +Performance should largely remain unaffected apart from the space overhead of +keeping an additional priority queue index and the case where we need to evict +transactions from the priority queue index. There should be no reads which +block writes on any index + +## Future Improvements + +There are a few considerable ways in which the proposed design can be improved or +expanded upon. Namely, transaction gossiping and for the ability to support +multiple transactions from the same `sender`. + +With regards to transaction gossiping, we need empirically validate whether we +need to gossip by priority. In addition, the current method of gossiping may not +be the most efficient. Specifically, broadcasting all the transactions a node +has in it's mempool to it's peers. Rather, we should explore for the ability to +gossip transactions on a request/response basis similar to Ethereum and other +protocols. Not only does this reduce bandwidth and complexity, but also allows +for us to explore gossiping by priority or other dimensions more efficiently. + +Allowing for multiple transactions from the same `sender` is important and will +most likely be a needed feature in the future development of the mempool, but for +now it suffices to have the preliminary design agreed upon. Having the ability +to support multiple transactions per `sender` will require careful thought with +regards to the interplay of the corresponding ABCI application. Regardless, the +proposed design should allow for adaptations to support this feature in a +non-contentious and backwards compatible manner. + +## Consequences + +### Positive + +- Transactions are allowed to be prioritized by the application. + +### Negative + +- Increased size of the `ResponseCheckTx` Protocol Buffer type. +- Causal ordering is NOT maintained. + - It is possible that certain transactions broadcasted in a particular order may + pass `CheckTx` but not end up being committed in a block because they fail + `CheckTx` later. e.g. Consider Tx1 that sends funds from existing + account Alice to a _new_ account Bob with priority P1 and then later + Bob's _new_ account sends funds back to Alice in Tx2 with P2, + such that P2 > P1. If executed in this order, both + transactions will pass `CheckTx`. However, when a proposer is ready to select + transactions for the next block proposal, they will select Tx2 before + Tx1 and thus Tx2 will _fail_ because Tx1 must + be executed first. This is because there is a _causal ordering_, + Tx1 ➝ Tx2. These types of situations should be rare as + most transactions are not causally ordered and can be circumvented by simply + trying again at a later point in time or by ensuring the "child" priority is + lower than the "parent" priority. In other words, if parents always have + priories that are higher than their children, then the new mempool design will + maintain causal ordering. + +### Neutral + +- A transaction that passed `CheckTx` and entered the mempool can later be evicted + at a future point in time if a higher priority transaction entered while the + mempool was full. + +## References + +- [ABCI++](https://github.com/tendermint/tendermint/blob/master/docs/rfc/rfc-013-abci%2B%2B.md) +- [Mempool Discussion](https://github.com/tendermint/tendermint/discussions/6295) diff --git a/docs/architecture/adr-068-reverse-sync.md b/docs/architecture/adr-068-reverse-sync.md new file mode 100644 index 000000000..7926e0b20 --- /dev/null +++ b/docs/architecture/adr-068-reverse-sync.md @@ -0,0 +1,97 @@ +# ADR 068: Reverse Sync + +## Changelog + +- 20 April 2021: Initial Draft (@cmwaters) + +## Status + +Accepted + +## Context + +The advent of state sync and block pruning gave rise to the opportunity for full nodes to participate in consensus without needing complete block history. This also introduced a problem with respect to evidence handling. Nodes that didn't have all the blocks within the evidence age were incapable of validating evidence, thus halting if that evidence was committed on chain. + +[ADR 068](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-068-reverse-sync.md) was published in response to this problem and modified the spec to add a minimum block history invariant. This predominantly sought to extend state sync so that it was capable of fetching and storing the `Header`, `Commit` and `ValidatorSet` (essentially a `LightBlock`) of the last `n` heights, where `n` was calculated based from the evidence age. + +This ADR sets out to describe the design of this state sync extension as well as modifications to the light client provider and the merging of tm store. + +## Decision + +The state sync reactor will be extended by introducing 2 new P2P messages (and a new channel). + +```protobuf +message LightBlockRequest { + uint64 height = 1; +} + +message LightBlockResponse { + tendermint.types.LightBlock light_block = 1; +} +``` + +This will be used by the "reverse sync" protocol that will fetch, verify and store prior light blocks such that the node can safely participate in consensus. + +Furthermore this allows for a new light client provider which offers the ability for the `StateProvider` to use the underlying P2P stack instead of RPC. + +## Detailed Design + +This section will focus first on the reverse sync (here we call it `backfill`) mechanism as a standalone protocol and then look to decribe how it integrates within the state sync reactor and how we define the new p2p light client provider. + +```go +// Backfill fetches, verifies, and stores necessary history +// to participate in consensus and validate evidence. +func (r *Reactor) backfill(state State) error {} +``` + +`State` is used to work out how far to go back, namely we need all light blocks that have: +- a height: `h >= state.LastBlockHeight - state.ConsensusParams.Evidence.MaxAgeNumBlocks` +- a time: `t >= state.LastBlockTime - state.ConsensusParams.Evidence.MaxAgeDuration` + +Reverse Sync relies on two components: A `Dispatcher` and a `BlockQueue`. The `Dispatcher` is a pattern taken from a similar [PR](https://github.com/tendermint/tendermint/pull/4508). It is wired to the `LightBlockChannel` and allows for concurrent light block requests by shifting through a linked list of peers. This abstraction has the nice quality that it can also be used as an array of light providers for a P2P based light client. + +The `BlockQueue` is a data structure that allows for multiple workers to fetch light blocks, serializing them for the main thread which picks them off the end of the queue, verifies the hashes and persists them. + +### Integration with State Sync + +Reverse sync is a blocking process that runs directly after syncing state and before transitioning into either fast sync or consensus. + +Prior, the state sync service was not connected to any db, instead it passed the state and commit back to the node. For reverse sync, state sync will be given access to both the `StateStore` and `BlockStore` to be able to write `Header`'s, `Commit`'s and `ValidatorSet`'s and read them so as to serve other state syncing peers. + +This also means adding new methods to these respective stores in order to persist them + +### P2P Light Client Provider + +As mentioned previously, the `Dispatcher` is capable of handling requests to multiple peers. We can therefore simply peel off a `blockProvider` instance which is assigned to each peer. By giving it the chain ID, the `blockProvider` is capable of doing a basic validation of the light block before returning it to the client. + +It's important to note that because state sync doesn't have access to the evidence channel it is incapable of allowing the light client to report evidence thus `ReportEvidence` is a no op. This is not too much of a concern for reverse sync but will need to be addressed for pure p2p light clients. + +### Pruning + +A final small note is with pruning. This ADR will introduce changes that will not allow an application to prune blocks that are within the evidence age. + +## Future Work + +This ADR tries to remain within the scope of extending state sync, however the changes made opens the door for several areas to be followed up: +- Properly integrate p2p messaging in the light client package. This will require adding the evidence channel so the light client is capable of reporting evidence. We may also need to rethink the providers model (i.e. currently providers are only added on start up) +- Merge and clean up the tendermint stores (state, block and evidence). This ADR adds new methods to both the state and block store for saving headers, commits and validator sets. This doesn't quite fit with the current struct (i.e. only `BlockMeta`s instead of `Header`s are saved). We should explore consolidating this for the sake of atomicity and the opportunity for batching. There are also other areas for changes such as the way we store block parts. See [here](https://github.com/tendermint/tendermint/issues/5383) and [here](https://github.com/tendermint/tendermint/issues/4630) for more context. +- Explore opportunistic reverse sync. Technically we don't need to reverse sync if no evidence is observed. I've tried to design the protocol such that it could be possible to move it across to the evidence package if we see fit. Thus only when evidence is seen where we don't have the necessary data, do we perform a reverse sync. The problem with this is that imagine we are in consensus and some evidence pops up requiring us to first fetch and verify the last 10,000 blocks. There's no way a node could do this (sequentially) and vote before the round finishes. Also as we don't punish invalid evidence, a malicious node could easily spam the chain just to get a bunch of "stateless" nodes to perform a bunch of useless work. +- Explore full reverse sync. Currently we only fetch light blocks. There might be benefits in the future to fetch and persist entire blocks especially if we give control to the application to do this. + +## Consequences + +### Positive + +- All nodes should have sufficient history to validate all types of evidence +- State syncing nodes can use the p2p layer for light client verification of state. This has better UX and could be faster but I haven't benchmarked. + +### Negative + +- Introduces more code = more maintenance + +### Neutral + +## References + +- [Reverse Sync RFC](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-068-reverse-sync.md) +- [Original Issue](https://github.com/tendermint/tendermint/issues/5617) diff --git a/docs/architecture/adr-069-flexible-node-initialization.md b/docs/architecture/adr-069-flexible-node-initialization.md new file mode 100644 index 000000000..4e66d88d6 --- /dev/null +++ b/docs/architecture/adr-069-flexible-node-initialization.md @@ -0,0 +1,268 @@ +# ADR 069: Flexible Node Initialization + +## Changlog + +- 2021-06-09: Initial Draft (@tychoish) + +- 2021-07-21: Major Revision (@tychoish) + +## Status + +Proposed. + +## Context + +In an effort to support [Go-API-Stability](./adr-060-go-api-stability.md), +during the 0.35 development cycle, we have attempted to reduce the the API +surface area by moving most of the interface of the `node` package into +unexported functions, as well as moving the reactors to an `internal` +package. Having this coincide with the 0.35 release made a lot of sense +because these interfaces were _already_ changing as a result of the `p2p` +[refactor](./adr-061-p2p-refactor-scope.md), so it made sense to think a bit +more about how tendermint exposes this API. + +While the interfaces of the P2P layer and most of the node package are already +internalized, this precludes some operational patterns that are important to +users who use tendermint as a library. Specifically, introspecting the +tendermint node service and replacing components is not supported in the latest +version of the code, and some of these use cases would require maintaining a +vendor copy of the code. Adding these features requires rather extensive +(internal/implementation) changes to the `node` and `rpc` packages, and this +ADR describes a model for changing the way that tendermint nodes initialize, in +service of providing this kind of functionality. + +We consider node initialization, because the current implemention +provides strong connections between all components, as well as between +the components of the node and the RPC layer, and being able to think +about the interactions of these components will help enable these +features and help define the requirements of the node package. + +## Alternative Approaches + +These alternatives are presented to frame the design space and to +contextualize the decision in terms of product requirements. These +ideas are not inherently bad, and may even be possible or desireable +in the (distant) future, and merely provide additional context for how +we, in the moment came to our decision(s). + +### Do Nothing + +The current implementation is functional and sufficient for the vast +majority of use cases (e.g., all users of the Cosmos-SDK as well as +anyone who runs tendermint and the ABCI application in separate +processes). In the current implementation, and even previous versions, +modifying node initialization or injecting custom components required +copying most of the `node` package, which required such users +to maintain a vendored copy of tendermint. + +While this is (likely) not tenable in the long term, as users do want +more modularity, and the current service implementation is brittle and +difficult to maintain, in the short term it may be possible to delay +implementation somewhat. Eventually, however, we will need to make the +`node` package easier to maintain and reason about. + +### Generic Service Pluggability + +One possible system design would export interfaces (in the Golang +sense) for all components of the system, to permit runtime dependency +injection of all components in the system, so that users can compose +tendermint nodes of arbitrary user-supplied components. + +Although this level of customization would provide benefits, it would be a huge +undertaking (particularly with regards to API design work) that we do not have +scope for at the moment. Eventually providing support for some kinds of +pluggability may be useful, so the current solution does not explicitly +foreclose the possibility of this alternative. + +### Abstract Dependency Based Startup and Shutdown + +The main proposal in this document makes tendermint node initialization simpler +and more abstract, but the system lacks a number of +features which daemon/service initialization could provide, such as a +system allowing the authors of services to control initialization and shutdown order +of components using dependency relationships. + +Such a system could work by allowing services to declare +initialization order dependencies to other reactors (by ID, perhaps) +so that the node could decide the initialization based on the +dependencies declared by services rather than requiring the node to +encode this logic directly. + +This level of configuration is probably more complicated than is needed. Given +that the authors of components in the current implementation of tendermint +already *do* need to know about other components, a dependency-based system +would probably be overly-abstract at this stage. + +## Decisions + +- To the greatest extent possible, factor the code base so that + packages are responsible for their own initialization, and minimize + the amount of code in the `node` package itself. + +- As a design goal, reduce direct coupling and dependencies between + components in the implementation of `node`. + +- Begin iterating on a more-flexible internal framework for + initializing tendermint nodes to make the initatilization process + less hard-coded by the implementation of the node objects. + + - Reactors should not need to expose their interfaces *within* the + implementation of the node type + + - This refactoring should be entirely opaque to users. + + - These node initialization changes should not require a + reevaluation of the `service.Service` or a generic initialization + orchestration framework. + +- Do not proactively provide a system for injecting + components/services within a tendtermint node, though make it + possible to retrofit this kind of plugability in the future if + needed. + +- Prioritize implementation of p2p-based statesync reactor to obviate + need for users to inject a custom state-sync provider. + +## Detailed Design + +The [current +nodeImpl](https://github.com/tendermint/tendermint/blob/master/node/node.go#L47) +includes direct references to the implementations of each of the +reactors, which should be replaced by references to `service.Service` +objects. This will require moving construction of the [rpc +service](https://github.com/tendermint/tendermint/blob/master/node/node.go#L771) +into the constructor of +[makeNode](https://github.com/tendermint/tendermint/blob/master/node/node.go#L126). One +possible implementation of this would be to eliminate the current +`ConfigureRPC` method on the node package and instead [configure it +here](https://github.com/tendermint/tendermint/pull/6798/files#diff-375d57e386f20eaa5f09f02bb9d28bfc48ac3dca18d0325f59492208219e5618R441). + +To avoid adding complexity to the `node` package, we will add a +composite service implementation to the `service` package +that implements `service.Service` and is composed of a sequence of +underlying `service.Service` objects and handles their +startup/shutdown in the specified sequential order. + +Consensus, blocksync (*née* fast sync), and statesync all depend on +each other, and have significant initialization dependencies that are +presently encoded in the `node` package. As part of this change, a +new package/component (likely named `blocks` located at +`internal/blocks`) will encapsulate the initialization of these block +management areas of the code. + +### Injectable Component Option + +This section briefly describes a possible implementation for +user-supplied services running within a node. This should not be +implemented unless user-supplied components are a hard requirement for +a user. + +In order to allow components to be replaced, a new public function +will be added to the public interface of `node` with a signature that +resembles the following: + +```go +func NewWithServices(conf *config.Config, + logger log.Logger, + cf proxy.ClientCreator, + gen *types.GenesisDoc, + srvs []service.Service, +) (service.Service, error) { +``` + +The `service.Service` objects will be initialized in the order supplied, after +all pre-configured/default services have started (and shut down in reverse +order). The given services may implement additional interfaces, allowing them +to replace specific default services. `NewWithServices` will validate input +service lists with the following rules: + +- None of the services may already be running. +- The caller may not supply more than one replacement reactor for a given + default service type. + +If callers violate any of these rules, `NewWithServices` will return +an error. To retract support for this kind of operation in the future, +the function can be modified to *always* return an error. + +## Consequences + +### Positive + +- The node package will become easier to maintain. + +- It will become easier to add additional services within tendermint + nodes. + +- It will become possible to replace default components in the node + package without vendoring the tendermint repo and modifying internal + code. + +- The current end-to-end (e2e) test suite will be able to prevent any + regressions, and the new functionality can be thoroughly unit tested. + +- The scope of this project is very narrow, which minimizes risk. + +### Negative + +- This increases our reliance on the `service.Service` interface which + is probably not an interface that we want to fully commit to. + +- This proposal implements a fairly minimal set of functionality and + leaves open the possibility for many additional features which are + not included in the scope of this proposal. + +### Neutral + +N/A + +## Open Questions + +- To what extent does this new initialization framework need to accommodate + the legacy p2p stack? Would it be possible to delay a great deal of this + work to the 0.36 cycle to avoid this complexity? + + - Answer: _depends on timing_, and the requirement to ship pluggable reactors in 0.35. + +- Where should additional public types be exported for the 0.35 + release? + + Related to the general project of API stabilization we want to deprecate + the `types` package, and move its contents into a new `pkg` hierarchy; + however, the design of the `pkg` interface is currently underspecified. + If `types` is going to remain for the 0.35 release, then we should consider + the impact of using multiple organizing modalities for this code within a + single release. + +## Future Work + +- Improve or simplify the `service.Service` interface. There are some + pretty clear limitations with this interface as written (there's no + way to timeout slow startup or shut down, the cycle between the + `service.BaseService` and `service.Service` implementations is + troubling, the default panic in `OnReset` seems troubling.) + +- As part of the refactor of `service.Service` have all services/nodes + respect the lifetime of a `context.Context` object, and avoid the + current practice of creating `context.Context` objects in p2p and + reactor code. This would be required for in-process multi-tenancy. + +- Support explicit dependencies between components and allow for + parallel startup, so that different reactors can startup at the same + time, where possible. + +## References + +- [the component + graph](https://peter.bourgon.org/go-for-industrial-programming/#the-component-graph) + as a framing for internal service construction. + +## Appendix + +### Dependencies + +There's a relationship between the blockchain and consensus reactor +described by the following dependency graph makes replacing some of +these components more difficult relative to other reactors or +components. + +![consensus blockchain dependency graph](./img/consensus_blockchain.png) diff --git a/docs/architecture/adr-071-proposer-based-timestamps.md b/docs/architecture/adr-071-proposer-based-timestamps.md new file mode 100644 index 000000000..793f105b3 --- /dev/null +++ b/docs/architecture/adr-071-proposer-based-timestamps.md @@ -0,0 +1,333 @@ +# ADR 71: Proposer-Based Timestamps + +## Changelog + + - July 15 2021: Created by @williambanfield + - Aug 4 2021: Draft completed by @williambanfield + - Aug 5 2021: Draft updated to include data structure changes by @williambanfield + - Aug 20 2021: Language edits completed by @williambanfield + - Oct 25 2021: Update the ADR to match updated spec from @cason by @williambanfield + - Nov 10 2021: Additional language updates by @williambanfield per feedback from @cason + - Feb 2 2022: Synchronize logic for timely with latest version of the spec by @williambanfield + +## Status + + **Accepted** + +## Context + +Tendermint currently provides a monotonically increasing source of time known as [BFTTime](https://github.com/tendermint/tendermint/blob/master/spec/consensus/bft-time.md). +This mechanism for producing a source of time is reasonably simple. +Each correct validator adds a timestamp to each `Precommit` message it sends. +The timestamp it sends is either the validator's current known Unix time or one millisecond greater than the previous block time, depending on which value is greater. +When a block is produced, the proposer chooses the block timestamp as the weighted median of the times in all of the `Precommit` messages the proposer received. +The weighting is proportional to the amount of voting power, or stake, a validator has on the network. +This mechanism for producing timestamps is both deterministic and byzantine fault tolerant. + +This current mechanism for producing timestamps has a few drawbacks. +Validators do not have to agree at all on how close the selected block timestamp is to their own currently known Unix time. +Additionally, any amount of voting power `>1/3` may directly control the block timestamp. +As a result, it is quite possible that the timestamp is not particularly meaningful. + +These drawbacks present issues in the Tendermint protocol. +Timestamps are used by light clients to verify blocks. +Light clients rely on correspondence between their own currently known Unix time and the block timestamp to verify blocks they see; +However, their currently known Unix time may be greatly divergent from the block timestamp as a result of the limitations of `BFTTime`. + +The proposer-based timestamps specification suggests an alternative approach for producing block timestamps that remedies these issues. +Proposer-based timestamps alter the current mechanism for producing block timestamps in two main ways: + +1. The block proposer is amended to offer up its currently known Unix time as the timestamp for the next block instead of the `BFTTime`. +1. Correct validators only approve the proposed block timestamp if it is close enough to their own currently known Unix time. + +The result of these changes is a more meaningful timestamp that cannot be controlled by `<= 2/3` of the validator voting power. +This document outlines the necessary code changes in Tendermint to implement the corresponding [proposer-based timestamps specification](https://github.com/tendermint/tendermint/tree/master/spec/consensus/proposer-based-timestamp). + +## Alternative Approaches + +### Remove timestamps altogether + +Computer clocks are bound to skew for a variety of reasons. +Using timestamps in our protocol means either accepting the timestamps as not reliable or impacting the protocol’s liveness guarantees. +This design requires impacting the protocol’s liveness in order to make the timestamps more reliable. +An alternate approach is to remove timestamps altogether from the block protocol. +`BFTTime` is deterministic but may be arbitrarily inaccurate. +However, having a reliable source of time is quite useful for applications and protocols built on top of a blockchain. + +We therefore decided not to remove the timestamp. +Applications often wish for some transactions to occur on a certain day, on a regular period, or after some time following a different event. +All of these require some meaningful representation of agreed upon time. +The following protocols and application features require a reliable source of time: +* Tendermint Light Clients [rely on correspondence between their known time](https://github.com/tendermint/tendermint/blob/master/spec/light-client/verification/README.md#definitions-1) and the block time for block verification. +* Tendermint Evidence validity is determined [either in terms of heights or in terms of time](https://github.com/tendermint/tendermint/blob/8029cf7a0fcc89a5004e173ec065aa48ad5ba3c8/spec/consensus/evidence.md#verification). +* Unbonding of staked assets in the Cosmos Hub [occurs after a period of 21 days](https://github.com/cosmos/governance/blob/ce75de4019b0129f6efcbb0e752cd2cc9e6136d3/params-change/Staking.md#unbondingtime). +* IBC packets can use either a [timestamp or a height to timeout packet delivery](https://docs.cosmos.network/v0.44/ibc/overview.html#acknowledgements) + +Finally, inflation distribution in the Cosmos Hub uses an approximation of time to calculate an annual percentage rate. +This approximation of time is calculated using [block heights with an estimated number of blocks produced in a year](https://github.com/cosmos/governance/blob/master/params-change/Mint.md#blocksperyear). +Proposer-based timestamps will allow this inflation calculation to use a more meaningful and accurate source of time. + + +## Decision + +Implement proposer-based timestamps and remove `BFTTime`. + +## Detailed Design + +### Overview + +Implementing proposer-based timestamps will require a few changes to Tendermint’s code. +These changes will be to the following components: +* The `internal/consensus/` package. +* The `state/` package. +* The `Vote`, `CommitSig` and `Header` types. +* The consensus parameters. + +### Changes to `CommitSig` + +The [CommitSig](https://github.com/tendermint/tendermint/blob/a419f4df76fe4aed668a6c74696deabb9fe73211/types/block.go#L604) struct currently contains a timestamp. +This timestamp is the current Unix time known to the validator when it issued a `Precommit` for the block. +This timestamp is no longer used and will be removed in this change. + +`CommitSig` will be updated as follows: + +```diff +type CommitSig struct { + BlockIDFlag BlockIDFlag `json:"block_id_flag"` + ValidatorAddress Address `json:"validator_address"` +-- Timestamp time.Time `json:"timestamp"` + Signature []byte `json:"signature"` +} +``` + +### Changes to `Vote` messages + +`Precommit` and `Prevote` messages use a common [Vote struct](https://github.com/tendermint/tendermint/blob/a419f4df76fe4aed668a6c74696deabb9fe73211/types/vote.go#L50). +This struct currently contains a timestamp. +This timestamp is set using the [voteTime](https://github.com/tendermint/tendermint/blob/e8013281281985e3ada7819f42502b09623d24a0/internal/consensus/state.go#L2241) function and therefore vote times correspond to the current Unix time known to the validator, provided this time is greater than the timestamp of the previous block. +For precommits, this timestamp is used to construct the [CommitSig that is included in the block in the LastCommit](https://github.com/tendermint/tendermint/blob/e8013281281985e3ada7819f42502b09623d24a0/types/block.go#L754) field. +For prevotes, this field is currently unused. +Proposer-based timestamps will use the timestamp that the proposer sets into the block and will therefore no longer require that a timestamp be included in the vote messages. +This timestamp is therefore no longer useful as part of consensus and may optionally be dropped from the message. + +`Vote` will be updated as follows: + +```diff +type Vote struct { + Type tmproto.SignedMsgType `json:"type"` + Height int64 `json:"height"` + Round int32 `json:"round"` + BlockID BlockID `json:"block_id"` // zero if vote is nil. +-- Timestamp time.Time `json:"timestamp"` + ValidatorAddress Address `json:"validator_address"` + ValidatorIndex int32 `json:"validator_index"` + Signature []byte `json:"signature"` +} +``` + +### New consensus parameters + +The proposer-based timestamp specification includes a pair of new parameters that must be the same among all validators. +These parameters are `PRECISION`, and `MSGDELAY`. + +The `PRECISION` and `MSGDELAY` parameters are used to determine if the proposed timestamp is acceptable. +A validator will only Prevote a proposal if the proposal timestamp is considered `timely`. +A proposal timestamp is considered `timely` if it is within `PRECISION` and `MSGDELAY` of the Unix time known to the validator. +More specifically, a proposal timestamp is `timely` if `proposalTimestamp - PRECISION ≤ validatorLocalTime ≤ proposalTimestamp + PRECISION + MSGDELAY`. + +Because the `PRECISION` and `MSGDELAY` parameters must be the same across all validators, they will be added to the [consensus parameters](https://github.com/tendermint/spec/blob/master/proto/tendermint/types/params.proto#L11) as [durations](https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#google.protobuf.Duration). + +The consensus parameters will be updated to include this `Synchrony` field as follows: + +```diff +type ConsensusParams struct { + Block BlockParams `json:"block"` + Evidence EvidenceParams `json:"evidence"` + Validator ValidatorParams `json:"validator"` + Version VersionParams `json:"version"` +++ Synchrony SynchronyParams `json:"synchrony"` +} +``` + +```go +type SynchronyParams struct { + MessageDelay time.Duration `json:"message_delay"` + Precision time.Duration `json:"precision"` +} +``` + +### Changes to the block proposal step + +#### Proposer selects block timestamp + +Tendermint currently uses the `BFTTime` algorithm to produce the block's `Header.Timestamp`. +The [proposal logic](https://github.com/tendermint/tendermint/blob/68ca65f5d79905abd55ea999536b1a3685f9f19d/internal/state/state.go#L269) sets the weighted median of the times in the `LastCommit.CommitSigs` as the proposed block's `Header.Timestamp`. + +In proposer-based timestamps, the proposer will still set a timestamp into the `Header.Timestamp`. +The timestamp the proposer sets into the `Header` will change depending on if the block has previously received a [polka](https://github.com/tendermint/tendermint/blob/053651160f496bb44b107a434e3e6482530bb287/docs/introduction/what-is-tendermint.md#consensus-overview) or not. + +#### Proposal of a block that has not previously received a polka + +If a proposer is proposing a new block then it will set the Unix time currently known to the proposer into the `Header.Timestamp` field. +The proposer will also set this same timestamp into the `Timestamp` field of the `Proposal` message that it issues. + +#### Re-proposal of a block that has previously received a polka + +If a proposer is re-proposing a block that has previously received a polka on the network, then the proposer does not update the `Header.Timestamp` of that block. +Instead, the proposer simply re-proposes the exact same block. +This way, the proposed block has the exact same block ID as the previously proposed block and the validators that have already received that block do not need to attempt to receive it again. + +The proposer will set the re-proposed block's `Header.Timestamp` as the `Proposal` message's `Timestamp`. + +#### Proposer waits + +Block timestamps must be monotonically increasing. +In `BFTTime`, if a validator’s clock was behind, the [validator added 1 millisecond to the previous block’s time and used that in its vote messages](https://github.com/tendermint/tendermint/blob/e8013281281985e3ada7819f42502b09623d24a0/internal/consensus/state.go#L2246). +A goal of adding proposer-based timestamps is to enforce some degree of clock synchronization, so having a mechanism that completely ignores the Unix time of the validator time no longer works. +Validator clocks will not be perfectly in sync. +Therefore, the proposer’s current known Unix time may be less than the previous block's `Header.Time`. +If the proposer’s current known Unix time is less than the previous block's `Header.Time`, the proposer will sleep until its known Unix time exceeds it. + +This change will require amending the [defaultDecideProposal](https://github.com/tendermint/tendermint/blob/822893615564cb20b002dd5cf3b42b8d364cb7d9/internal/consensus/state.go#L1180) method. +This method should now schedule a timeout that fires when the proposer’s time is greater than the previous block's `Header.Time`. +When the timeout fires, the proposer will finally issue the `Proposal` message. + +### Changes to proposal validation rules + +The rules for validating a proposed block will be modified to implement proposer-based timestamps. +We will change the validation logic to ensure that a proposal is `timely`. + +Per the proposer-based timestamps spec, `timely` only needs to be checked if a block has not received a +2/3 majority of `Prevotes` in a round. +If a block previously received a +2/3 majority of prevotes in a previous round, then +2/3 of the voting power considered the block's timestamp near enough to their own currently known Unix time in that round. + +The validation logic will be updated to check `timely` for blocks that did not previously receive +2/3 prevotes in a round. +Receiving +2/3 prevotes in a round is frequently referred to as a 'polka' and we will use this term for simplicity. + +#### Current timestamp validation logic + +To provide a better understanding of the changes needed to timestamp validation, we will first detail how timestamp validation works currently in Tendermint. + +The [validBlock function](https://github.com/tendermint/tendermint/blob/c3ae6f5b58e07b29c62bfdc5715b6bf8ae5ee951/state/validation.go#L14) currently [validates the proposed block timestamp in three ways](https://github.com/tendermint/tendermint/blob/c3ae6f5b58e07b29c62bfdc5715b6bf8ae5ee951/state/validation.go#L118). +First, the validation logic checks that this timestamp is greater than the previous block’s timestamp. + +Second, it validates that the block timestamp is correctly calculated as the weighted median of the timestamps in the [block’s LastCommit](https://github.com/tendermint/tendermint/blob/e8013281281985e3ada7819f42502b09623d24a0/types/block.go#L48). + +Finally, the validation logic authenticates the timestamps in the `LastCommit.CommitSig`. +The cryptographic signature in each `CommitSig` is created by signing a hash of fields in the block with the voting validator’s private key. +One of the items in this `signedBytes` hash is the timestamp in the `CommitSig`. +To authenticate the `CommitSig` timestamp, the validator authenticating votes builds a hash of fields that includes the `CommitSig` timestamp and checks this hash against the signature. +This takes place in the [VerifyCommit function](https://github.com/tendermint/tendermint/blob/e8013281281985e3ada7819f42502b09623d24a0/types/validation.go#L25). + +#### Remove unused timestamp validation logic + +`BFTTime` validation is no longer applicable and will be removed. +This means that validators will no longer check that the block timestamp is a weighted median of `LastCommit` timestamps. +Specifically, we will remove the call to [MedianTime in the validateBlock function](https://github.com/tendermint/tendermint/blob/4db71da68e82d5cb732b235eeb2fd69d62114b45/state/validation.go#L117). +The `MedianTime` function can be completely removed. + +Since `CommitSig`s will no longer contain a timestamp, the validator authenticating a commit will no longer include the `CommitSig` timestamp in the hash of fields it builds to check against the cryptographic signature. + +#### Timestamp validation when a block has not received a polka + +The [POLRound](https://github.com/tendermint/tendermint/blob/68ca65f5d79905abd55ea999536b1a3685f9f19d/types/proposal.go#L29) in the `Proposal` message indicates which round the block received a polka. +A negative value in the `POLRound` field indicates that the block has not previously been proposed on the network. +Therefore the validation logic will check for timely when `POLRound < 0`. + +When a validator receives a `Proposal` message, the validator will check that the `Proposal.Timestamp` is at most `PRECISION` greater than the current Unix time known to the validator, and at maximum `PRECISION + MSGDELAY` less than the current Unix time known to the validator. +If the timestamp is not within these bounds, the proposed block will not be considered `timely`. + +Once a full block matching the `Proposal` message is received, the validator will also check that the timestamp in the `Header.Timestamp` of the block matches this `Proposal.Timestamp`. +Using the `Proposal.Timestamp` to check `timely` allows for the `MSGDELAY` parameter to be more finely tuned since `Proposal` messages do not change sizes and are therefore faster to gossip than full blocks across the network. + +A validator will also check that the proposed timestamp is greater than the timestamp of the block for the previous height. +If the timestamp is not greater than the previous block's timestamp, the block will not be considered valid, which is the same as the current logic. + +#### Timestamp validation when a block has received a polka + +When a block is re-proposed that has already received a +2/3 majority of `Prevote`s on the network, the `Proposal` message for the re-proposed block is created with a `POLRound` that is `>= 0`. +A validator will not check that the `Proposal` is `timely` if the propose message has a non-negative `POLRound`. +If the `POLRound` is non-negative, each validator will simply ensure that it received the `Prevote` messages for the proposed block in the round indicated by `POLRound`. + +If the validator does not receive `Prevote` messages for the proposed block before the proposal timeout, then it will prevote nil. +Validators already check that +2/3 prevotes were seen in `POLRound`, so this does not represent a change to the prevote logic. + +A validator will also check that the proposed timestamp is greater than the timestamp of the block for the previous height. +If the timestamp is not greater than the previous block's timestamp, the block will not be considered valid, which is the same as the current logic. + +Additionally, this validation logic can be updated to check that the `Proposal.Timestamp` matches the `Header.Timestamp` of the proposed block, but it is less relevant since checking that votes were received is sufficient to ensure the block timestamp is correct. + +#### Relaxation of the 'Timely' check + +The `Synchrony` parameters, `MessageDelay` and `Precision` provide a means to bound the timestamp of a proposed block. +Selecting values that are too small presents a possible liveness issue for the network. +If a Tendermint network selects a `MessageDelay` parameter that does not accurately reflect the time to broadcast a proposal message to all of the validators on the network, nodes will begin rejecting proposals from otherwise correct proposers because these proposals will appear to be too far in the past. + +`MessageDelay` and `Precision` are planned to be configured as `ConsensusParams`. +A very common way to update `ConsensusParams` is by executing a transaction included in a block that specifies new values for them. +However, if the network is unable to produce blocks because of this liveness issue, no such transaction may be executed. +To prevent this dangerous condition, we will add a relaxation mechanism to the `Timely` predicate. +If consensus takes more than 10 rounds to produce a block for any reason, the `MessageDelay` will be doubled. +This doubling will continue for each subsequent 10 rounds of consensus. +This will enable chains that selected too small of a value for the `MessageDelay` parameter to eventually issue a transaction and readjust the parameters to more accurately reflect the broadcast time. + +This liveness issue is not as problematic for chains with very small `Precision` values. +Operators can more easily readjust local validator clocks to be more aligned. +Additionally, chains that wish to increase a small `Precision` value can still take advantage of the `MessageDelay` relaxation, waiting for the `MessageDelay` value to grow significantly and issuing proposals with timestamps that are far in the past of their peers. + +For more discussion of this, see [issue 371](https://github.com/tendermint/spec/issues/371). + +### Changes to the prevote step + +Currently, a validator will prevote a proposal in one of three cases: + +* Case 1: Validator has no locked block and receives a valid proposal. +* Case 2: Validator has a locked block and receives a valid proposal matching its locked block. +* Case 3: Validator has a locked block, sees a valid proposal not matching its locked block but sees +2/3 prevotes for the proposal’s block, either in the current round or in a round greater than or equal to the round in which it locked its locked block. + +The only change we will make to the prevote step is to what a validator considers a valid proposal as detailed above. + +### Changes to the precommit step + +The precommit step will not require much modification. +Its proposal validation rules will change in the same ways that validation will change in the prevote step with the exception of the `timely` check: precommit validation will never check that the timestamp is `timely`. + +### Remove voteTime Completely + +[voteTime](https://github.com/tendermint/tendermint/blob/822893615564cb20b002dd5cf3b42b8d364cb7d9/internal/consensus/state.go#L2229) is a mechanism for calculating the next `BFTTime` given both the validator's current known Unix time and the previous block timestamp. +If the previous block timestamp is greater than the validator's current known Unix time, then voteTime returns a value one millisecond greater than the previous block timestamp. +This logic is used in multiple places and is no longer needed for proposer-based timestamps. +It should therefore be removed completely. + +## Future Improvements + +* Implement BLS signature aggregation. +By removing fields from the `Precommit` messages, we are able to aggregate signatures. + +## Consequences + +### Positive + +* `<2/3` of validators can no longer influence block timestamps. +* Block timestamp will have stronger correspondence to real time. +* Improves the reliability of light client block verification. +* Enables BLS signature aggregation. +* Enables evidence handling to use time instead of height for evidence validity. + +### Neutral + +* Alters Tendermint’s liveness properties. +Liveness now requires that all correct validators have synchronized clocks within a bound. +Liveness will now also require that validators’ clocks move forward, which was not required under `BFTTime`. + +### Negative + +* May increase the length of the propose step if there is a large skew between the previous proposer and the current proposer’s local Unix time. +This skew will be bound by the `PRECISION` value, so it is unlikely to be too large. + +* Current chains with block timestamps far in the future will either need to pause consensus until after the erroneous block timestamp or must maintain synchronized but very inaccurate clocks. + +## References + +* [PBTS Spec](https://github.com/tendermint/tendermint/tree/master/spec/consensus/proposer-based-timestamp) +* [BFTTime spec](https://github.com/tendermint/spec/blob/master/spec/consensus/bft-time.md) +* [Issue 371](https://github.com/tendermint/spec/issues/371) diff --git a/docs/architecture/adr-072-request-for-comments.md b/docs/architecture/adr-072-request-for-comments.md new file mode 100644 index 000000000..7b656d05e --- /dev/null +++ b/docs/architecture/adr-072-request-for-comments.md @@ -0,0 +1,105 @@ +# ADR 72: Restore Requests for Comments + +## Changelog + +- 20-Aug-2021: Initial draft (@creachadair) + +## Status + +Implemented + +## Context + +In the past, we kept a collection of Request for Comments (RFC) documents in `docs/rfc`. +Prior to the creation of the ADR process, these documents were used to document +design and implementation decisions about Tendermint Core. The RFC directory +was removed in favor of ADRs, in commit 3761aa69 (PR +[\#6345](https://github.com/tendermint/tendermint/pull/6345)). + +For issues where an explicit design decision or implementation change is +required, an ADR is generally preferable to an open-ended RFC: An ADR is +relatively narrowly-focused, identifies a specific design or implementation +question, and documents the consensus answer to that question. + +Some discussions are more open-ended, however, or don't require a specific +decision to be made (yet). Such conversations are still valuable to document, +and several members of the Tendermint team have been doing so by writing gists +or Google docs to share them around. That works well enough in the moment, but +gists do not support any kind of collaborative editing, and both gists and docs +are hard to discover after the fact. Google docs have much better collaborative +editing, but are worse for discoverability, especially when contributors span +different Google accounts. + +Discoverability is important, because these kinds of open-ended discussions are +useful to people who come later -- either as new team members or as outside +contributors seeking to use and understand the thoughts behind our designs and +the architectural decisions that arose from those discussion. + +With these in mind, I propose that: + +- We re-create a new, initially empty `docs/rfc` directory in the repository, + and use it to capture these kinds of open-ended discussions in supplement to + ADRs. + +- Unlike in the previous RFC scheme, documents in this new directory will + _not_ be used directly for decision-making. This is the key difference + between an RFC and an ADR. + + Instead, an RFC will exist to document background, articulate general + principles, and serve as a historical record of discussion and motivation. + + In this system, an RFC may _only_ result in a decision indirectly, via ADR + documents created in response to the RFC. + + **In short:** If a decision is required, write an ADR; otherwise if a + sufficiently broad discussion is needed, write an RFC. + +Just so that there is a consistent format, I also propose that: + +- RFC files are named `rfc-XXX-title.{md,rst,txt}` and are written in plain + text, Markdown, or ReStructured Text. + +- Like an ADR, an RFC should include a high-level change log at the top of the + document, and sections for: + + * Abstract: A brief, high-level synopsis of the topic. + * Background: Any background necessary to understand the topic. + * Discussion: Detailed discussion of the issue being considered. + +- Unlike an ADR, an RFC does _not_ include sections for Decisions, Detailed + Design, or evaluation of proposed solutions. If an RFC leads to a proposal + for an actual architectural change, that must be recorded in an ADR in the + usual way, and may refer back to the RFC in its References section. + +## Alternative Approaches + +Leaving aside implementation details, the main alternative to this proposal is +to leave things as they are now, with ADRs as the only log of record and other +discussions being held informally in whatever medium is convenient at the time. + +## Decision + +(pending) + +## Detailed Design + +- Create a new `docs/rfc` directory in the `tendermint` repository. Note that + this proposal intentionally does _not_ pull back the previous contents of + that path from Git history, as those documents were appropriately merged into + the ADR process. + +- Create a `README.md` for RFCs that explains the rules and their relationship + to ADRs. + +- Create an `rfc-template.md` file for RFC files. + +## Consequences + +### Positive + +- We will have a more discoverable place to record open-ended discussions that + do not immediately result in a design change. + +### Negative + +- Potentially some people could be confused about the RFC/ADR distinction. diff --git a/docs/architecture/adr-073-libp2p.md b/docs/architecture/adr-073-libp2p.md new file mode 100644 index 000000000..080fecbcd --- /dev/null +++ b/docs/architecture/adr-073-libp2p.md @@ -0,0 +1,235 @@ +# ADR 073: Adopt LibP2P + +## Changelog + +- 2021-11-02: Initial Draft (@tychoish) + +## Status + +Proposed. + +## Context + + +As part of the 0.35 development cycle, the Tendermint team completed +the first phase of the work described in ADRs 61 and 62, which included a +large scale refactoring of the reactors and the p2p message +routing. This replaced the switch and many of the other legacy +components without breaking protocol or network-level +interoperability and left the legacy connection/socket handling code. + +Following the release, the team has reexamined the state of the code +and the design, as well as Tendermint's requirements. The notes +from that process are available in the [P2P Roadmap +RFC][rfc]. + +This ADR supersedes the decisions made in ADRs 60 and 61, but +builds on the completed portions of this work. Previously, the +boundaries of peer management, message handling, and the higher level +business logic (e.g., "the reactors") were intermingled, and core +elements of the p2p system were responsible for the orchestration of +higher-level business logic. Refactoring the legacy components +made it more obvious that this entanglement of responsibilities +had outsized influence on the entire implementation, making +it difficult to iterate within the current abstractions. +It would not be viable to maintain interoperability with legacy +systems while also achieving many of our broader objectives. + +LibP2P is a thoroughly-specified implementation of a peer-to-peer +networking stack, designed specifically for systems such as +ours. Adopting LibP2P as the basis of Tendermint will allow the +Tendermint team to focus more of their time on other differentiating +aspects of the system, and make it possible for the ecosystem as a +whole to take advantage of tooling and efforts of the LibP2P +platform. + +## Alternative Approaches + +As discussed in the [P2P Roadmap RFC][rfc], the primary alternative would be to +continue development of Tendermint's home-grown peer-to-peer +layer. While that would give the Tendermint team maximal control +over the peer system, the current design is unexceptional on its +own merits, and the prospective maintenance burden for this system +exceeds our tolerances for the medium term. + +Tendermint can and should differentiate itself not on the basis of +its networking implementation or peer management tools, but providing +a consistent operator experience, a battle-tested consensus algorithm, +and an ergonomic user experience. + +## Decision + +Tendermint will adopt libp2p during the 0.37 development cycle, +replacing the bespoke Tendermint P2P stack. This will remove the +`Endpoint`, `Transport`, `Connection`, and `PeerManager` abstractions +and leave the reactors, `p2p.Router` and `p2p.Channel` +abstractions. + +LibP2P may obviate the need for a dedicated peer exchange (PEX) +reactor, which would also in turn obviate the need for a dedicated +seed mode. If this is the case, then all of this functionality would +be removed. + +If it turns out (based on the advice of Protocol Labs) that it makes +sense to maintain separate pubsub or gossipsub topics +per-message-type, then the `Router` abstraction could also +be entirely subsumed. + +## Detailed Design + +### Implementation Changes + +The seams in the P2P implementation between the higher level +constructs (reactors), the routing layer (`Router`) and the lower +level connection and peer management code make this operation +relatively straightforward to implement. A key +goal in this design is to minimize the impact on the reactors +(potentially entirely,) and completely remove the lower level +components (e.g., `Transport`, `Connection` and `PeerManager`) using the +separation afforded by the `Router` layer. The current state of the +code makes these changes relatively surgical, and limited to a small +number of methods: + +- `p2p.Router.OpenChannel` will still return a `Channel` structure + which will continue to serve as a pipe between the reactors and the + `Router`. The implementation will no longer need the queue + implementation, and will instead start goroutines that + are responsible for routing the messages from the channel to libp2p + fundamentals, replacing the current `p2p.Router.routeChannel`. + +- The current `p2p.Router.dialPeers` and `p2p.Router.acceptPeers`, + are responsible for establishing outbound and inbound connections, + respectively. These methods will be removed, along with + `p2p.Router.openConnection`, and the libp2p connection manager will + be responsible for maintaining network connectivity. + +- The `p2p.Channel` interface will change to replace Go + channels with a more functional interface for sending messages. + New methods on this object will take contexts to support safe + cancellation, and return errors, and will block rather than + running asynchronously. The `Out` channel through which + reactors send messages to Peers, will be replaced by a `Send` + method, and the Error channel will be replaced by an `Error` + method. + +- Reactors will be passed an interface that will allow them to + access Peer information from libp2p. This will supplant the + `p2p.PeerUpdates` subscription. + +- Add some kind of heartbeat message at the application level + (e.g. with a reactor,) potentially connected to libp2p's DHT to be + used by reactors for service discovery, message targeting, or other + features. + +- Replace the existing/legacy handshake protocol with [Noise](http://www.noiseprotocol.org/noise.html). + +This project will initially use the TCP-based transport protocols within +libp2p. QUIC is also available as an option that we may implement later. +We will not support mixed networks in the initial release, but will +revisit that possibility later if there is a demonstrated need. + +### Upgrade and Compatibility + +Because the routers and all current P2P libraries are `internal` +packages and not part of the public API, the only changes to the public +API surface area of Tendermint will be different configuration +file options, replacing the current P2P options with options relevant +to libp2p. + +However, it will not be possible to run a network with both networking +stacks active at once, so the upgrade to the version of Tendermint +will need to be coordinated between all nodes of the network. This is +consistent with the expectations around upgrades for Tendermint moving +forward, and will help manage both the complexity of the project and +the implementation timeline. + +## Open Questions + +- What is the role of Protocol Labs in the implementation of libp2p in + tendermint, both during the initial implementation and on an ongoing + basis thereafter? + +- Should all P2P traffic for a given node be pushed to a single topic, + so that a topic maps to a specific ChainID, or should + each reactor (or type of message) have its own topic? How many + topics can a libp2p network support? Is there testing that validates + the capabilities? + +- Tendermint presently provides a very coarse QoS-like functionality + using priorities based on message-type. + This intuitively/theoretically ensures that evidence and consensus + messages don't get starved by blocksync/statesync messages. It's + unclear if we can or should attempt to replicate this with libp2p. + +- What kind of QoS functionality does libp2p provide and what kind of + metrics does libp2p provide about it's QoS functionality? + +- Is it possible to store additional (and potentially arbitrary) + information into the DHT as part of the heartbeats between nodes, + such as the latest height, and then access that in the + reactors. How frequently can the DHT be updated? + +- Does it make sense to have reactors continue to consume inbound + messages from a Channel (`In`) or is there another interface or + pattern that we should consider? + + - We should avoid exposing Go channels when possible, and likely + some kind of alternate iterator likely makes sense for processing + messages within the reactors. + +- What are the security and protocol implications of tracking + information from peer heartbeats and exposing that to reactors? + +- How much (or how little) configuration can Tendermint provide for + libp2p, particularly on the first release? + + - In general, we should not support byo-functionality for libp2p + components within Tendermint, and reduce the configuration surface + area, as much as possible. + +- What are the best ways to provide request/response semantics for + reactors on top of libp2p? Will it be possible to add + request/response semantics in a future release or is there + anticipatory work that needs to be done as part of the initial + release? + +## Consequences + +### Positive + +- Reduce the maintenance burden for the Tendermint Core team by + removing a large swath of legacy code that has proven to be + difficult to modify safely. + +- Remove the responsibility for maintaining and developing the entire + peer management system (p2p) and stack. + +- Provide users with a more stable peer and networking system, + Tendermint can improve operator experience and network stability. + +### Negative + +- By deferring to library implementations for peer management and + networking, Tendermint loses some flexibility for innovating at the + peer and networking level. However, Tendermint should be innovating + primarily at the consensus layer, and libp2p does not preclude + optimization or development in the peer layer. + +- Libp2p is a large dependency and Tendermint would become dependent + upon Protocol Labs' release cycle and prioritization for bug + fixes. If this proves onerous, it's possible to maintain a vendor + fork of relevant components as needed. + +### Neutral + +- N/A + +## References + +- [ADR 61: P2P Refactor Scope][adr61] +- [ADR 62: P2P Architecture][adr62] +- [P2P Roadmap RFC][rfc] + +[adr61]: ./adr-061-p2p-refactor-scope.md +[adr62]: ./adr-062-p2p-architecture.md +[rfc]: ../rfc/rfc-000-p2p-roadmap.rst diff --git a/docs/architecture/adr-074-timeout-params.md b/docs/architecture/adr-074-timeout-params.md new file mode 100644 index 000000000..22fd784bd --- /dev/null +++ b/docs/architecture/adr-074-timeout-params.md @@ -0,0 +1,203 @@ +# ADR 74: Migrate Timeout Parameters to Consensus Parameters + +## Changelog + +- 03-Jan-2022: Initial draft (@williambanfield) +- 13-Jan-2022: Updated to indicate work on upgrade path needed (@williambanfield) + +## Status + +Proposed + +## Context + +### Background + +Tendermint's consensus timeout parameters are currently configured locally by each validator +in the validator's [config.toml][config-toml]. +This means that the validators on a Tendermint network may have different timeouts +from each other. There is no reason for validators on the same network to configure +different timeout values. Proper functioning of the Tendermint consensus algorithm +relies on these parameters being uniform across validators. + +The configurable values are as follows: + +* `TimeoutPropose` + * How long the consensus algorithm waits for a proposal block before issuing a prevote. + * If no prevote arrives by `TimeoutPropose`, then the consensus algorithm will issue a nil prevote. +* `TimeoutProposeDelta` + * How much the `TimeoutPropose` grows each round. +* `TimeoutPrevote` + * How long the consensus algorithm waits after receiving +2/3 prevotes with + no quorum for a value before issuing a precommit for nil. + (See the [arXiv paper][arxiv-paper], Algorithm 1, Line 34) +* `TimeoutPrevoteDelta` + * How much the `TimeoutPrevote` increases with each round. +* `TimeoutPrecommit` + * How long the consensus algorithm waits after receiving +2/3 precommits that + do not have a quorum for a value before entering the next round. + (See the [arXiv paper][arxiv-paper], Algorithm 1, Line 47) +* `TimeoutPrecommitDelta` + * How much the `TimeoutPrecommit` increases with each round. +* `TimeoutCommit` + * How long the consensus algorithm waits after committing a block but before starting the new height. + * This gives a validator a chance to receive slow precommits. +* `SkipTimeoutCommit` + * Make progress as soon as the node has 100% of the precommits. + + +### Overview of Change + +We will consolidate the timeout parameters and migrate them from the node-local +`config.toml` file into the network-global consensus parameters. + +The 8 timeout parameters will be consolidated down to 6. These will be as follows: + +* `TimeoutPropose` + * Same as current `TimeoutPropose`. +* `TimeoutProposeDelta` + * Same as current `TimeoutProposeDelta`. +* `TimeoutVote` + * How long validators wait for votes in both the prevote + and precommit phase of the consensus algorithm. This parameter subsumes + the current `TimeoutPrevote` and `TimeoutPrecommit` parameters. +* `TimeoutVoteDelta` + * How much the `TimeoutVote` will grow each successive round. + This parameter subsumes the current `TimeoutPrevoteDelta` and `TimeoutPrecommitDelta` + parameters. +* `TimeoutCommit` + * Same as current `TimeoutCommit`. +* `BypassCommitTimeout` + * Same as current `SkipTimeoutCommit`, renamed for clarity. + +A safe default will be provided by Tendermint for each of these parameters and +networks will be able to update the parameters as they see fit. Local updates +to these parameters will no longer be possible; instead, the application will control +updating the parameters. Applications using the Cosmos SDK will be automatically be +able to change the values of these consensus parameters [via a governance proposal][cosmos-sdk-consensus-params]. + +This change is low-risk. While parameters are locally configurable, many running chains +do not change them from their default values. For example, initializing +a node on Osmosis, Terra, and the Cosmos Hub using the their `init` command produces +a `config.toml` with Tendermint's default values for these parameters. + +### Why this parameter consolidation? + +Reducing the number of parameters is good for UX. Fewer superfluous parameters makes +running and operating a Tendermint network less confusing. + +The Prevote and Precommit messages are both similar sizes, require similar amounts +of processing so there is no strong need for them to be configured separately. + +The `TimeoutPropose` parameter governs how long Tendermint will wait for the proposed +block to be gossiped. Blocks are much larger than votes and therefore tend to be +gossiped much more slowly. It therefore makes sense to keep `TimeoutPropose` and +the `TimeoutProposeDelta` as parameters separate from the vote timeouts. + +`TimeoutCommit` is used by chains to ensure that the network waits for the votes from +slower validators before proceeding to the next height. Without this timeout, the votes +from slower validators would consistently not be included in blocks and those validators +would not be counted as 'up' from the chain's perspective. Being down damages a validator's +reputation and causes potential stakers to think twice before delegating to that validator. + +`TimeoutCommit` also prevents the network from producing the next height as soon as validators +on the fastest hardware with a summed voting power of +2/3 of the network's total have +completed execution of the block. Allowing the network to proceed as soon as the fastest ++2/3 completed execution would have a cumulative effect over heights, eventually +leaving slower validators unable to participate in consensus at all. `TimeoutCommit` +therefore allows networks to have greater variability in hardware. Additional +discussion of this can be found in [tendermint issue 5911][tendermint-issue-5911-comment] +and [spec issue 359][spec-issue-359]. + +## Alternative Approaches + +### Hardcode the parameters + +Many Tendermint networks run on similar cloud-hosted infrastructure. Therefore, +they have similar bandwidth and machine resources. The timings for propagating votes +and blocks are likely to be reasonably similar across networks. As a result, the +timeout parameters are good candidates for being hardcoded. Hardcoding the timeouts +in Tendermint would mean entirely removing these parameters from any configuration +that could be altered by either an application or a node operator. Instead, +Tendermint would ship with a set of timeouts and all applications using Tendermint +would use this exact same set of values. + +While Tendermint nodes often run with similar bandwidth and on similar cloud-hosted +machines, there are enough points of variability to make configuring +consensus timeouts meaningful. Namely, Tendermint network topologies are likely to be +very different from chain to chain. Additionally, applications may vary greatly in +how long the `Commit` phase may take. Applications that perform more work during `Commit` +require a longer `TimeoutCommit` to allow the application to complete its work +and be prepared for the next height. + +## Decision + +The decision has been made to implement this work, with the caveat that the +specific mechanism for introducing the new parameters to chains is still ongoing. + +## Detailed Design + +### New Consensus Parameters + +A new `TimeoutParams` `message` will be added to the [params.proto file][consensus-params-proto]. +This message will have the following form: + +```proto +message TimeoutParams { + google.protobuf.Duration propose = 1; + google.protobuf.Duration propose_delta = 2; + google.protobuf.Duration vote = 3; + google.protobuf.Duration vote_delta = 4; + google.protobuf.Duration commit = 5; + bool bypass_commit_timeout = 6; +} +``` + +This new message will be added as a field into the [`ConsensusParams` +message][consensus-params-proto]. The same default values that are [currently +set for these parameters][current-timeout-defaults] in the local configuration +file will be used as the defaults for these new consensus parameters in the +[consensus parameter defaults][default-consensus-params]. + +The new consensus parameters will be subject to the same +[validity rules][time-param-validation] as the current configuration values, +namely, each value must be non-negative. + +### Migration + +The new `ConsensusParameters` will be added during an upcoming release. In this +release, the old `config.toml` parameters will cease to control the timeouts and +an error will be logged on nodes that continue to specify these values. The specific +mechanism by which these parameters will added to a chain is being discussed in +[RFC-009][rfc-009] and will be decided ahead of the next release. + +The specific mechanism for adding these parameters depends on work related to +[soft upgrades][soft-upgrades], which is still ongoing. + +## Consequences + +### Positive + +* Timeout parameters will be equal across all of the validators in a Tendermint network. +* Remove superfluous timeout parameters. + +### Negative + +### Neutral + +* Timeout parameters require consensus to change. + +## References + +[conseusus-params-proto]: https://github.com/tendermint/spec/blob/a00de7199f5558cdd6245bbbcd1d8405ccfb8129/proto/tendermint/types/params.proto#L11 +[hashed-params]: https://github.com/tendermint/tendermint/blob/7cdf560173dee6773b80d1c574a06489d4c394fe/types/params.go#L49 +[default-consensus-params]: https://github.com/tendermint/tendermint/blob/7cdf560173dee6773b80d1c574a06489d4c394fe/types/params.go#L79 +[current-timeout-defaults]: https://github.com/tendermint/tendermint/blob/7cdf560173dee6773b80d1c574a06489d4c394fe/config/config.go#L955 +[config-toml]: https://github.com/tendermint/tendermint/blob/5cc980698a3402afce76b26693ab54b8f67f038b/config/toml.go#L425-L440 +[cosmos-sdk-consensus-params]: https://github.com/cosmos/cosmos-sdk/issues/6197 +[time-param-validation]: https://github.com/tendermint/tendermint/blob/7cdf560173dee6773b80d1c574a06489d4c394fe/config/config.go#L1038 +[tendermint-issue-5911-comment]: https://github.com/tendermint/tendermint/issues/5911#issuecomment-973560381 +[spec-issue-359]: https://github.com/tendermint/spec/issues/359 +[arxiv-paper]: https://arxiv.org/pdf/1807.04938.pdf +[soft-upgrades]: https://github.com/tendermint/spec/pull/222 +[rfc-009]: https://github.com/tendermint/tendermint/pull/7524 diff --git a/docs/architecture/adr-075-rpc-subscription.md b/docs/architecture/adr-075-rpc-subscription.md new file mode 100644 index 000000000..1bb53f337 --- /dev/null +++ b/docs/architecture/adr-075-rpc-subscription.md @@ -0,0 +1,684 @@ +# ADR 075: RPC Event Subscription Interface + +## Changelog + +- 01-Mar-2022: Update long-polling interface (@creachadair). +- 10-Feb-2022: Updates to reflect implementation. +- 26-Jan-2022: Marked accepted. +- 22-Jan-2022: Updated and expanded (@creachadair). +- 20-Nov-2021: Initial draft (@creachadair). + +--- +## Status + +Accepted + +--- +## Background & Context + +For context, see [RFC 006: Event Subscription][rfc006]. + +The [Tendermint RPC service][rpc-service] permits clients to subscribe to the +event stream generated by a consensus node. This allows clients to observe the +state of the consensus network, including details of the consensus algorithm +state machine, proposals, transaction delivery, and block completion. The +application may also attach custom key-value attributes to events to expose +application-specific details to clients. + +The event subscription API in the RPC service currently comprises three methods: + +1. `subscribe`: A request to subscribe to the events matching a specific + [query expression][query-grammar]. Events can be filtered by their key-value + attributes, including custom attributes provided by the application. + +2. `unsubscribe`: A request to cancel an existing subscription based on its + query expression. + +3. `unsubscribe_all`: A request to cancel all existing subscriptions belonging + to the client. + +There are some important technical and UX issues with the current RPC event +subscription API. The rest of this ADR outlines these problems in detail, and +proposes a new API scheme intended to address them. + +### Issue 1: Persistent connections + +To subscribe to a node's event stream, a client needs a persistent connection +to the node. Unlike the other methods of the service, for which each call is +serviced by a short-lived HTTP round trip, subscription delivers a continuous +stream of events to the client by hijacking the HTTP channel for a websocket. +The stream (and hence the HTTP request) persists until either the subscription +is explicitly canceled, or the connection is closed. + +There are several problems with this API: + +1. **Expensive per-connection state**: The server must maintain a substantial + amount of state per subscriber client: + + - The current implementation uses a [WebSocket][ws] for each active + subscriber. The connection must be maintained even if there are no + matching events for a given client. + + The server can drop idle connections to save resources, but doing so + terminates all subscriptions on those connections and forces those clients + to re-connect, adding additional resource churn for the server. + + - In addition, the server maintains a separate buffer of undelivered events + for each client. This is to reduce the dual risks that a client will miss + events, and that a slow client could "push back" on the publisher, + impeding the progress of consensus. + + Because event traffic is quite bursty, queues can potentially take up a + lot of memory. Moreover, each subscriber may have a different filter + query, so the server winds up having to duplicate the same events among + multiple subscriber queues. Not only does this add memory pressure, but it + does so most at the worst possible time, i.e., when the server is already + under load from high event traffic. + +2. **Operational access control is difficult**: The server's websocket + interface exposes _all_ the RPC service endpoints, not only the subscription + methods. This includes methods that allow callers to inject arbitrary + transactions (`broadcast_tx_*`) and evidence (`broadcast_evidence`) into the + network, remove transactions (`remove_tx`), and request arbitrary amounts of + chain state. + + Filtering requests to the GET endpoint is straightforward: A reverse proxy + like [nginx][nginx] can easily filter methods by URL path. Filtering POST + requests takes a bit more work, but can be managed with a filter program + that speaks [FastCGI][fcgi] and parses JSON-RPC request bodies. + + Filtering the websocket interface requires a dedicated proxy implementation. + Although nginx can [reverse-proxy websockets][rp-ws], it does not support + filtering websocket traffic via FastCGI. The operator would need to either + implement a custom [nginx extension module][ng-xm] or build and run a + standalone proxy that implements websocket and filters each session. Apart + from the work, this also makes the system even more resource intensive, as + well as introducing yet another connection that could potentially time out + or stall on full buffers. + + Even for the simple case of restricting access to only event subscription, + there is no easy solution currently: Once a caller has access to the + websocket endpoint, it has complete access to the RPC service. + +### Issue 2: Inconvenient client API + +The subscription interface has some inconvenient features for the client as +well as the server. These include: + +1. **Non-standard protocol:** The RPC service is mostly [JSON-RPC 2.0][jsonrpc2], + but the subscription interface diverges from the standard. + + In a standard JSON-RPC 2.0 call, the client initiates a request to the + server with a unique ID, and the server concludes the call by sending a + reply for that ID. The `subscribe` implementation, however, sends multiple + responses to the client's request: + + - The client sends `subscribe` with some ID `x` and the desired query + + - The server responds with ID `x` and an empty confirmation response. + + - The server then (repeatedly) sends event result responses with ID `x`, one + for each item with a matching event. + + Standard JSON-RPC clients will reject the subsequent replies, as they + announce a request ID (`x`) that is already complete. This means a caller + has to implement Tendermint-specific handling for these responses. + + Moreover, the result format is different between the initial confirmation + and the subsequent responses. This means a caller has to implement special + logic for decoding the first response versus the subsequent ones. + +2. **No way to detect data loss:** The subscriber connection can be terminated + for many reasons. Even ignoring ordinary network issues (e.g., packet loss): + + - The server will drop messages and/or close the websocket if its write + buffer fills, or if the queue of undelivered matching events is not + drained fast enough. The client has no way to discover that messages were + dropped even if the connection remains open. + + - Either the client or the server may close the websocket if the websocket + PING and PONG exchanges are not handled correctly, or frequently enough. + Even if correctly implemented, this may fail if the system is under high + load and cannot service those control messages in a timely manner. + + When the connection is terminated, the server drops all the subscriptions + for that client (as if it had called `unsubscribe_all`). Even if the client + reconnects, any events that were published during the period between the + disconnect and re-connect and re-subscription will be silently lost, and the + client has no way to discover that it missed some relevant messages. + +3. **No way to replay old events:** Even if a client knew it had missed some + events (due to a disconnection, for example), the API provides no way for + the client to "play back" events it may have missed. + +4. **Large response sizes:** Some event data can be quite large, and there can + be substantial duplication across items. The API allows the client to select + _which_ events are reported, but has no way to control which parts of a + matching event it wishes to receive. + + This can be costly on the server (which has to marshal those data into + JSON), the network, and the client (which has to unmarshal the result and + then pick through for the components that are relevant to it). + + Besides being inefficient, this also contributes to some of the persistent + connection issues mentioned above, e.g., filling up the websocket write + buffer and forcing the server to queue potentially several copies of a large + value in memory. + +5. **Client identity is tied to network address:** The Tendermint event API + identifies each subscriber by a (Client ID, Query) pair. In the RPC service, + the query is provided by the client, but the client ID is set to the TCP + address of the client (typically "host:port" or "ip:port"). + + This means that even if the server did _not_ drop subscriptions immediately + when the websocket connection is closed, a client may not be able to + reattach to its existing subscription. Dialing a new connection is likely + to result in a different port (and, depending on their own proxy setup, + possibly a different public IP). + + In isolation, this problem would be easy to work around with a new + subscription parameter, but it would require several other changes to the + handling of event subscriptions for that workaround to become useful. + +--- +## Decision + +To address the described problems, we will: + +1. Introduce a new API for event subscription to the Tendermint RPC service. + The proposed API is described in [Detailed Design](#detailed-design) below. + +2. This new API will target the Tendermint v0.36 release, during which the + current ("streaming") API will remain available as-is, but deprecated. + +3. The streaming API will be entirely removed in release v0.37, which will + require all users of event subscription to switch to the new API. + +> **Point for discussion:** Given that ABCI++ and PBTS are the main priorities +> for v0.36, it would be fine to slip the first phase of this work to v0.37. +> Unless there is a time problem, however, the proposed design does not disrupt +> the work on ABCI++ or PBTS, and will not increase the scope of breaking +> changes. Therefore the plan is to begin in v0.36 and slip only if necessary. + +--- +## Detailed Design + +### Design Goals + +Specific goals of this design include: + +1. Remove the need for a persistent connection to each subscription client. + Subscribers should use the same HTTP request flow for event subscription + requests as for other RPC calls. + +2. The server retains minimal state (possibly none) per-subscriber. In + particular: + + - The server does not buffer unconsumed writes nor queue undelivered events + on a per-client basis. + - A client that stalls or goes idle does not cost the server any resources. + - Any event data that is buffered or stored is shared among _all_ + subscribers, and is not duplicated per client. + +3. Slow clients have no impact (or minimal impact) on the rate of progress of + the consensus algorithm, beyond the ambient overhead of servicing individual + RPC requests. + +4. Clients can tell when they have missed events matching their subscription, + within some reasonable (configurable) window of time, and can "replay" + events within that window to catch up. + +5. Nice to have: It should be easy to use the event subscription API from + existing standard tools and libraries, including command-line use for + testing and experimentation. + +### Definitions + +- The **event stream** of a node is a single, time-ordered, heterogeneous + stream of event items. + +- Each **event item** comprises an **event datum** (for example, block header + metadata for a new-block event), and zero or more optional **events**. + +- An **event** means the [ABCI `Event` data type][abci-event], which comprises + a string type and zero or more string key-value **event attributes**. + + The use of the new terms "event item" and "event datum" is to avert confusion + between the values that are published to the event bus (what we call here + "event items") and the ABCI `Event` data type. + +- The node assigns each event item a unique identifier string called a + **cursor**. A cursor must be unique among all events published by a single + node, but it is not required to be unique globally across nodes. + + Cursors are time-ordered so that given event items A and B, if A was + published before B, then cursor(A) < cursor(B) in lexicographic order. + + A minimum viable cursor implementation is a tuple consisting of a timestamp + and a sequence number (e.g., `16CCC798FB5F4670-0123`). However, it may also + be useful to append basic type information to a cursor, to allow efficient + filtering (e.g., `16CCC87E91869050-0091:BeginBlock`). + + The initial implementation will use the minimum viable format. + +### Discussion + +The node maintains an **event log**, a shared ordered record of the events +published to its event bus within an operator-configurable time window. The +initial implementation will store the event log in-memory, and the operator +will be given two per-node configuration settings. Note, these names are +provisional: + +- `[rpc] event-log-window-size`: A duration before the latest published event, + during which the node will retain event items published. Setting this value + to zero disables event subscription. + +- `[rpc] event-log-max-items`: A maximum number of event items that the node + will retain within the time window. If the number of items exceeds this + value, the node discardes the oldest items in the window. Setting this value + to zero means that no limit is imposed on the number of items. + +The node will retain all events within the time window, provided they do not +exceed the maximum number. These config parameters allow the operator to +loosely regulate how much memory and storage the node allocates to the event +log. The client can use the server reply to tell whether the events it wants +are still available from the event log. + +The event log is shared among all subscribers to the node. + +> **Discussion point:** Should events persist across node restarts? +> +> The current event API does not persist events across restarts, so this new +> design does not either. Note, however, that we may "spill" older event data +> to disk as a way of controlling memory use. Such usage is ephemeral, however, +> and does not need to be tracked as node data (e.g., it could be temp files). + +### Query API + +To retrieve event data, the client will call the (new) RPC method `events`. +The parameters of this method will correspond to the following Go types: + +```go +type EventParams struct { + // Optional filter spec. If nil or empty, all items are eligible. + Filter *Filter `json:"filter"` + + // The maximum number of eligible results to return. + // If zero or negative, the server will report a default number. + MaxResults int `json:"max_results"` + + // Return only items after this cursor. If empty, the limit is just + // before the the beginning of the event log. + After string `json:"after"` + + // Return only items before this cursor. If empty, the limit is just + // after the head of the event log. + Before string `json:"before"` + + // Wait for up to this long for events to be available. + WaitTime time.Duration `json:"wait_time"` +} + +type Filter struct { + Query string `json:"query"` +} +``` + +> **Discussion point:** The initial implementation will not cache filter +> queries for the client. If this turns out to be a performance issue in +> production, the service can keep a small shared cache of compiled queries. +> Given the improvements from #7319 et seq., this should not be necessary. + +> **Discussion point:** For the initial implementation, the new API will use +> the existing query language as-is. Future work may extend the Filter message +> with a more structured and/or expressive query surface, but that is beyond +> the scope of this design. + +The semantics of the request are as follows: An item in the event log is +**eligible** for a query if: + +- It is newer than the `after` cursor (if set). +- It is older than the `before` cursor (if set). +- It matches the filter (if set). + +Among the eligible items in the log, the server returns up to `max_results` of +the newest items, in reverse order of cursor. If `max_results` is unset the +server chooses a number to return, and will cap `max_results` at a sensible +limit. + +The `wait_time` parameter is used to effect polling. If `before` is empty and +no items are available, the server will wait for up to `wait_time` for matching +items to arrive at the head of the log. If `wait_time` is zero or negative, the +server will wait for a default (positive) interval. + +If `before` non-empty, `wait_time` is ignored: new results are only added to +the head of the log, so there is no need to wait. This allows the client to +poll for new data, and "page" backward through matching event items. This is +discussed in more detail below. + +The server will set a sensible cap on the maximum `wait_time`, overriding +client-requested intervals longer than that. + +A successful reply from the `events` request corresponds to the following Go +types: + +```go +type EventReply struct { + // The items matching the request parameters, from newest + // to oldest, if any were available within the timeout. + Items []*EventItem `json:"items"` + + // This is true if there is at least one older matching item + // available in the log that was not returned. + More bool `json:"more"` + + // The cursor of the oldest item in the log at the time of this reply, + // or "" if the log is empty. + Oldest string `json:"oldest"` + + // The cursor of the newest item in the log at the time of this reply, + // or "" if the log is empty. + Newest string `json:"newest"` +} + +type EventItem struct { + // The cursor of this item. + Cursor string `json:"cursor"` + + // The encoded event data for this item. + // The type identifies the structure of the value. + Data struct { + Type string `json:"type"` + Value json.RawMessage `json:"value"` + } `json:"data"` +} +``` + +The `oldest` and `newest` fields of the reply report the cursors of the oldest +and newest items (of any kind) recorded in the event log at the time of the +reply, or are `""` if the log is empty. + +The `data` field contains the type-specific event datum. The datum carries any +ABCI events that may have been defined. + +> **Discussion point**: Based on [issue #7273][i7273], I did not include a +> separate field in the response for the ABCI events, since it duplicates data +> already stored elsewhere in the event data. + +The semantics of the reply are as follows: + +- If `items` is non-empty: + + - Items are ordered from newest to oldest. + + - If `more` is true, there is at least one additional, older item in the + event log that was not returned (in excess of `max_results`). + + In this case the client can fetch the next page by setting `before` in a + new request, to the cursor of the oldest item fetched (i.e., the last one + in `items`). + + - Otherwise (if `more` is false), all the matching results have been + reported (pagination is complete). + + - The first element of `items` identifies the newest item considered. + Subsequent poll requests can set `after` to this cursor to skip items + that were already retrieved. + +- If `items` is empty: + + - If the `before` was set in the request, there are no further eligible + items for this query in the log (pagination is complete). + + This is just a safety case; the client can detect this without issuing + another call by consulting the `more` field of the previous reply. + + - If the `before` was empty in the request, no eligible items were + available before the `wait_time` expired. The client may poll again to + wait for more event items. + +A client can store cursor values to detect data loss and to recover from +crashes and connectivity issues: + +- After a crash, the client requests events after the newest cursor it has + seen. If the reply indicates that cursor is no longer in range, the client + may (conservatively) conclude some event data may have been lost. + +- On the other hand, if it _is_ in range, the client can then page back through + the results that it missed, and then resume polling. As long as its recovery + cursor does not age out before it finishes, the client can be sure it has all + the relevant results. + +### Other Notes + +- The new API supports two general "modes" of operation: + + 1. In ordinary operation, clients will **long-poll** the head of the event + log for new events matching their criteria (by setting a `wait_time` and + no `before`). + + 2. If there are more events than the client requested, or if the client needs + to to read older events to recover from a stall or crash, clients will + **page** backward through the event log (by setting `before` and `after`). + +- While the new API requires explicit polling by the client, it makes better + use of the node's existing HTTP infrastructure (e.g., connection pools). + Moreover, the direct implementation is easier to use from standard tools and + client libraries for HTTP and JSON-RPC. + + Explicit polling does shift the burden of timeliness to the client. That is + arguably preferable, however, given that the RPC service is ancillary to the + node's primary goal, viz., consensus. The details of polling can be easily + hidden from client applications with simple libraries. + +- The format of a cursor is considered opaque to the client. Clients must not + parse cursor values, but they may rely on their ordering properties. + +- To maintain the event log, the server must prune items outside the time + window and in excess of the item limit. + + The initial implementation will do this by checking the tail of the event log + after each new item is published. If the number of items in the log exceeds + the item limit, it will delete oldest items until the log is under the limit; + then discard any older than the time window before the latest. + + To minimize coordination interference between the publisher (the event bus) + and the subcribers (the `events` service handlers), the event log will be + stored as a persistent linear queue with shared structure (a cons list). A + single reader-writer mutex will guard the "head" of the queue where new + items are published: + + - **To publish a new item**, the publisher acquires the write lock, conses a + new item to the front of the existing queue, and replaces the head pointer + with the new item. + + - **To scan the queue**, a reader acquires the read lock, captures the head + pointer, and then releases the lock. The rest of its request can be served + without holding a lock, since the queue structure will not change. + + When a reader wants to wait, it will yield the lock and wait on a condition + that is signaled when the publisher swings the pointer. + + - **To prune the queue**, the publisher (who is the sole writer) will track + the queue length and the age of the oldest item separately. When the + length and or age exceed the configured bounds, it will construct a new + queue spine on the same items, discarding out-of-band values. + + Pruning can be done while the publisher already holds the write lock, or + could be done outside the lock entirely: Once the new queue is constructed, + the lock can be re-acquired to swing the pointer. This costs some extra + allocations for the cons cells, but avoids duplicating any event items. + The pruning step is a simple linear scan down the first (up to) max-items + elements of the queue, to find the breakpoint of age and length. + + Moreover, the publisher can amortize the cost of pruning by item count, if + necessary, by pruning length "more aggressively" than the configuration + requires (e.g., reducing to 3/4 of the maximum rather than 1/1). + + The state of the event log before the publisher acquires the lock: + ![Before publish and pruning](./img/adr-075-log-before.png) + + After the publisher has added a new item and pruned old ones: + ![After publish and pruning](./img/adr-075-log-after.png) + +### Migration Plan + +This design requires that clients eventually migrate to the new event +subscription API, but provides a full release cycle with both APIs in place to +make this burden more tractable. The migration strategy is broadly: + +**Phase 1**: Release v0.36. + +- Implement the new `events` endpoint, keeping the existing methods as they are. +- Update the Go clients to support the new `events` endpoint, and handle polling. +- Update the old endpoints to log annoyingly about their own deprecation. +- Write tutorials about how to migrate client usage. + +At or shortly after release, we should proactively update the Cosmos SDK to use +the new API, to remove a disincentive to upgrading. + +**Phase 2**: Release v0.37 + +- During development, we should actively seek out any existing users of the + streaming event subscription API and help them migrate. +- Possibly also: Spend some time writing clients for JS, Rust, et al. +- Release: Delete the old implementation and all the websocket support code. + +> **Discussion point**: Even though the plan is to keep the existing service, +> we might take the opportunity to restrict the websocket endpoint to _only_ +> the event streaming service, removing the other endpoints. To minimize the +> disruption for users in the v0.36 cycle, I have decided not to do this for +> the first phase. +> +> If we wind up pushing this design into v0.37, however, we should re-evaulate +> this partial turn-down of the websocket. + +### Future Work + +- This design does not immediately address the problem of allowing the client + to control which data are reported back for event items. That concern is + deferred to future work. However, it would be straightforward to extend the + filter and/or the request parameters to allow more control. + +- The node currently stores a subset of event data (specifically the block and + transaction events) for use in reindexing. While these data are redundant + with the event log described in this document, they are not sufficient to + cover event subscription, as they omit other event types. + + In the future we should investigate consolidating or removing event data from + the state store entirely. For now this issue is out of scope for purposes of + updating the RPC API. We may be able to piggyback on the database unification + plans (see [RFC 001][rfc001]) to store the event log separately, so its + pruning policy does not need to be tied to the block and state stores. + +- This design reuses the existing filter query language from the old API. In + the future we may want to use a more structured and/or expressive query. The + Filter object can be extended with more fields as needed to support this. + +- Some users have trouble communicating with the RPC service because of + configuration problems like improperly-set CORS policies. While this design + does not address those issues directly, we might want to revisit how we set + policies in the RPC service to make it less susceptible to confusing errors + caused by misconfiguration. + +--- +## Consequences + +- ✅ Reduces the number of transport options for RPC. Supports [RFC 002][rfc002]. +- ️✅ Removes the primary non-standard use of JSON-RPC. +- ⛔️ Forces clients to migrate to a different API (eventually). +- ↕️ API requires clients to poll, but this reduces client state on the server. +- ↕️ We have to maintain both implementations for a whole release, but this + gives clients time to migrate. + +--- +## Alternative Approaches + +The following alternative approaches were considered: + +1. **Leave it alone.** Since existing tools mostly already work with the API as + it stands today, we could leave it alone and do our best to improve its + performance and reliability. + + Based on many issues reported by users and node operators (e.g., + [#3380][i3380], [#6439][i6439], [#6729][i6729], [#7247][i7247]), the + problems described here affect even the existing use that works. Investing + further incremental effort in the existing API is unlikely to address these + issues. + +2. **Design a better streaming API.** Instead of polling, we might try to + design a better "streaming" API for event subscription. + + A significant advantage of switching away from streaming is to remove the + need for persistent connections between the node and subscribers. A new + streaming protocol design would lose that advantage, and would still need a + way to let clients recover and replay. + + This approach might look better if we decided to use a different protocol + for event subscription, say gRPC instead of JSON-RPC. That choice, however, + would be just as breaking for existing clients, for marginal benefit. + Moreover, this option increases both the complexity and the resource cost on + the node implementation. + + Given that resource consumption and complexity are important considerations, + this option was not chosen. + +3. **Defer to an external event broker.** We might remove the entire event + subscription infrastructure from the node, and define an optional interface + to allow the node to publish all its events to an external event broker, + such as Apache Kafka. + + This has the advantage of greatly simplifying the node, but at a great cost + to the node operator: To enable event subscription in this design, the + operator has to stand up and maintain a separate process in communion with + the node, and configuration changes would have to be coordinated across + both. + + Moreover, this approach would be highly disruptive to existing client use, + and migration would probably require switching to third-party libraries. + Despite the potential benefits for the node itself, the costs to operators + and clients seems too large for this to be the best option. + + Publishing to an external event broker might be a worthwhile future project, + if there is any demand for it. That decision is out of scope for this design, + as it interacts with the design of the indexer as well. + +--- +## References + +- [RFC 006: Event Subscription][rfc006] +- [Tendermint RPC service][rpc-service] +- [Event query grammar][query-grammar] +- [RFC 6455: The WebSocket protocol][ws] +- [JSON-RPC 2.0 Specification][jsonrpc2] +- [Nginx proxy server][nginx] + - [Proxying websockets][rp-ws] + - [Extension modules][ng-xm] +- [FastCGI][fcgi] +- [RFC 001: Storage Engines & Database Layer][rfc001] +- [RFC 002: Interprocess Communication in Tendermint][rfc002] +- Issues: + - [rpc/client: test that client resubscribes upon disconnect][i3380] (#3380) + - [Too high memory usage when creating many events subscriptions][i6439] (#6439) + - [Tendermint emits events faster than clients can pull them][i6729] (#6729) + - [indexer: unbuffered event subscription slow down the consensus][i7247] (#7247) + - [rpc: remove duplication of events when querying][i7273] (#7273) + +[rfc006]: https://github.com/tendermint/tendermint/blob/master/docs/rfc/rfc-006-event-subscription.md +[rpc-service]: https://github.com/tendermint/tendermint/blob/master/rpc/openapi/openapi.yaml +[query-grammar]: https://pkg.go.dev/github.com/tendermint/tendermint@master/internal/pubsub/query/syntax +[ws]: https://datatracker.ietf.org/doc/html/rfc6455 +[jsonrpc2]: https://www.jsonrpc.org/specification +[nginx]: https://nginx.org/en/docs/ +[fcgi]: http://www.mit.edu/~yandros/doc/specs/fcgi-spec.html +[rp-ws]: https://nginx.org/en/docs/http/websocket.html + +[ng-xm]: https://www.nginx.com/resources/wiki/extending/ +[abci-event]: https://pkg.go.dev/github.com/tendermint/tendermint/abci/types#Event +[rfc001]: https://github.com/tendermint/tendermint/blob/master/docs/rfc/rfc-001-storage-engine.rst +[rfc002]: https://github.com/tendermint/tendermint/blob/master/docs/rfc/rfc-002-ipc-ecosystem.md +[i3380]: https://github.com/tendermint/tendermint/issues/3380 +[i6439]: https://github.com/tendermint/tendermint/issues/6439 +[i6729]: https://github.com/tendermint/tendermint/issues/6729 +[i7247]: https://github.com/tendermint/tendermint/issues/7247 +[i7273]: https://github.com/tendermint/tendermint/issues/7273 diff --git a/docs/architecture/adr-076-combine-spec-repo.md b/docs/architecture/adr-076-combine-spec-repo.md new file mode 100644 index 000000000..a6365da5b --- /dev/null +++ b/docs/architecture/adr-076-combine-spec-repo.md @@ -0,0 +1,112 @@ +# ADR 076: Combine Spec and Tendermint Repositories + +## Changelog + +- 2022-02-04: Initial Draft. (@tychoish) + +## Status + +Accepted. + +## Context + +While the specification for Tendermint was originally in the same +repository as the Go implementation, at some point the specification +was split from the core repository and maintained separately from the +implementation. While this makes sense in promoting a conceptual +separation of specification and implementation, in practice this +separation was a premature optimization, apparently aimed at supporting +alternate implementations of Tendermint. + +The operational and documentary burden of maintaining a separate +spec repo has not returned value to justify its cost. There are no active +projects to develop alternate implementations of Tendermint based on the +common specification, and having separate repositories creates an ongoing +burden to coordinate versions, documentation, and releases. + +## Decision + +The specification repository will be merged back into the Tendermint +core repository. + +Stakeholders including representatives from the maintainers of the +spec, the Go implementation, and the Tendermint Rust library, agreed +to merge the repositories in the Tendermint core dev meeting on 27 +January 2022, including @williambanfield @cmwaters @creachadair and +@thanethomson. + +## Alternative Approaches + +The main alternative we considered was to keep separate repositories, +and to introduce a coordinated versioning scheme between the two, so +that users could figure out which spec versions go with which versions +of the core implementation. + +We decided against this on the grounds that it would further complicate +the release process for _both_ repositories, without mitigating any of +the other existing issues. + +## Detailed Design + +Clone and merge the master branch of the `tendermint/spec` repository +as a branch of the `tendermint/tendermint`, to ensure the commit history +of both repositories remains intact. + +### Implementation Instructions + +1. Within the `tendermint` repository, execute the following commands + to add a new branch with the history of the master branch of `spec`: + + ```bash + git remote add spec git@github.com:tendermint/spec.git + git fetch spec + git checkout -b spec-master spec/master + mkdir spec + git ls-tree -z --name-only HEAD | xargs -0 -I {} git mv {} subdir/ + git commit -m "spec: organize specification prior to merge" + git checkout -b spec-merge-mainline origin/master + git merge --allow-unrelated-histories spec-master + ``` + + This merges the spec into the `tendermint/tendermint` repository as + a normal branch. This commit can also be backported to the 0.35 + branch, if needed. + +2. Migrate outstanding issues from `tendermint/spec` to the + `tendermint/tendermint` repository. + +3. In the specification repository, add redirect to the README and mark + the repository as archived. + + +## Consequences + +### Positive + +Easier maintenance for the specification will obviate a number of +complicated and annoying versioning problems, and will help prevent the +possibility of the specification and the implementation drifting apart. + +Additionally, co-locating the specification will help encourage +cross-pollination and collaboration, between engineers focusing on the +specification and the protocol and engineers focusing on the implementation. + +### Negative + +Co-locating the spec and Go implementation has the potential effect of +prioritizing the Go implementation with regards to the spec, and +making it difficult to think about alternate implementations of the +Tendermint algorithm. Although we may want to foster additional +Tendermint implementations in the future, this isn't an active goal +in our current roadmap, and *not* merging these repos doesn't +change the fact that the Go implementation of Tendermint is already the +primary implementation. + +### Neutral + +N/A + +## References + +- https://github.com/tendermint/spec +- https://github.com/tendermint/tendermint diff --git a/docs/architecture/adr-077-block-retention.md b/docs/architecture/adr-077-block-retention.md new file mode 100644 index 000000000..714b4810a --- /dev/null +++ b/docs/architecture/adr-077-block-retention.md @@ -0,0 +1,109 @@ +# ADR 077: Configurable Block Retention + +## Changelog + +- 2020-03-23: Initial draft (@erikgrinaker) +- 2020-03-25: Use local config for snapshot interval (@erikgrinaker) +- 2020-03-31: Use ABCI commit response for block retention hint +- 2020-04-02: Resolved open questions +- 2021-02-11: Migrate to tendermint repo (Originally [RFC 001](https://github.com/tendermint/spec/pull/84)) + +## Author(s) + +- Erik Grinaker (@erikgrinaker) + +## Context + +Currently, all Tendermint nodes contain the complete sequence of blocks from genesis up to some height (typically the latest chain height). This will no longer be true when the following features are released: + +- [Block pruning](https://github.com/tendermint/tendermint/issues/3652): removes historical blocks and associated data (e.g. validator sets) up to some height, keeping only the most recent blocks. + +- [State sync](https://github.com/tendermint/tendermint/issues/828): bootstraps a new node by syncing state machine snapshots at a given height, but not historical blocks and associated data. + +To maintain the integrity of the chain, the use of these features must be coordinated such that necessary historical blocks will not become unavailable or lost forever. In particular: + +- Some nodes should have complete block histories, for auditability, querying, and bootstrapping. + +- The majority of nodes should retain blocks longer than the Cosmos SDK unbonding period, for light client verification. + +- Some nodes must take and serve state sync snapshots with snapshot intervals less than the block retention periods, to allow new nodes to state sync and then replay blocks to catch up. + +- Applications may not persist their state on commit, and require block replay on restart. + +- Only a minority of nodes can be state synced within the unbonding period, for light client verification and to serve block histories for catch-up. + +However, it is unclear if and how we should enforce this. It may not be possible to technically enforce all of these without knowing the state of the entire network, but it may also be unrealistic to expect this to be enforced entirely through social coordination. This is especially unfortunate since the consequences of misconfiguration can be permanent chain-wide data loss. + +## Proposal + +Add a new field `retain_height` to the ABCI `ResponseCommit` message: + +```proto +service ABCIApplication { + rpc Commit(RequestCommit) returns (ResponseCommit); +} + +message RequestCommit {} + +message ResponseCommit { + // reserve 1 + bytes data = 2; // the Merkle root hash + uint64 retain_height = 3; // the oldest block height to retain +} +``` + +Upon ABCI `Commit`, which finalizes execution of a block in the state machine, Tendermint removes all data for heights lower than `retain_height`. This allows the state machine to control block retention, which is preferable since only it can determine the significance of historical blocks. By default (i.e. with `retain_height=0`) all historical blocks are retained. + +Removed data includes not only blocks, but also headers, commit info, consensus params, validator sets, and so on. In the first iteration this will be done synchronously, since the number of heights removed for each run is assumed to be small (often 1) in the typical case. It can be made asynchronous at a later time if this is shown to be necessary. + +Since `retain_height` is dynamic, it is possible for it to refer to a height which has already been removed. For example, commit at height 100 may return `retain_height=90` while commit at height 101 may return `retain_height=80`. This is allowed, and will be ignored - it is the application's responsibility to return appropriate values. + +State sync will eventually support backfilling heights, via e.g. a snapshot metadata field `backfill_height`, but in the initial version it will have a fully truncated block history. + +## Cosmos SDK Example + +As an example, we'll consider how the Cosmos SDK might make use of this. The specific details should be discussed in a separate SDK proposal. + +The returned `retain_height` would be the lowest height that satisfies: + +- Unbonding time: the time interval in which validators can be economically punished for misbehavior. Blocks in this interval must be auditable e.g. by the light client. + +- IAVL snapshot interval: the block interval at which the underlying IAVL database is persisted to disk, e.g. every 10000 heights. Blocks since the last IAVL snapshot must be available for replay on application restart. + +- State sync snapshots: blocks since the _oldest_ available snapshot must be available for state sync nodes to catch up (oldest because a node may be restoring an old snapshot while a new snapshot was taken). + +- Local config: archive nodes may want to retain more or all blocks, e.g. via a local config option `min-retain-blocks`. There may also be a need to vary rentention for other nodes, e.g. sentry nodes which do not need historical blocks. + +![Cosmos SDK block retention diagram](img/block-retention.png) + +## Status + +Accepted + +## Consequences + +### Positive + +- Application-specified block retention allows the application to take all relevant factors into account and prevent necessary blocks from being accidentally removed. + +- Node operators can independently decide whether they want to provide complete block histories (if local configuration for this is provided) and snapshots. + +### Negative + +- Social coordination is required to run archival nodes, failure to do so may lead to permanent loss of historical blocks. + +- Social coordination is required to run snapshot nodes, failure to do so may lead to inability to run state sync, and inability to bootstrap new nodes at all if no archival nodes are online. + +### Neutral + +- Reduced block retention requires application changes, and cannot be controlled directly in Tendermint. + +- Application-specified block retention may set a lower bound on disk space requirements for all nodes. + +## References + +- State sync ADR: + +- State sync issue: + +- Block pruning issue: diff --git a/docs/architecture/adr-078-nonzero-genesis.md b/docs/architecture/adr-078-nonzero-genesis.md new file mode 100644 index 000000000..bd9c030f0 --- /dev/null +++ b/docs/architecture/adr-078-nonzero-genesis.md @@ -0,0 +1,82 @@ +# ADR 078: Non-Zero Genesis + +## Changelog + +- 2020-07-26: Initial draft (@erikgrinaker) +- 2020-07-28: Use weak chain linking, i.e. `predecessor` field (@erikgrinaker) +- 2020-07-31: Drop chain linking (@erikgrinaker) +- 2020-08-03: Add `State.InitialHeight` (@erikgrinaker) +- 2021-02-11: Migrate to tendermint repo (Originally [RFC 002](https://github.com/tendermint/spec/pull/119)) + +## Author(s) + +- Erik Grinaker (@erikgrinaker) + +## Context + +The recommended upgrade path for block protocol-breaking upgrades is currently to hard fork the +chain (see e.g. [`cosmoshub-3` upgrade](https://blog.cosmos.network/cosmos-hub-3-upgrade-announcement-39c9da941aee). +This is done by halting all validators at a predetermined height, exporting the application +state via application-specific tooling, and creating an entirely new chain using the exported +application state. + +As far as Tendermint is concerned, the upgraded chain is a completely separate chain, with e.g. +a new chain ID and genesis file. Notably, the new chain starts at height 1, and has none of the +old chain's block history. This causes problems for integrators, e.g. coin exchanges and +wallets, that assume a monotonically increasing height for a given blockchain. Users also find +it confusing that a given height can now refer to distinct states depending on the chain +version. + +An ideal solution would be to always retain block backwards compatibility in such a way that chain +history is never lost on upgrades. However, this may require a significant amount of engineering +work that is not viable for the planned Stargate release (Tendermint 0.34), and may prove too +restrictive for future development. + +As a first step, allowing the new chain to start from an initial height specified in the genesis +file would at least provide monotonically increasing heights. There was a proposal to include the +last block header of the previous chain as well, but since the genesis file is not verified and +hashed (only specific fields are) this would not be trustworthy. + +External tooling will be required to map historical heights onto e.g. archive nodes that contain +blocks from previous chain version. Tendermint will not include any such functionality. + +## Proposal + +Tendermint will allow chains to start from an arbitrary initial height: + +- A new field `initial_height` is added to the genesis file, defaulting to `1`. It can be set to any +non-negative integer, and `0` is considered equivalent to `1`. + +- A new field `InitialHeight` is added to the ABCI `RequestInitChain` message, with the same value +and semantics as the genesis field. + +- A new field `InitialHeight` is added to the `state.State` struct, where `0` is considered invalid. + Including the field here simplifies implementation, since the genesis value does not have to be + propagated throughout the code base separately, but it is not strictly necessary. + +ABCI applications may have to be updated to handle arbitrary initial heights, otherwise the initial +block may fail. + +## Status + +Accepted + +## Consequences + +### Positive + +- Heights can be unique throughout the history of a "logical" chain, across hard fork upgrades. + +### Negative + +- Upgrades still cause loss of block history. + +- Integrators will have to map height ranges to specific archive nodes/networks to query history. + +### Neutral + +- There is no explicit link to the last block of the previous chain. + +## References + +- [#2543: Allow genesis file to start from non-zero height w/ prev block header](https://github.com/tendermint/tendermint/issues/2543) diff --git a/docs/architecture/adr-079-ed25519-verification.md b/docs/architecture/adr-079-ed25519-verification.md new file mode 100644 index 000000000..c20869e6c --- /dev/null +++ b/docs/architecture/adr-079-ed25519-verification.md @@ -0,0 +1,57 @@ +# ADR 079: Ed25519 Verification + +## Changelog + +- 2020-08-21: Initial RFC +- 2021-02-11: Migrate RFC to tendermint repo (Originally [RFC 003](https://github.com/tendermint/spec/pull/144)) + +## Author(s) + +- Marko (@marbar3778) + +## Context + +Ed25519 keys are the only supported key types for Tendermint validators currently. Tendermint-Go wraps the ed25519 key implementation from the go standard library. As more clients are implemented to communicate with the canonical Tendermint implementation (Tendermint-Go) different implementations of ed25519 will be used. Due to [RFC 8032](https://www.rfc-editor.org/rfc/rfc8032.html) not guaranteeing implementation compatibility, Tendermint clients must to come to an agreement of how to guarantee implementation compatibility. [Zcash](https://z.cash/) has multiple implementations of their client and have identified this as a problem as well. The team at Zcash has made a proposal to address this issue, [Zcash improvement proposal 215](https://zips.z.cash/zip-0215). + +## Proposal + +- Tendermint-Go would adopt [hdevalence/ed25519consensus](https://github.com/hdevalence/ed25519consensus). + - This library is implements `ed25519.Verify()` in accordance to zip-215. Tendermint-go will continue to use `crypto/ed25519` for signing and key generation. + +- Tendermint-rs would adopt [ed25519-zebra](https://github.com/ZcashFoundation/ed25519-zebra) + - related [issue](https://github.com/informalsystems/tendermint-rs/issues/355) + +Signature verification is one of the major bottlenecks of Tendermint-go, batch verification can not be used unless it has the same consensus rules, ZIP 215 makes verification safe in consensus critical areas. + +This change constitutes a breaking changes, therefore must be done in a major release. No changes to validator keys or operations will be needed for this change to be enabled. + +This change has no impact on signature aggregation. To enable this signature aggregation Tendermint will have to use different signature schema (Schnorr, BLS, ...). Secondly, this change will enable safe batch verification for the Tendermint-Go client. Batch verification for the rust client is already supported in the library being used. + +As part of the acceptance of this proposal it would be best to contract or discuss with a third party the process of conducting a security review of the go library. + +## Status + +Proposed + +## Consequences + +### Positive + +- Consistent signature verification across implementations +- Enable safe batch verification + +### Negative + +#### Tendermint-Go + +- Third_party dependency + - library has not gone through a security review. + - unclear maintenance schedule +- Fragmentation of the ed25519 key for the go implementation, verification is done using a third party library while the rest + uses the go standard library + +### Neutral + +## References + +[It’s 255:19AM. Do you know what your validation criteria are?](https://hdevalence.ca/blog/2020-10-04-its-25519am) diff --git a/docs/architecture/adr-080-reverse-sync.md b/docs/architecture/adr-080-reverse-sync.md new file mode 100644 index 000000000..57d747fc8 --- /dev/null +++ b/docs/architecture/adr-080-reverse-sync.md @@ -0,0 +1,203 @@ +# ADR 080: ReverseSync - fetching historical data + +## Changelog + +- 2021-02-11: Migrate to tendermint repo (Originally [RFC 005](https://github.com/tendermint/spec/pull/224)) +- 2021-04-19: Use P2P to gossip necessary data for reverse sync. +- 2021-03-03: Simplify proposal to the state sync case. +- 2021-02-17: Add notes on asynchronicity of processes. +- 2020-12-10: Rename backfill blocks to reverse sync. +- 2020-11-25: Initial draft. + +## Author(s) + +- Callum Waters (@cmwaters) + +## Context + +Two new features: [Block pruning](https://github.com/tendermint/tendermint/issues/3652) +and [State sync](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-042-state-sync.md) +meant nodes no longer needed a complete history of the blockchain. This +introduced some challenges of its own which were covered and subsequently +tackled with [RFC-001](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-077-block-retention.md). +The RFC allowed applications to set a block retention height; an upper bound on +what blocks would be pruned. However nodes who state sync past this upper bound +(which is necessary as snapshots must be saved within the trusting period for +the assisting light client to verify) have no means of backfilling the blocks +to meet the retention limit. This could be a problem as nodes who state sync and +then eventually switch to consensus (or fast sync) may not have the block and +validator history to verify evidence causing them to panic if they see 2/3 +commit on what the node believes to be an invalid block. + +Thus, this RFC sets out to instil a minimum block history invariant amongst +honest nodes. + +## Proposal + +A backfill mechanism can simply be defined as an algorithm for fetching, +verifying and storing, headers and validator sets of a height prior to the +current base of the node's blockchain. In matching the terminology used for +other data retrieving protocols (i.e. fast sync and state sync), we +call this method **ReverseSync**. + +We will define the mechanism in four sections: + +- Usage +- Design +- Verification +- Termination + +### Usage + +For now, we focus purely on the case of a state syncing node, whom after +syncing to a height will need to verify historical data in order to be capable +of processing new blocks. We can denote the earliest height that the node will +need to verify and store in order to be able to verify any evidence that might +arise as the `max_historical_height`/`time`. Both height and time are necessary +as this maps to the BFT time used for evidence expiration. After acquiring +`State`, we calculate these parameters as: + +```go +max_historical_height = max(state.InitialHeight, state.LastBlockHeight - state.ConsensusParams.EvidenceAgeHeight) +max_historical_time = max(GenesisTime, state.LastBlockTime.Sub(state.ConsensusParams.EvidenceAgeTime)) +``` + +Before starting either fast sync or consensus, we then run the following +synchronous process: + +```go +func ReverseSync(max_historical_height int64, max_historical_time time.Time) error +``` + +Where we fetch and verify blocks until a block `A` where +`A.Height <= max_historical_height` and `A.Time <= max_historical_time`. + +Upon successfully reverse syncing, a node can now safely continue. As this +feature is only used as part of state sync, one can think of this as merely an +extension to it. + +In the future we may want to extend this functionality to allow nodes to fetch +historical blocks for reasons of accountability or data accessibility. + +### Design + +This section will provide a high level overview of some of the more important +characteristics of the design, saving the more tedious details as an ADR. + +#### P2P + +Implementation of this RFC will require the addition of a new channel and two +new messages. + +```proto +message LightBlockRequest { + uint64 height = 1; +} +``` + +```proto +message LightBlockResponse { + Header header = 1; + Commit commit = 2; + ValidatorSet validator_set = 3; +} +``` + +The P2P path may also enable P2P networked light clients and a state sync that +also doesn't need to rely on RPC. + +### Verification + +ReverseSync is used to fetch the following data structures: + +- `Header` +- `Commit` +- `ValidatorSet` + +Nodes will also need to be able to verify these. This can be achieved by first +retrieving the header at the base height from the block store. From this trusted +header, the node hashes each of the three data structures and checks that they are correct. + +1. The trusted header's last block ID matches the hash of the new header + + ```go + header[height].LastBlockID == hash(header[height-1]) + ``` + +2. The trusted header's last commit hash matches the hash of the new commit + + ```go + header[height].LastCommitHash == hash(commit[height-1]) + ``` + +3. Given that the node now trusts the new header, check that the header's validator set + hash matches the hash of the validator set + + ```go + header[height-1].ValidatorsHash == hash(validatorSet[height-1]) + ``` + +### Termination + +ReverseSync draws a lot of parallels with fast sync. An important consideration +for fast sync that also extends to ReverseSync is termination. ReverseSync will +finish it's task when one of the following conditions have been met: + +1. It reaches a block `A` where `A.Height <= max_historical_height` and +`A.Time <= max_historical_time`. +2. None of it's peers reports to have the block at the height below the +processes current block. +3. A global timeout. + +This implies that we can't guarantee adequate history and thus the term +"invariant" can't be used in the strictest sense. In the case that the first +condition isn't met, the node will log an error and optimistically attempt +to continue with either fast sync or consensus. + +## Alternative Solutions + +The need for a minimum block history invariant stems purely from the need to +validate evidence (although there may be some application relevant needs as +well). Because of this, an alternative, could be to simply trust whatever the +2/3+ majority has agreed upon and in the case where a node is at the head of the +blockchain, you simply abstain from voting. + +As it stands, if 2/3+ vote on evidence you can't verify, in the same manner if +2/3+ vote on a header that a node sees as invalid (perhaps due to a different +app hash), the node will halt. + +Another alternative is the method with which the relevant data is retrieved. +Instead of introducing new messages to the P2P layer, RPC could have been used +instead. + +The aforementioned data is already available via the following RPC endpoints: +`/commit` for `Header`'s' and `/validators` for `ValidatorSet`'s'. It was +decided predominantly due to the instability of the current RPC infrastructure +that P2P be used instead. + +## Status + +Proposed + +## Consequences + +### Positive + +- Ensures a minimum block history invariant for honest nodes. This will allow + nodes to verify evidence. + +### Negative + +- Statesync will be slower as more processing is required. + +### Neutral + +- By having validator sets served through p2p, this would make it easier to +extend p2p support to light clients and state sync. +- In the future, it may also be possible to extend this feature to allow for +nodes to freely fetch and verify prior blocks + +## References + +- [RFC-001: Block retention](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-077-block-retention.md) +- [Original issue](https://github.com/tendermint/tendermint/issues/4629) diff --git a/docs/architecture/adr-081-protobuf-mgmt.md b/docs/architecture/adr-081-protobuf-mgmt.md new file mode 100644 index 000000000..1199cff1b --- /dev/null +++ b/docs/architecture/adr-081-protobuf-mgmt.md @@ -0,0 +1,201 @@ +# ADR 081: Protocol Buffers Management + +## Changelog + +- 2022-02-28: First draft + +## Status + +Accepted + +[Tracking issue](https://github.com/tendermint/tendermint/issues/8121) + +## Context + +At present, we manage the [Protocol Buffers] schema files ("protos") that define +our wire-level data formats within the Tendermint repository itself (see the +[`proto`](../../proto/) directory). Recently, we have been making use of [Buf], +both locally and in CI, in order to generate Go stubs, and lint and check +`.proto` files for breaking changes. + +The version of Buf used at the time of this decision was `v1beta1`, and it was +discussed in [\#7975] and in weekly calls as to whether we should upgrade to +`v1` and harmonize our approach with that used by the Cosmos SDK. The team +managing the Cosmos SDK was primarily interested in having our protos versioned +and easily accessible from the [Buf] registry. + +The three main sets of stakeholders for the `.proto` files and their needs, as +currently understood, are as follows. + +1. Tendermint needs Go code generated from `.proto` files. +2. Consumers of Tendermint's `.proto` files, specifically projects that want to + interoperate with Tendermint and need to generate code for their own + programming language, want to be able to access these files in a reliable and + efficient way. +3. The Tendermint Core team wants to provide stable interfaces that are as easy + as possible to maintain, on which consumers can depend, and to be able to + notify those consumers promptly when those interfaces change. To this end, we + want to: + 1. Prevent any breaking changes from being introduced in minor/patch releases + of Tendermint. Only major version updates should be able to contain + breaking interface changes. + 2. Prevent generated code from diverging from the Protobuf schema files. + +There was also discussion surrounding the notion of automated documentation +generation and hosting, but it is not clear at this time whether this would be +that valuable to any of our stakeholders. What will, of course, be valuable at +minimum would be better documentation (in comments) of the `.proto` files +themselves. + +## Alternative Approaches + +### Meeting stakeholders' needs + +1. Go stub generation from protos. We could use: + 1. [Buf]. This approach has been rather cumbersome up to this point, and it + is not clear what Buf really provides beyond that which `protoc` provides + to justify the additional complexity in configuring Buf for stub + generation. + 2. [protoc] - the Protocol Buffers compiler. +2. Notification of breaking changes: + 1. Buf in CI for all pull requests to *release* branches only (and not on + `master`). + 2. Buf in CI on every pull request to every branch (this was the case at the + time of this decision, and the team decided that the signal-to-noise ratio + for this approach was too low to be of value). +3. `.proto` linting: + 1. Buf in CI on every pull request +4. `.proto` formatting: + 1. [clang-format] locally and a [clang-format GitHub Action] in CI to check + that files are formatted properly on every pull request. +5. Sharing of `.proto` files in a versioned, reliable manner: + 1. Consumers could simply clone the Tendermint repository, check out a + specific commit, tag or branch and manually copy out all of the `.proto` + files they need. This requires no effort from the Tendermint Core team and + will continue to be an option for consumers. The drawback of this approach + is that it requires manual coding/scripting to implement and is brittle in + the face of bigger changes. + 2. Uploading our `.proto` files to Buf's registry on every release. This is + by far the most seamless for consumers of our `.proto` files, but requires + the dependency on Buf. This has the additional benefit that the Buf + registry will automatically [generate and host + documentation][buf-docs-gen] for these protos. + 3. We could create a process that, upon release, creates a `.zip` file + containing our `.proto` files. + +### Popular alternatives to Buf + +[Prototool] was not considered as it appears deprecated, and the ecosystem seems +to be converging on Buf at this time. + +### Tooling complexity + +The more tools we have in our build/CI processes, the more complex and fragile +repository/CI management becomes, and the longer it takes to onboard new team +members. Maintainability is a core concern here. + +### Buf sustainability and costs + +One of the primary considerations regarding the usage of Buf is whether, for +example, access to its registry will eventually become a +paid-for/subscription-based service and whether this is valuable enough for us +and the ecosystem to pay for such a service. At this time, it appears as though +Buf will never charge for hosting open source projects' protos. + +Another consideration was Buf's sustainability as a project - what happens when +their resources run out? Will there be a strong and broad enough open source +community to continue maintaining it? + +### Local Buf usage options + +Local usage of Buf (i.e. not in CI) can be accomplished in two ways: + +1. Installing the relevant tools individually. +2. By way of its [Docker image][buf-docker]. + +Local installation of Buf requires developers to manually keep their toolchains +up-to-date. The Docker option comes with a number of complexities, including +how the file system permissions of code generated by a Docker container differ +between platforms (e.g. on Linux, Buf-generated code ends up being owned by +`root`). + +The trouble with the Docker-based approach is that we make use of the +[gogoprotobuf] plugin for `protoc`. Continuing to use the Docker-based approach +to using Buf will mean that we will have to continue building our own custom +Docker image with embedded gogoprotobuf. + +Along these lines, we could eventually consider coming up with a [Nix]- or +[redo]-based approach to developer tooling to ensure tooling consistency across +the team and for anyone who wants to be able to contribute to Tendermint. + +## Decision + +1. We will adopt Buf for now for proto generation, linting, breakage checking + and its registry (mainly in CI, with optional usage locally). +2. Failing CI when checking for breaking changes in `.proto` files will only + happen when performing minor/patch releases. +3. Local tooling will be favored over Docker-based tooling. + +## Detailed Design + +We currently aim to: + +1. Update to Buf `v1` to facilitate linting, breakage checking and uploading to + the Buf registry. +2. Configure CI appropriately for proto management: + 1. Uploading protos to the Buf registry on every release (e.g. the + [approach][cosmos-sdk-buf-registry-ci] used by the Cosmos SDK). + 2. Linting on every pull request (e.g. the + [approach][cosmos-sdk-buf-linting-ci] used by the Cosmos SDK). The linter + passing should be considered a requirement for accepting PRs. + 3. Checking for breaking changes in minor/patch version releases and failing + CI accordingly - see [\#8003]. + 4. Add [clang-format GitHub Action] to check `.proto` file formatting. Format + checking should be considered a requirement for accepting PRs. +3. Update the Tendermint [`Makefile`](../../Makefile) to primarily facilitate + local Protobuf stub generation, linting, formatting and breaking change + checking. More specifically: + 1. This includes removing the dependency on Docker and introducing the + dependency on local toolchain installation. CI-based equivalents, where + relevant, will rely on specific GitHub Actions instead of the Makefile. + 2. Go code generation will rely on `protoc` directly. + +## Consequences + +### Positive + +- We will still offer Go stub generation, proto linting and breakage checking. +- Breakage checking will only happen on minor/patch releases to increase the + signal-to-noise ratio in CI. +- Versioned protos will be made available via Buf's registry upon every release. + +### Negative + +- Developers/contributors will need to install the relevant Protocol + Buffers-related tooling (Buf, gogoprotobuf, clang-format) locally in order to + build, lint, format and check `.proto` files for breaking changes. + +### Neutral + +## References + +- [Protocol Buffers] +- [Buf] +- [\#7975] +- [protoc] - The Protocol Buffers compiler + +[Protocol Buffers]: https://developers.google.com/protocol-buffers +[Buf]: https://buf.build/ +[\#7975]: https://github.com/tendermint/tendermint/pull/7975 +[protoc]: https://github.com/protocolbuffers/protobuf +[clang-format]: https://clang.llvm.org/docs/ClangFormat.html +[clang-format GitHub Action]: https://github.com/marketplace/actions/clang-format-github-action +[buf-docker]: https://hub.docker.com/r/bufbuild/buf +[cosmos-sdk-buf-registry-ci]: https://github.com/cosmos/cosmos-sdk/blob/e6571906043b6751951a42b6546431b1c38b05bd/.github/workflows/proto-registry.yml +[cosmos-sdk-buf-linting-ci]: https://github.com/cosmos/cosmos-sdk/blob/e6571906043b6751951a42b6546431b1c38b05bd/.github/workflows/proto.yml#L15 +[\#8003]: https://github.com/tendermint/tendermint/issues/8003 +[Nix]: https://nixos.org/ +[gogoprotobuf]: https://github.com/gogo/protobuf +[Prototool]: https://github.com/uber/prototool +[buf-docs-gen]: https://docs.buf.build/bsr/documentation +[redo]: https://redo.readthedocs.io/en/latest/ diff --git a/docs/architecture/adr-template.md b/docs/architecture/adr-template.md new file mode 100644 index 000000000..27225fd70 --- /dev/null +++ b/docs/architecture/adr-template.md @@ -0,0 +1,101 @@ +# ADR {ADR-NUMBER}: {TITLE} + +## Changelog + +- {date}: {changelog} + +## Status + +> An architecture decision is considered "proposed" when a PR containing the ADR +> is submitted. When merged, an ADR must have a status associated with it, which +> must be one of: "Accepted", "Rejected", "Deprecated" or "Superseded". +> +> An accepted ADR's implementation status must be tracked via a tracking issue, +> milestone or project board (only one of these is necessary). For example: +> +> Accepted +> +> [Tracking issue](https://github.com/tendermint/tendermint/issues/123) +> [Milestone](https://github.com/tendermint/tendermint/milestones/123) +> [Project board](https://github.com/orgs/tendermint/projects/123) +> +> Rejected ADRs are captured as a record of recommendations that we specifically +> do not (and possibly never) want to implement. The ADR itself must, for +> posterity, include reasoning as to why it was rejected. +> +> If an ADR is deprecated, simply write "Deprecated" in this section. If an ADR +> is superseded by one or more other ADRs, provide local a reference to those +> ADRs, e.g.: +> +> Superseded by [ADR 123](./adr-123.md) + +Accepted | Rejected | Deprecated | Superseded by + +## Context + +> This section contains all the context one needs to understand the current state, +> and why there is a problem. It should be as succinct as possible and introduce +> the high level idea behind the solution. + +## Alternative Approaches + +> This section contains information around alternative options that are considered +> before making a decision. It should contain a explanation on why the alternative +> approach(es) were not chosen. + +## Decision + +> This section records the decision that was made. +> It is best to record as much info as possible from the discussion that happened. +> This aids in not having to go back to the Pull Request to get the needed information. + +## Detailed Design + +> This section does not need to be filled in at the start of the ADR, but must +> be completed prior to the merging of the implementation. +> +> Here are some common questions that get answered as part of the detailed design: +> +> - What are the user requirements? +> +> - What systems will be affected? +> +> - What new data structures are needed, what data structures will be changed? +> +> - What new APIs will be needed, what APIs will be changed? +> +> - What are the efficiency considerations (time/space)? +> +> - What are the expected access patterns (load/throughput)? +> +> - Are there any logging, monitoring or observability needs? +> +> - Are there any security considerations? +> +> - Are there any privacy considerations? +> +> - How will the changes be tested? +> +> - If the change is large, how will the changes be broken up for ease of review? +> +> - Will these changes require a breaking (major) release? +> +> - Does this change require coordination with the SDK or other? + +## Consequences + +> This section describes the consequences, after applying the decision. All +> consequences should be summarized here, not just the "positive" ones. + +### Positive + +### Negative + +### Neutral + +## References + +> Are there any relevant PR comments, issues that led up to this, or articles +> referenced for why we made the given design choice? If so link them here! + +- {reference link} diff --git a/docs/architecture/img/adr-046-fig1.png b/docs/architecture/img/adr-046-fig1.png new file mode 100644 index 000000000..d68712e8a Binary files /dev/null and b/docs/architecture/img/adr-046-fig1.png differ diff --git a/docs/architecture/img/adr-062-architecture.svg b/docs/architecture/img/adr-062-architecture.svg new file mode 100644 index 000000000..4a824eee0 --- /dev/null +++ b/docs/architecture/img/adr-062-architecture.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/architecture/img/adr-075-log-after.png b/docs/architecture/img/adr-075-log-after.png new file mode 100644 index 000000000..359f205e4 Binary files /dev/null and b/docs/architecture/img/adr-075-log-after.png differ diff --git a/docs/architecture/img/adr-075-log-before.png b/docs/architecture/img/adr-075-log-before.png new file mode 100644 index 000000000..813b9d257 Binary files /dev/null and b/docs/architecture/img/adr-075-log-before.png differ diff --git a/docs/architecture/img/bc-reactor-refactor.png b/docs/architecture/img/bc-reactor-refactor.png new file mode 100644 index 000000000..4cd84a02f Binary files /dev/null and b/docs/architecture/img/bc-reactor-refactor.png differ diff --git a/docs/architecture/img/bc-reactor.png b/docs/architecture/img/bc-reactor.png new file mode 100644 index 000000000..f7fe0f819 Binary files /dev/null and b/docs/architecture/img/bc-reactor.png differ diff --git a/docs/architecture/img/block-retention.png b/docs/architecture/img/block-retention.png new file mode 100644 index 000000000..e013e1ab3 Binary files /dev/null and b/docs/architecture/img/block-retention.png differ diff --git a/docs/architecture/img/blockchain-reactor-v1.png b/docs/architecture/img/blockchain-reactor-v1.png new file mode 100644 index 000000000..70debcd66 Binary files /dev/null and b/docs/architecture/img/blockchain-reactor-v1.png differ diff --git a/docs/architecture/img/blockchain-reactor-v2.png b/docs/architecture/img/blockchain-reactor-v2.png new file mode 100644 index 000000000..5f15333a0 Binary files /dev/null and b/docs/architecture/img/blockchain-reactor-v2.png differ diff --git a/docs/architecture/img/blockchain-v2-channels.png b/docs/architecture/img/blockchain-v2-channels.png new file mode 100644 index 000000000..69886da95 Binary files /dev/null and b/docs/architecture/img/blockchain-v2-channels.png differ diff --git a/docs/architecture/img/consensus_blockchain.png b/docs/architecture/img/consensus_blockchain.png new file mode 100644 index 000000000..dd0f4daa8 Binary files /dev/null and b/docs/architecture/img/consensus_blockchain.png differ diff --git a/docs/architecture/img/formula1.png b/docs/architecture/img/formula1.png new file mode 100644 index 000000000..447ee30f5 Binary files /dev/null and b/docs/architecture/img/formula1.png differ diff --git a/docs/architecture/img/formula2.png b/docs/architecture/img/formula2.png new file mode 100644 index 000000000..081a15769 Binary files /dev/null and b/docs/architecture/img/formula2.png differ diff --git a/docs/architecture/img/mempool-v0.jpeg b/docs/architecture/img/mempool-v0.jpeg new file mode 100644 index 000000000..dc3da6e77 Binary files /dev/null and b/docs/architecture/img/mempool-v0.jpeg differ diff --git a/docs/architecture/img/pbts-message.png b/docs/architecture/img/pbts-message.png new file mode 100644 index 000000000..400f35690 Binary files /dev/null and b/docs/architecture/img/pbts-message.png differ diff --git a/docs/architecture/img/state-sync.png b/docs/architecture/img/state-sync.png new file mode 100644 index 000000000..08b6eac43 Binary files /dev/null and b/docs/architecture/img/state-sync.png differ diff --git a/docs/architecture/img/tags1.png b/docs/architecture/img/tags1.png new file mode 100644 index 000000000..a6bc64e81 Binary files /dev/null and b/docs/architecture/img/tags1.png differ diff --git a/docs/introduction/what-is-tendermint.md b/docs/introduction/what-is-tendermint.md index 9cff48a2a..88dc2c1fa 100644 --- a/docs/introduction/what-is-tendermint.md +++ b/docs/introduction/what-is-tendermint.md @@ -67,7 +67,7 @@ configuration, service discovery, locking, leader-election, and so on. Tendermint is in essence similar software, but with two key differences: - It is Byzantine Fault Tolerant, meaning it can only tolerate up to a - 1/3 of failures, but those failures can include arbitrary behaviour - + 1/3 of failures, but those failures can include arbitrary behavior - including hacking and malicious attacks. - It does not specify a particular application, like a fancy key-value store. Instead, it focuses on arbitrary state machine replication, so developers can build @@ -101,13 +101,13 @@ Another example of a cryptocurrency application built on Tendermint is [Fabric](https://github.com/hyperledger/fabric) takes a similar approach to Tendermint, but is more opinionated about how the state is managed, -and requires that all application behaviour runs in potentially many +and requires that all application behavior runs in potentially many docker containers, modules it calls "chaincode". It uses an implementation of [PBFT](http://pmg.csail.mit.edu/papers/osdi99.pdf). from a team at IBM that is [augmented to handle potentially non-deterministic chaincode](https://www.zurich.ibm.com/~cca/papers/sieve.pdf) It is -possible to implement this docker-based behaviour as a ABCI app in +possible to implement this docker-based behavior as a ABCI app in Tendermint, though extending Tendermint to handle non-determinism remains for future work. diff --git a/docs/package-lock.json b/docs/package-lock.json index 8bbdae8cc..37dfc40e7 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -8876,9 +8876,9 @@ } }, "node_modules/minimist": { - "version": "1.2.5", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", - "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" + "version": "1.2.6", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", + "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==" }, "node_modules/mississippi": { "version": "3.0.0", @@ -13045,9 +13045,9 @@ } }, "node_modules/url-parse": { - "version": "1.5.7", - "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.7.tgz", - "integrity": "sha512-HxWkieX+STA38EDk7CE9MEryFeHCKzgagxlGvsdS7WBImq9Mk+PGwiT56w82WI3aicwJA8REp42Cxo98c8FZMA==", + "version": "1.5.10", + "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz", + "integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==", "dependencies": { "querystringify": "^2.1.1", "requires-port": "^1.0.0" @@ -21113,9 +21113,9 @@ } }, "minimist": { - "version": "1.2.5", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", - "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" + "version": "1.2.6", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", + "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==" }, "mississippi": { "version": "3.0.0", @@ -24536,9 +24536,9 @@ } }, "url-parse": { - "version": "1.5.7", - "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.7.tgz", - "integrity": "sha512-HxWkieX+STA38EDk7CE9MEryFeHCKzgagxlGvsdS7WBImq9Mk+PGwiT56w82WI3aicwJA8REp42Cxo98c8FZMA==", + "version": "1.5.10", + "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz", + "integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==", "requires": { "querystringify": "^2.1.1", "requires-port": "^1.0.0" diff --git a/docs/presubmit.sh b/docs/presubmit.sh new file mode 100755 index 000000000..0dec7d32a --- /dev/null +++ b/docs/presubmit.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# +# This script verifies that each document in the docs and architecture +# directory has a corresponding table-of-contents entry in its README file. +# +# This can be run manually from the command line. +# It is also run in CI via the docs-toc.yml workflow. +# +set -euo pipefail + +readonly base="$(dirname $0)" +cd "$base" + +readonly workdir="$(mktemp -d)" +trap "rm -fr -- '$workdir'" EXIT + +checktoc() { + local dir="$1" + local tag="$2"'-*-*' + local out="$workdir/${dir}.out.txt" + ( + cd "$dir" >/dev/null + find . -maxdepth 1 -type f -name "$tag" -not -exec grep -q "({})" README.md ';' -print + ) > "$out" + if [[ -s "$out" ]] ; then + echo "-- The following files in $dir lack a ToC entry: +" + cat "$out" + return 1 + fi +} + +err=0 + +# Verify that each RFC and ADR has a ToC entry in its README file. +checktoc architecture adr || ((err++)) +checktoc rfc rfc || ((err++)) + +exit $err diff --git a/docs/rfc/README.md b/docs/rfc/README.md new file mode 100644 index 000000000..346318c7b --- /dev/null +++ b/docs/rfc/README.md @@ -0,0 +1,64 @@ +--- +order: 1 +parent: + order: false +--- + +# Requests for Comments + +A Request for Comments (RFC) is a record of discussion on an open-ended topic +related to the design and implementation of Tendermint Core, for which no +immediate decision is required. + +The purpose of an RFC is to serve as a historical record of a high-level +discussion that might otherwise only be recorded in an ad hoc way (for example, +via gists or Google docs) that are difficult to discover for someone after the +fact. An RFC _may_ give rise to more specific architectural _decisions_ for +Tendermint, but those decisions must be recorded separately in [Architecture +Decision Records (ADR)](./../architecture). + +As a rule of thumb, if you can articulate a specific question that needs to be +answered, write an ADR. If you need to explore the topic and get input from +others to know what questions need to be answered, an RFC may be appropriate. + +## RFC Content + +An RFC should provide: + +- A **changelog**, documenting when and how the RFC has changed. +- An **abstract**, briefly summarizing the topic so the reader can quickly tell + whether it is relevant to their interest. +- Any **background** a reader will need to understand and participate in the + substance of the discussion (links to other documents are fine here). +- The **discussion**, the primary content of the document. + +The [rfc-template.md](./rfc-template.md) file includes placeholders for these +sections. + +## Table of Contents + +- [RFC-000: P2P Roadmap](./rfc-000-p2p-roadmap.rst) +- [RFC-001: Storage Engines](./rfc-001-storage-engine.rst) +- [RFC-002: Interprocess Communication](./rfc-002-ipc-ecosystem.md) +- [RFC-003: Performance Taxonomy](./rfc-003-performance-questions.md) +- [RFC-004: E2E Test Framework Enhancements](./rfc-004-e2e-framework.rst) +- [RFC-005: Event System](./rfc-005-event-system.rst) +- [RFC-006: Event Subscription](./rfc-006-event-subscription.md) +- [RFC-007: Deterministic Proto Byte Serialization](./rfc-007-deterministic-proto-bytes.md) +- [RFC-008: Don't Panic](./rfc-008-do-not-panic.md) +- [RFC-009: Consensus Parameter Upgrades](./rfc-009-consensus-parameter-upgrades.md) +- [RFC-010: P2P Light Client](./rfc-010-p2p-light-client.rst) +- [RFC-011: Delete Gas](./rfc-011-delete-gas.md) +- [RFC-012: Event Indexing Revisited](./rfc-012-custom-indexing.md) +- [RFC-013: ABCI++](./rfc-013-abci++.md) +- [RFC-014: Semantic Versioning](./rfc-014-semantic-versioning.md) +- [RFC-015: ABCI++ Tx Mutation](./rfc-015-abci++-tx-mutation.md) +- [RFC-016: Node Architecture](./rfc-016-node-architecture.md) +- [RFC-017: ABCI++ Vote Extension Propagation](./rfc-017-abci++-vote-extension-propag.md) +- [RFC-018: BLS Signature Aggregation Exploration](./rfc-018-bls-agg-exploration.md) +- [RFC-019: Configuration File Versioning](./rfc-019-config-version.md) +- [RFC-020: Onboarding Projects](./rfc-020-onboarding-projects.rst) +- [RFC-021: The Future of the Socket Protocol](./rfc-021-socket-protocol.md) +- [RFC-023: Semi-permanent Testnet](./rfc-023-semi-permanent-testnet.md) + + diff --git a/docs/rfc/images/abci++.png b/docs/rfc/images/abci++.png new file mode 100644 index 000000000..d5146f995 Binary files /dev/null and b/docs/rfc/images/abci++.png differ diff --git a/docs/rfc/images/abci.png b/docs/rfc/images/abci.png new file mode 100644 index 000000000..10039ab5c Binary files /dev/null and b/docs/rfc/images/abci.png differ diff --git a/docs/rfc/images/node-dependency-tree.svg b/docs/rfc/images/node-dependency-tree.svg new file mode 100644 index 000000000..6d95e0e15 --- /dev/null +++ b/docs/rfc/images/node-dependency-tree.svg @@ -0,0 +1,3 @@ + + +
Node
Node
Statesync
Statesync
Blocksync
Blocksync
Consensus
Consensus
Mempool
Mempool
Evidence
Evidence
Block Executor
Block Executor
Blockchain
Blockchain
Evidence
Evidence
PEX
PEX
Peer Store
Peer Store
Peer Networking
Peer Networking
RPC External
RPC External
ABCI Layer
ABCI Layer
Events System
Events System
RPC Internal
RPC Internal
Viewer does not support full SVG 1.1
\ No newline at end of file diff --git a/docs/rfc/rfc-000-p2p-roadmap.rst b/docs/rfc/rfc-000-p2p-roadmap.rst new file mode 100644 index 000000000..dc9b54c7f --- /dev/null +++ b/docs/rfc/rfc-000-p2p-roadmap.rst @@ -0,0 +1,316 @@ +==================== +RFC 000: P2P Roadmap +==================== + +Changelog +--------- + +- 2021-08-20: Completed initial draft and distributed via a gist +- 2021-08-25: Migrated as an RFC and changed format + +Abstract +-------- + +This document discusses the future of peer network management in Tendermint, with +a particular focus on features, semantics, and a proposed roadmap. +Specifically, we consider libp2p as a tool kit for implementing some fundamentals. + +Background +---------- + +For the 0.35 release cycle the switching/routing layer of Tendermint was +replaced. This work was done "in place," and produced a version of Tendermint +that was backward-compatible and interoperable with previous versions of the +software. While there are new p2p/peer management constructs in the new +version (e.g. ``PeerManager`` and ``Router``), the main effect of this change +was to simplify the ways that other components within Tendermint interacted with +the peer management layer, and to make it possible for higher-level components +(specifically the reactors), to be used and tested more independently. + +This refactoring, which was a major undertaking, was entirely necessary to +enable areas for future development and iteration on this aspect of +Tendermint. There are also a number of potential user-facing features that +depend heavily on the p2p layer: additional transport protocols, transport +compression, improved resilience to network partitions. These improvements to +modularity, stability, and reliability of the p2p system will also make +ongoing maintenance and feature development easier in the rest of Tendermint. + +Critique of Current Peer-to-Peer Infrastructure +--------------------------------------- + +The current (refactored) P2P stack is an improvement on the previous iteration +(legacy), but as of 0.35, there remains room for improvement in the design and +implementation of the P2P layer. + +Some limitations of the current stack include: + +- heavy reliance on buffering to avoid backups in the flow of components, + which is fragile to maintain and can lead to unexpected memory usage + patterns and forces the routing layer to make decisions about when messages + should be discarded. + +- the current p2p stack relies on convention (rather than the compiler) to + enforce the API boundaries and conventions between reactors and the router, + making it very easy to write "wrong" reactor code or introduce a bad + dependency. + +- the current stack is probably more complex and difficult to maintain because + the legacy system must coexist with the new components in 0.35. When the + legacy stack is removed there are some simple changes that will become + possible and could reduce the complexity of the new system. (e.g. `#6598 + `_.) + +- the current stack encapsulates a lot of information about peers, and makes it + difficult to expose that information to monitoring/observability tools. This + general opacity also makes it difficult to interact with the peer system + from other areas of the code base (e.g. tests, reactors). + +- the legacy stack provided some control to operators to force the system to + dial new peers or seed nodes or manipulate the topology of the system _in + situ_. The current stack can't easily provide this, and while the new stack + may have better behavior, it does leave operators hands tied. + +Some of these issues will be resolved early in the 0.36 cycle, with the +removal of the legacy components. + +The 0.36 release also provides the opportunity to make changes to the +protocol, as the release will not be compatible with previous releases. + +Areas for Development +--------------------- + +These sections describe features that may make sense to include in a Phase 2 of +a P2P project. + +Internal Message Passing +~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently, there's no provision for intranode communication using the P2P +layer, which means when two reactors need to interact with each other they +have to have dependencies on each other's interfaces, and +initialization. Changing these interactions (e.g. transitions between +blocksync and consensus) from procedure calls to message passing. + +This is a relatively simple change and could be implemented with the following +components: + +- a constant to represent "local" delivery as the ``To`` field on + ``p2p.Envelope``. + +- special path for routing local messages that doesn't require message + serialization (protobuf marshalling/unmarshaling). + +Adding these semantics, particularly if in conjunction with synchronous +semantics provides a solution to dependency graph problems currently present +in the Tendermint codebase, which will simplify development, make it possible +to isolate components for testing. + +Eventually, this will also make it possible to have a logical Tendermint node +running in multiple processes or in a collection of containers, although the +usecase of this may be debatable. + +Synchronous Semantics (Paired Request/Response) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the current system, all messages are sent with fire-and-forget semantics, +and there's no coupling between a request sent via the p2p layer, and a +response. These kinds of semantics would simplify the implementation of +state and block sync reactors, and make intra-node message passing more +powerful. + +For some interactions, like gossiping transactions between the mempools of +different nodes, fire-and-forget semantics make sense, but for other +operations the missing link between requests/responses leads to either +inefficiency when a node fails to respond or becomes unavailable, or code that +is just difficult to follow. + +To support this kind of work, the protocol would need to accommodate some kind +of request/response ID to allow identifying out-of-order responses over a +single connection. Additionally, expanded the programming model of the +``p2p.Channel`` to accommodate some kind of _future_ or similar paradigm to +make it viable to write reactor code without needing for the reactor developer +to wrestle with lower level concurrency constructs. + + +Timeout Handling (QoS) +~~~~~~~~~~~~~~~~~~~~~~ + +Currently, all timeouts, buffering, and QoS features are handled at the router +layer, and the reactors are implemented in ways that assume/require +asynchronous operation. This both increases the required complexity at the +routing layer, and means that misbehavior at the reactor level is difficult to +detect or attribute. Additionally, the current system provides three main +parameters to control quality of service: + +- buffer sizes for channels and queues. + +- priorities for channels + +- queue implementation details for shedding load. + +These end up being quite coarse controls, and changing the settings are +difficult because as the queues and channels are able to buffer large numbers +of messages it can be hard to see the impact of a given change, particularly +in our extant test environment. In general, we should endeavor to: + +- set real timeouts, via contexts, on most message send operations, so that + senders rather than queues can be responsible for timeout + logic. Additionally, this will make it possible to avoid sending messages + during shutdown. + +- reduce (to the greatest extent possible) the amount of buffering in + channels and the queues, to more readily surface backpressure and reduce the + potential for buildup of stale messages. + +Stream Based Connection Handling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently the transport layer is message based, which makes sense from a +mental model of how the protocol works, but makes it more difficult to +implement transports and connection types, as it forces a higher level view of +the connection and interaction which makes it harder to implement for novel +transport types and makes it more likely that message-based caching and rate +limiting will be implemented at the transport layer rather than at a more +appropriate level. + +The transport then, would be responsible for negotiating the connection and the +handshake and otherwise behave like a socket/file descriptor with ``Read`` and +``Write`` methods. + +While this was included in the initial design for the new P2P layer, it may be +obviated entirely if the transport and peer layer is replaced with libp2p, +which is primarily stream based. + +Service Discovery +~~~~~~~~~~~~~~~~~ + +In the current system, Tendermint assumes that all nodes in a network are +largely equivalent, and nodes tend to be "chatty" making many requests of +large numbers of peers and waiting for peers to (hopefully) respond. While +this works and has allowed Tendermint to get to a certain point, this both +produces a theoretical scaling bottle neck and makes it harder to test and +verify components of the system. + +In addition to peer's identity and connection information, peers should be +able to advertise a number of services or capabilities, and node operators or +developers should be able to specify peer capability requirements (e.g. target +at least -percent of peers with capability.) + +These capabilities may be useful in selecting peers to send messages to, it +may make sense to extend Tendermint's message addressing capability to allow +reactors to send messages to groups of peers based on role rather than only +allowing addressing to one or all peers. + +Having a good service discovery mechanism may pair well with the synchronous +semantics (request/response) work, as it allows reactors to "make a request of +a peer with capability and wait for the response," rather force the +reactors to need to track the capabilities or state of specific peers. + +Solutions +--------- + +Continued Homegrown Implementation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The current peer system is homegrown and is conceptually compatible with the +needs of the project, and while there are limitations to the system, the p2p +layer is not (currently as of 0.35) a major source of bugs or friction during +development. + +However, the current implementation makes a number of allowances for +interoperability, and there are a collection of iterative improvements that +should be considered in the next couple of releases. To maintain the current +implementation, upcoming work would include: + +- change the ``Transport`` mechanism to facilitate easier implementations. + +- implement different ``Transport`` handlers to be able to manage peer + connections using different protocols (e.g. QUIC, etc.) + +- entirely remove the constructs and implementations of the legacy peer + implementation. + +- establish and enforce clearer chains of responsibility for connection + establishment (e.g. handshaking, setup,) which is currently shared between + three components. + +- report better metrics regarding the into the state of peers and network + connectivity, which are opaque outside of the system. This is constrained at + the moment as a side effect of the split responsibility for connection + establishment. + +- extend the PEX system to include service information so that nodes in the + network weren't necessarily homogeneous. + +While maintaining a bespoke peer management layer would seem to distract from +development of core functionality, the truth is that (once the legacy code is +removed,) the scope of the peer layer is relatively small from a maintenance +perspective, and having control at this layer might actually afford the +project with the ability to more rapidly iterate on some features. + +LibP2P +~~~~~~ + +LibP2P provides components that, approximately, account for the +``PeerManager`` and ``Transport`` components of the current (new) P2P +stack. The Go APIs seem reasonable, and being able to externalize the +implementation details of peer and connection management seems like it could +provide a lot of benefits, particularly in supporting a more active ecosystem. + +In general the API provides the kind of stream-based, multi-protocol +supporting, and idiomatic baseline for implementing a peer layer. Additionally +because it handles peer exchange and connection management at a lower +level, by using libp2p it'd be possible to remove a good deal of code in favor +of just using libp2p. Having said that, Tendermint's P2P layer covers a +greater scope (e.g. message routing to different peers) and that layer is +something that Tendermint might want to retain. + +The are a number of unknowns that require more research including how much of +a peer database the Tendermint engine itself needs to maintain, in order to +support higher level operations (consensus, statesync), but it might be the +case that our internal systems need to know much less about peers than +otherwise specified. Similarly, the current system has a notion of peer +scoring that cannot be communicated to libp2p, which may be fine as this is +only used to support peer exchange (PEX,) which would become a property libp2p +and not expressed in it's current higher-level form. + +In general, the effort to switch to libp2p would involve: + +- timing it during an appropriate protocol-breaking window, as it doesn't seem + viable to support both libp2p *and* the current p2p protocol. + +- providing some in-memory testing network to support the use case that the + current ``p2p.MemoryNetwork`` provides. + +- re-homing the ``p2p.Router`` implementation on top of libp2p components to + be able to maintain the current reactor implementations. + +Open question include: + +- how much local buffering should we be doing? It sort of seems like we should + figure out what the expected behavior is for libp2p for QoS-type + functionality, and if our requirements mean that we should be implementing + this on top of things ourselves? + +- if Tendermint was going to use libp2p, how would libp2p's stability + guarantees (protocol, etc.) impact/constrain Tendermint's stability + guarantees? + +- what kind of introspection does libp2p provide, and to what extend would + this change or constrain the kind of observability that Tendermint is able + to provide? + +- how do efforts to select "the best" (healthy, close, well-behaving, etc.) + peers work out if Tendermint is not maintaining a local peer database? + +- would adding additional higher level semantics (internal message passing, + request/response pairs, service discovery, etc.) facilitate removing some of + the direct linkages between constructs/components in the system and reduce + the need for Tendermint nodes to maintain state about its peers? + +References +---------- + +- `Tracking Ticket for P2P Refactor Project `_ +- `ADR 61: P2P Refactor Scope <../architecture/adr-061-p2p-refactor-scope.md>`_ +- `ADR 62: P2P Architecture and Abstraction <../architecture/adr-061-p2p-architecture.md>`_ diff --git a/docs/rfc/rfc-001-storage-engine.rst b/docs/rfc/rfc-001-storage-engine.rst new file mode 100644 index 000000000..560e8a8b3 --- /dev/null +++ b/docs/rfc/rfc-001-storage-engine.rst @@ -0,0 +1,179 @@ +=========================================== +RFC 001: Storage Engines and Database Layer +=========================================== + +Changelog +--------- + +- 2021-04-19: Initial Draft (gist) +- 2021-09-02: Migrated to RFC folder, with some updates + +Abstract +-------- + +The aspect of Tendermint that's responsible for persistence and storage (often +"the database" internally) represents a bottle neck in the architecture of the +platform, that the 0.36 release presents a good opportunity to correct. The +current storage engine layer provides a great deal of flexibility that is +difficult for users to leverage or benefit from, while also making it harder +for Tendermint Core developers to deliver improvements on storage engine. This +RFC discusses the possible improvements to this layer of the system. + +Background +---------- + +Tendermint has a very thin common wrapper that makes Tendermint itself +(largely) agnostic to the data storage layer (within the realm of the popular +key-value/embedded databases.) This flexibility is not particularly useful: +the benefits of a specific database engine in the context of Tendermint is not +particularly well understood, and the maintenance burden for multiple backends +is not commensurate with the benefit provided. Additionally, because the data +storage layer is handled generically, and most tests run with an in-memory +framework, it's difficult to take advantage of any higher-level features of a +database engine. + +Ideally, developers within Tendermint will be able to interact with persisted +data via an interface that can function, approximately like an object +store, and this storage interface will be able to accommodate all existing +persistence workloads (e.g. block storage, local peer management information +like the "address book", crash-recovery log like the WAL.) In addition to +providing a more ergonomic interface and new semantics, by selecting a single +storage engine tendermint can use native durability and atomicity features of +the storage engine and simplify its own implementations. + +Data Access Patterns +~~~~~~~~~~~~~~~~~~~~ + +Tendermint's data access patterns have the following characteristics: + +- aggregate data size often exceeds memory. + +- data is rarely mutated after it's written for most data (e.g. blocks), but + small amounts of working data is persisted by nodes and is frequently + mutated (e.g. peer information, validator information.) + +- read patterns can be quite random. + +- crash resistance and crash recovery, provided by write-ahead-logs (in + consensus, and potentially for the mempool) should allow the system to + resume work after an unexpected shut down. + +Project Goals +~~~~~~~~~~~~~ + +As we think about replacing the current persistence layer, we should consider +the following high level goals: + +- drop dependencies on storage engines that have a CGo dependency. + +- encapsulate data format and data storage from higher-level services + (e.g. reactors) within tendermint. + +- select a storage engine that does not incur any additional operational + complexity (e.g. database should be embedded.) + +- provide database semantics with sufficient ACID, snapshots, and + transactional support. + +Open Questions +~~~~~~~~~~~~~~ + +The following questions remain: + +- what kind of data-access concurrency does tendermint require? + +- would tendermint users SDK/etc. benefit from some shared database + infrastructure? + + - In earlier conversations it seemed as if the SDK has selected Badger and + RocksDB for their storage engines, and it might make sense to be able to + (optionally) pass a handle to a Badger instance between the libraries in + some cases. + +- what are typical data sizes, and what kinds of memory sizes can we expect + operators to be able to provide? + +- in addition to simple persistence, what kind of additional semantics would + tendermint like to enjoy (e.g. transactional semantics, unique constraints, + indexes, in-place-updates, etc.)? + +Decision Framework +~~~~~~~~~~~~~~~~~~ + +Given the constraint of removing the CGo dependency, the decision is between +"badger" and "boltdb" (in the form of the etcd/CoreOS fork,) as low level. On +top of this and somewhat orthogonally, we must also decide on the interface to +the database and how the larger application will have to interact with the +database layer. Users of the data layer shouldn't ever need to interact with +raw byte slices from the database, and should mostly have the experience of +interacting with Go-types. + +Badger is more consistently developed and has a broader feature set than +Bolt. At the same time, Badger is likely more memory intensive and may have +more overhead in terms of open file handles given it's model. At first glance, +Badger is the obvious choice: it's actively developed and it has a lot of +features that could be useful. Bolt is not without some benefits: it's stable +and is maintained by the etcd folks, it's simpler model (single memory mapped +file, etc,) may be easier to reason about. + +I propose that we consider the following specific questions about storage +engines: + +- does Badger's evolving development, which may result in data file format + changes in the future, and could restrict our access to using the latest + version of the library between major upgrades, present a problem? + +- do we do we have goals/concerns about memory footprint that Badger may + prevent us from hitting, particularly as data sets grow over time? + +- what kind of additional tooling might we need/like to build (dump/restore, + etc.)? + +- do we want to run unit/integration tests against a data files on disk rather + than relying exclusively on the memory database? + +Project Scope +~~~~~~~~~~~~~ + +This project will consist of the following aspects: + +- selecting a storage engine, and modifying the tendermint codebase to + disallow any configuration of the storage engine outside of the tendermint. + +- remove the dependency on the current tm-db interfaces and replace with some + internalized, safe, and ergonomic interface for data persistence with all + required database semantics. + +- update core tendermint code to use the new interface and data tools. + +Next Steps +~~~~~~~~~~ + +- circulate the RFC, and discuss options with appropriate stakeholders. + +- write brief ADR to summarize decisions around technical decisions reached + during the RFC phase. + +References +---------- + +- `bolddb `_ +- `badger `_ +- `badgerdb overview `_ +- `botldb overview `_ +- `boltdb vs badger `_ +- `bolthold `_ +- `badgerhold `_ +- `Pebble `_ +- `SDK Issue Regarding IVAL `_ +- `SDK Discussion about SMT/IVAL `_ + +Discussion +---------- + +- All things being equal, my tendency would be to use badger, with badgerhold + (if that makes sense) for its ergonomics and indexing capabilities, which + will require some small selection of wrappers for better write transaction + support. This is a weakly held tendency/belief and I think it would be + useful for the RFC process to build consensus (or not) around this basic + assumption. diff --git a/docs/rfc/rfc-002-ipc-ecosystem.md b/docs/rfc/rfc-002-ipc-ecosystem.md new file mode 100644 index 000000000..48c541ef0 --- /dev/null +++ b/docs/rfc/rfc-002-ipc-ecosystem.md @@ -0,0 +1,420 @@ +# RFC 002: Interprocess Communication (IPC) in Tendermint + +## Changelog + +- 08-Sep-2021: Initial draft (@creachadair). + + +## Abstract + +Communication in Tendermint among consensus nodes, applications, and operator +tools all use different message formats and transport mechanisms. In some +cases there are multiple options. Having all these options complicates both the +code and the developer experience, and hides bugs. To support a more robust, +trustworthy, and usable system, we should document which communication paths +are essential, which could be removed or reduced in scope, and what we can +improve for the most important use cases. + +This document proposes a variety of possible improvements of varying size and +scope. Specific design proposals should get their own documentation. + + +## Background + +The Tendermint state replication engine has a complex IPC footprint. + +1. Consensus nodes communicate with each other using a networked peer-to-peer + message-passing protocol. + +2. Consensus nodes communicate with the application whose state is being + replicated via the [Application BlockChain Interface (ABCI)][abci]. + +3. Consensus nodes export a network-accessible [RPC service][rpc-service] to + support operations (bootstrapping, debugging) and synchronization of [light clients][light-client]. + This interface is also used by the [`tendermint` CLI][tm-cli]. + +4. Consensus nodes export a gRPC service exposing a subset of the methods of + the RPC service described by (3). This was intended to simplify the + implementation of tools that already use gRPC to communicate with an + application (via the Cosmos SDK), and wanted to also talk to the consensus + node without implementing yet another RPC protocol. + + The gRPC interface to the consensus node has been deprecated and is slated + for removal in the forthcoming Tendermint v0.36 release. + +5. Consensus nodes may optionally communicate with a "remote signer" that holds + a validator key and can provide public keys and signatures to the consensus + node. One of the stated goals of this configuration is to allow the signer + to be run on a private network, separate from the consensus node, so that a + compromise of the consensus node from the public network would be less + likely to expose validator keys. + +## Discussion: Transport Mechanisms + +### Remote Signer Transport + +A remote signer communicates with the consensus node in one of two ways: + +1. "Raw": Using a TCP or Unix-domain socket which carries varint-prefixed + protocol buffer messages. In this mode, the consensus node is the server, + and the remote signer is the client. + + This mode has been deprecated, and is intended to be removed. + +2. gRPC: This mode uses the same protobuf messages as "Raw" node, but uses a + standard encrypted gRPC HTTP/2 stub as the transport. In this mode, the + remote signer is the server and the consensus node is the client. + + +### ABCI Transport + +In ABCI, the _application_ is the server, and the Tendermint consensus engine +is the client. Most applications implement the server using the [Cosmos SDK][cosmos-sdk], +which handles low-level details of the ABCI interaction and provides a +higher-level interface to the rest of the application. The SDK is written in Go. + +Beneath the SDK, the application communicates with Tendermint core in one of +two ways: + +- In-process direct calls (for applications written in Go and compiled against + the Tendermint code). This is an optimization for the common case where an + application is written in Go, to save on the overhead of marshaling and + unmarshaling requests and responses within the same process: + [`abci/client/local_client.go`][local-client] + +- A custom remote procedure protocol built on wire-format protobuf messages + using a socket (the "socket protocol"): [`abci/server/socket_server.go`][socket-server] + +The SDK also provides a [gRPC service][sdk-grpc] accessible from outside the +application, allowing transactions to be broadcast to the network, look up +transactions, and simulate transaction costs. + + +### RPC Transport + +The consensus node RPC service allows callers to query consensus parameters +(genesis data, transactions, commits), node status (network info, health +checks), application state (abci_query, abci_info), mempool state, and other +attributes of the node and its application. The service also provides methods +allowing transactions and evidence to be injected ("broadcast") into the +blockchain. + +The RPC service is exposed in several ways: + +- HTTP GET: Queries may be sent as URI parameters, with method names in the path. + +- HTTP POST: Queries may be sent as JSON-RPC request messages in the body of an + HTTP POST request. The server uses a custom implementation of JSON-RPC that + is not fully compatible with the [JSON-RPC 2.0 spec][json-rpc], but handles + the common cases. + +- Websocket: Queries may be sent as JSON-RPC request messages via a websocket. + This transport uses more or less the same JSON-RPC plumbing as the HTTP POST + handler. + + The websocket endpoint also includes three methods that are _only_ exported + via websocket, which appear to support event subscription. + +- gRPC: A subset of queries may be issued in protocol buffer format to the gRPC + interface described above under (4). As noted, this endpoint is deprecated + and will be removed in v0.36. + +### Opportunities for Simplification + +**Claim:** There are too many IPC mechanisms. + +The preponderance of ABCI usage is via the Cosmos SDK, which means the +application and the consensus node are compiled together into a single binary, +and the consensus node calls the ABCI methods of the application directly as Go +functions. + +We also need a true IPC transport to support ABCI applications _not_ written in +Go. There are also several known applications written in Rust, for example +(including [Anoma](https://github.com/anoma/anoma), Penumbra, +[Oasis](https://github.com/oasisprotocol/oasis-core), Twilight, and +[Nomic](https://github.com/nomic-io/nomic)). Ideally we will have at most one +such transport "built-in": More esoteric cases can be handled by a custom proxy. +Pragmatically, gRPC is probably the right choice here. + +The primary consumers of the multi-headed "RPC service" today are the light +client and the `tendermint` command-line client. There is probably some local +use via curl, but I expect that is mostly ad hoc. Ethan reports that nodes are +often configured with the ports to the RPC service blocked, which is good for +security but complicates use by the light client. + +### Context: Remote Signer Issues + +Since the remote signer needs a secure communication channel to exchange keys +and signatures, and is expected to run truly remotely from the node (i.e., on a +separate physical server), there is not a whole lot we can do here. We should +finish the deprecation and removal of the "raw" socket protocol between the +consensus node and remote signers, but the use of gRPC is appropriate. + +The main improvement we can make is to simplify the implementation quite a bit, +once we no longer need to support both "raw" and gRPC transports. + +### Context: ABCI Issues + +In the original design of ABCI, the presumption was that all access to the +application should be mediated by the consensus node. The idea is that outside +access could change application state and corrupt the consensus process, which +relies on the application to be deterministic. Of course, even without outside +access an application could behave nondeterministically, but allowing other +programs to send it requests was seen as courting trouble. + +Conversely, users noted that most of the time, tools written for a particular +application don't want to talk to the consensus module directly. The +application "owns" the state machine the consensus engine is replicating, so +tools that care about application state should talk to the application. +Otherwise, they would have to bake in knowledge about Tendermint (e.g., its +interfaces and data structures) just because of the mediation. + +For clients to talk directly to the application, however, there is another +concern: The consensus node is the ABCI _client_, so it is inconvenient for the +application to "push" work into the consensus module via ABCI itself. The +current implementation works around this by calling the consensus node's RPC +service, which exposes an `ABCIQuery` kitchen-sink method that allows the +application a way to poke ABCI messages in the other direction. + +Without this RPC method, you could work around this (at least in principle) by +having the consensus module "poll" the application for work that needs done, +but that has unsatisfactory implications for performance and robustness, as +well as being harder to understand. + +There has apparently been discussion about trying to make a more bidirectional +communication between the consensus node and the application, but this issue +seems to still be unresolved. + +Another complication of ABCI is that it requires the application (server) to +maintain [four separate connections][abci-conn]: One for "consensus" operations +(BeginBlock, EndBlock, DeliverTx, Commit), one for "mempool" operations, one +for "query" operations, and one for "snapshot" (state synchronization) operations. +The rationale seems to have been that these groups of operations should be able +to proceed concurrently with each other. In practice, it results in a very complex +state management problem to coordinate state updates between the separate streams. +While application authors in Go are mostly insulated from that complexity by the +Cosmos SDK, the plumbing to maintain those separate streams is complicated, hard +to understand, and we suspect it contains concurrency bugs and/or lock contention +issues affecting performance that are subtle and difficult to pin down. + +Even without changing the semantics of any ABCI operations, this code could be +made smaller and easier to debug by separating the management of concurrency +and locking from the IPC transport: If all requests and responses are routed +through one connection, the server can explicitly maintain priority queues for +requests and responses, and make less-conservative decisions about when locks +are (or aren't) required to synchronize state access. With independent queues, +the server must lock conservatively, and no optimistic scheduling is practical. + +This would be a tedious implementation change, but should be achievable without +breaking any of the existing interfaces. More importantly, it could potentially +address a lot of difficult concurrency and performance problems we currently +see anecdotally but have difficultly isolating because of how intertwined these +separate message streams are at runtime. + +TODO: Impact of ABCI++ for this topic? + +### Context: RPC Issues + +The RPC system serves several masters, and has a complex surface area. I +believe there are some improvements that can be exposed by separating some of +these concerns. + +The Tendermint light client currently uses the RPC service to look up blocks +and transactions, and to forward ABCI queries to the application. The light +client proxy uses the RPC service via a websocket. The Cosmos IBC relayer also +uses the RPC service via websocket to watch for transaction events, and uses +the `ABCIQuery` method to fetch information and proofs for posted transactions. + +Some work is already underway toward using P2P message passing rather than RPC +to synchronize light client state with the rest of the network. IBC relaying, +however, requires access to the event system, which is currently not accessible +except via the RPC interface. Event subscription _could_ be exposed via P2P, +but that is a larger project since it adds P2P communication load, and might +thus have an impact on the performance of consensus. + +If event subscription can be moved into the P2P network, we could entirely +remove the websocket transport, even for clients that still need access to the +RPC service. Until then, we may still be able to reduce the scope of the +websocket endpoint to _only_ event subscription, by moving uses of the RPC +server as a proxy to ABCI over to the gRPC interface. + +Having the RPC server still makes sense for local bootstrapping and operations, +but can be further simplified. Here are some specific proposals: + +- Remove the HTTP GET interface entirely. + +- Simplify JSON-RPC plumbing to remove unnecessary reflection and wrapping. + +- Remove the gRPC interface (this is already planned for v0.36). + +- Separate the websocket interface from the rest of the RPC service, and + restrict it to only event subscription. + + Eventually we should try to emove the websocket interface entirely, but we + will need to revisit that (probably in a new RFC) once we've done some of the + easier things. + +These changes would preserve the ability of operators to issue queries with +curl (but would require using JSON-RPC instead of URI parameters). That would +be a little less user-friendly, but for a use case that should not be that +prevalent. + +These changes would also preserve compatibility with existing JSON-RPC based +code paths like the `tendermint` CLI and the light client (even ahead of +further work to remove that dependency). + +**Design goal:** An operator should be able to disable non-local access to the +RPC server on any node in the network without impairing the ability of the +network to function for service of state replication, including light clients. + +**Design principle:** All communication required to implement and monitor the +consensus network should use P2P, including the various synchronizations. + +### Options for ABCI Transport + +The majority of current usage is in Go, and the majority of that is mediated by +the Cosmos SDK, which uses the "direct call" interface. There is probably some +opportunity to clean up the implementation of that code, notably by inverting +which interface is at the "top" of the abstraction stack (currently it acts +like an RPC interface, and escape-hatches into the direct call). However, this +general approach works fine and doesn't need to be fundamentally changed. + +For applications _not_ written in Go, the two remaining options are the +"socket" protocol (another variation on varint-prefixed protobuf messages over +an unstructured stream) and gRPC. It would be nice if we could get rid of one +of these to reduce (unneeded?) optionality. + +Since both the socket protocol and gRPC depend on protocol buffers, the +"socket" protocol is the most obvious choice to remove. While gRPC is more +complex, the set of languages that _have_ protobuf support but _lack_ gRPC +support is small. Moreover, gRPC is already widely used in the rest of the +ecosystem (including the Cosmos SDK). + +If some use case did arise later that can't work with gRPC, it would not be too +difficult for that application author to write a little proxy (in Go) that +bridges the convenient SDK APIs into a simpler protocol than gRPC. + +**Design principle:** It is better for an uncommon special case to carry the +burdens of its specialness, than to bake an escape hatch into the infrastructure. + +**Recommendation:** We should deprecate and remove the socket protocol. + +### Options for RPC Transport + +[ADR 057][adr-57] proposes using gRPC for the Tendermint RPC implementation. +This is still possible, but if we are able to simplify and decouple the +concerns as described above, I do not think it should be necessary. + +While JSON-RPC is not the best possible RPC protocol for all situations, it has +some advantages over gRPC for our domain. Specifically: + +- It is easy to call JSON-RPC manually from the command-line, which helps with + a common concern for the RPC service, local debugging and operations. + + Relatedly: JSON is relatively easy for humans to read and write, and it can + be easily copied and pasted to share sample queries and debugging results in + chat, issue comments, and so on. Ideally, the RPC service will not be used + for activities where the costs of a text protocol are important compared to + its legibility and manual usability benefits. + +- gRPC has an enormous dependency footprint for both clients and servers, and + many of the features it provides to support security and performance + (encryption, compression, streaming, etc.) are mostly irrelevant to local + use. Tendermint already needs to include a gRPC client for the remote signer, + but if we can avoid the need for a _client_ to depend on gRPC, that is a win + for usability. + +- If we intend to migrate light clients off RPC to use P2P entirely, there is + no advantage to forcing a temporary migration to gRPC along the way; and once + the light client is not dependent on the RPC service, the efficiency of the + protocol is much less important. + +- We can still get the benefits of generated data types using protocol buffers, even + without using gRPC: + + - Protobuf defines a standard JSON encoding for all message types so + languages with protobuf support do not need to worry about type mapping + oddities. + + - Using JSON means that even languages _without_ good protobuf support can + implement the protocol with a bit more work, and I expect this situation to + be rare. + +Even if a language lacks a good standard JSON-RPC mechanism, the protocol is +lightweight and can be implemented by simple send/receive over TCP or +Unix-domain sockets with no need for code generation, encryption, etc. gRPC +uses a complex HTTP/2 based transport that is not easily replicated. + +### Future Work + +The background and proposals sketched above focus on the existing structure of +Tendermint and improvements we can make in the short term. It is worthwhile to +also consider options for longer-term broader changes to the IPC ecosystem. +The following outlines some ideas at a high level: + +- **Consensus service:** Today, the application and the consensus node are + nominally connected only via ABCI. Tendermint was originally designed with + the assumption that all communication with the application should be mediated + by the consensus node. Based on further experience, however, the design goal + is now that the _application_ should be the mediator of application state. + + As noted above, however, ABCI is a client/server protocol, with the + application as the server. For outside clients that turns out to have been a + good choice, but it complicates the relationship between the application and + the consensus node: Previously transactions were entered via the node, now + they are entered via the app. + + We have worked around this by using the Tendermint RPC service to give the + application a "back channel" to the consensus node, so that it can push + transactions back into the consensus network. But the RPC service exposes a + lot of other functionality, too, including event subscription, block and + transaction queries, and a lot of node status information. + + Even if we can't easily "fix" the orientation of the ABCI relationship, we + could improve isolation by splitting out the parts of the RPC service that + the application needs as a back-channel, and sharing those _only_ with the + application. By defining a "consensus service", we could give the application + a way to talk back limited to only the capabilities it needs. This approach + has the benefit that we could do it without breaking existing use, and if we + later did "fix" the ABCI directionality, we could drop the special case + without disrupting the rest of the RPC interface. + +- **Event service:** Right now, the IBC relayer relies on the Tendermint RPC + service to provide a stream of block and transaction events, which it uses to + discover which transactions need relaying to other chains. While I think + that event subscription should eventually be handled via P2P, we could gain + some immediate benefit by splitting out event subscription from the rest of + the RPC service. + + In this model, an event subscription service would be exposed on the public + network, but on a different endpoint. This would remove the need for the RPC + service to support the websocket protocol, and would allow operators to + isolate potentially sensitive status query results from the public network. + + At the moment the relayers also use the RPC service to get block data for + synchronization, but work is already in progress to handle that concern via + the P2P layer. Once that's done, event subscription could be separated. + +Separating parts of the existing RPC service is not without cost: It might +require additional connection endpoints, for example, though it is also not too +difficult for multiple otherwise-independent services to share a connection. + +In return, though, it would become easier to reduce transport options and for +operators to independently control access to sensitive data. Considering the +viability and implications of these ideas is beyond the scope of this RFC, but +they are documented here since they follow from the background we have already +discussed. + +## References + +[abci]: https://github.com/tendermint/tendermint/tree/master/spec/abci +[rpc-service]: https://docs.tendermint.com/master/rpc/ +[light-client]: https://docs.tendermint.com/master/tendermint-core/light-client.html +[tm-cli]: https://github.com/tendermint/tendermint/tree/master/cmd/tendermint +[cosmos-sdk]: https://github.com/cosmos/cosmos-sdk/ +[local-client]: https://github.com/tendermint/tendermint/blob/master/abci/client/local_client.go +[socket-server]: https://github.com/tendermint/tendermint/blob/master/abci/server/socket_server.go +[sdk-grpc]: https://pkg.go.dev/github.com/cosmos/cosmos-sdk/types/tx#ServiceServer +[json-rpc]: https://www.jsonrpc.org/specification +[abci-conn]: https://github.com/tendermint/tendermint/blob/master/spec/abci/apps.md#state +[adr-57]: https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-057-RPC.md diff --git a/docs/rfc/rfc-003-performance-questions.md b/docs/rfc/rfc-003-performance-questions.md new file mode 100644 index 000000000..d9a0215b5 --- /dev/null +++ b/docs/rfc/rfc-003-performance-questions.md @@ -0,0 +1,283 @@ +# RFC 003: Taxonomy of potential performance issues in Tendermint + +## Changelog + +- 2021-09-02: Created initial draft (@wbanfield) +- 2021-09-14: Add discussion of the event system (@wbanfield) + +## Abstract + +This document discusses the various sources of performance issues in Tendermint and +attempts to clarify what work may be required to understand and address them. + +## Background + +Performance, loosely defined as the ability of a software process to perform its work +quickly and efficiently under load and within reasonable resource limits, is a frequent +topic of discussion in the Tendermint project. +To effectively address any issues with Tendermint performance we need to +categorize the various issues, understand their potential sources, and gauge their +impact on users. + +Categorizing the different known performance issues will allow us to discuss and fix them +more systematically. This document proposes a rough taxonomy of performance issues +and highlights areas where more research into potential performance problems is required. + +Understanding Tendermint's performance limitations will also be critically important +as we make changes to many of its subsystems. Performance is a central concern for +upcoming decisions regarding the `p2p` protocol, RPC message encoding and structure, +database usage and selection, and consensus protocol updates. + + +## Discussion + +This section attempts to delineate the different sections of Tendermint functionality +that are often cited as having performance issues. It raises questions and suggests +lines of inquiry that may be valuable for better understanding Tendermint's performance issues. + +As a note: We should avoid quickly adding many microbenchmarks or package level benchmarks. +These are prone to being worse than useless as they can obscure what _should_ be +focused on: performance of the system from the perspective of a user. We should, +instead, tune performance with an eye towards user needs and actions users make. These users comprise +both operators of Tendermint chains and the people generating transactions for +Tendermint chains. Both of these sets of users are largely aligned in wanting an end-to-end +system that operates quickly and efficiently. + +REQUEST: The list below may be incomplete, if there are additional sections that are often +cited as creating poor performance, please comment so that they may be included. + +### P2P + +#### Claim: Tendermint cannot scale to large numbers of nodes + +A complaint has been reported that Tendermint networks cannot scale to large numbers of nodes. +The listed number of nodes a user reported as causing issue was in the thousands. +We don't currently have evidence about what the upper-limit of nodes that Tendermint's +P2P stack can scale to. + +We need to more concretely understand the source of issues and determine what layer +is causing a problem. It's possible that the P2P layer, in the absence of any reactors +sending data, is perfectly capable of managing thousands of peer connections. For +a reasonable networking and application setup, thousands of connections should not present any +issue for the application. + +We need more data to understand the problem directly. We want to drive the popularity +and adoption of Tendermint and this will mean allowing for chains with more validators. +We should follow up with users experiencing this issue. We may then want to add +a series of metrics to the P2P layer to better understand the inefficiencies it produces. + +The following metrics can help us understand the sources of latency in the Tendermint P2P stack: + +* Number of messages sent and received per second +* Time of a message spent on the P2P layer send and receive queues + +The following metrics exist and should be leveraged in addition to those added: + +* Number of peers node's connected to +* Number of bytes per channel sent and received from each peer + +### Sync + +#### Claim: Block Syncing is slow + +Bootstrapping a new node in a network to the height of the rest of the network is believed to +take longer than users would like. Block sync requires fetching all of the blocks from +peers and placing them into the local disk for storage. A useful line of inquiry +is understanding how quickly a perfectly tuned system _could_ fetch all of the state +over a network so that we understand how much overhead Tendermint actually adds. + +The operation is likely to be _incredibly_ dependent on the environment in which +the node is being run. The factors that will influence syncing include: +1. Number of peers that a syncing node may fetch from. +2. Speed of the disk that a validator is writing to. +3. Speed of the network connection between the different peers that node is +syncing from. + +We should calculate how quickly this operation _could possibly_ complete for common chains and nodes. +To calculate how quickly this operation could possibly complete, we should assume that +a node is reading at line-rate of the NIC and writing at the full drive speed to its +local storage. Comparing this theoretical upper-limit to the actual sync times +observed by node operators will give us a good point of comparison for understanding +how much overhead Tendermint incurs. + +We should additionally add metrics to the blocksync operation to more clearly pinpoint +slow operations. The following metrics should be added to the block syncing operation: + +* Time to fetch and validate each block +* Time to execute a block +* Blocks sync'd per unit time + +### Application + +Applications performing complex state transitions have the potential to bottleneck +the Tendermint node. + +#### Claim: ABCI block delivery could cause slowdown + +ABCI delivers blocks in several methods: `BeginBlock`, `DeliverTx`, `EndBlock`, `Commit`. + +Tendermint delivers transactions one-by-one via the `DeliverTx` call. Most of the +transaction delivery in Tendermint occurs asynchronously and therefore appears unlikely to +form a bottleneck in ABCI. + +After delivering all transactions, Tendermint then calls the `Commit` ABCI method. +Tendermint [locks all access to the mempool][abci-commit-description] while `Commit` +proceeds. This means that an application that is slow to execute all of its +transactions or finalize state during the `Commit` method will prevent any new +transactions from being added to the mempool. Apps that are slow to commit will +prevent consensus from proceeded to the next consensus height since Tendermint +cannot validate block proposals or produce block proposals without the +AppHash obtained from the `Commit` method. We should add a metric for each +step in the ABCI protocol to track the amount of time that a node spends communicating +with the application at each step. + +#### Claim: ABCI serialization overhead causes slowdown + +The most common way to run a Tendermint application is using the Cosmos-SDK. +The Cosmos-SDK runs the ABCI application within the same process as Tendermint. +When an application is run in the same process as Tendermint, a serialization penalty +is not paid. This is because the local ABCI client does not serialize method calls +and instead passes the protobuf type through directly. This can be seen +in [local_client.go][abci-local-client-code]. + +Serialization and deserialization in the gRPC and socket protocol ABCI methods +may cause slowdown. While these may cause issue, they are not part of the primary +usecase of Tendermint and do not necessarily need to be addressed at this time. + +### RPC + +#### Claim: The Query API is slow. + +The query API locks a mutex across the ABCI connections. This causes consensus to +slow during queries, as ABCI is no longer able to make progress. This is known +to be causing issue in the cosmos-sdk and is being addressed [in the sdk][sdk-query-fix] +but a more robust solution may be required. Adding metrics to each ABCI client connection +and message as described in the Application section of this document would allow us +to further introspect the issue here. + +#### Claim: RPC Serialization may cause slowdown + +The Tendermint RPC uses a modified version of JSON-RPC. This RPC powers the `broadcast_tx_*` methods, +which is a critical method for adding transactions to Tendermint at the moment. This method is +likely invoked quite frequently on popular networks. Being able to perform efficiently +on this common and critical operation is very important. The current JSON-RPC implementation +relies heavily on type introspection via reflection, which is known to be very slow in +Go. We should therefore produce benchmarks of this method to determine how much overhead +we are adding to what, is likely to be, a very common operation. + +The other JSON-RPC methods are much less critical to the core functionality of Tendermint. +While there may other points of performance consideration within the RPC, methods that do not +receive high volumes of requests should not be prioritized for performance consideration. + +NOTE: Previous discussion of the RPC framework was done in [ADR 57][adr-57] and +there is ongoing work to inspect and alter the JSON-RPC framework in [RFC 002][rfc-002]. +Much of these RPC-related performance considerations can either wait until the work of RFC 002 work is done or be +considered concordantly with the in-flight changes to the JSON-RPC. + +### Protocol + +#### Claim: Gossiping messages is a slow process + +Currently, for any validator to successfully vote in a consensus _step_, it must +receive votes from greater than 2/3 of the validators on the network. In many cases, +it's preferable to receive as many votes as possible from correct validators. + +This produces a quadratic increase in messages that are communicated as more validators join the network. +(Each of the N validators must communicate with all other N-1 validators). + +This large number of messages communicated per step has been identified to impact +performance of the protocol. Given that the number of messages communicated has been +identified as a bottleneck, it would be extremely valuable to gather data on how long +it takes for popular chains with many validators to gather all votes within a step. + +Metrics that would improve visibility into this include: + +* Amount of time for a node to gather votes in a step. +* Amount of time for a node to gather all block parts. +* Number of votes each node sends to gossip (i.e. not its own votes, but votes it is +transmitting for a peer). +* Total number of votes each node sends to receives (A node may receive duplicate votes +so understanding how frequently this occurs will be valuable in evaluating the performance +of the gossip system). + +#### Claim: Hashing Txs causes slowdown in Tendermint + +Using a faster hash algorithm for Tx hashes is currently a point of discussion +in Tendermint. Namely, it is being considered as part of the [modular hashing proposal][modular-hashing]. +It is currently unknown if hashing transactions in the Mempool forms a significant bottleneck. +Although it does not appear to be documented as slow, there are a few open github +issues that indicate a possible user preference for a faster hashing algorithm, +including [issue 2187][issue-2187] and [issue 2186][issue-2186]. + +It is likely worth investigating what order of magnitude Tx hashing takes in comparison to other +aspects of adding a Tx to the mempool. It is not currently clear if the rate of adding Tx +to the mempool is a source of user pain. We should not endeavor to make large changes to +consensus critical components without first being certain that the change is highly +valuable and impactful. + +### Digital Signatures + +#### Claim: Verification of digital signatures may cause slowdown in Tendermint + +Working with cryptographic signatures can be computationally expensive. The cosmos +hub uses [ed25519 signatures][hub-signature]. The library performing signature +verification in Tendermint on votes is [benchmarked][ed25519-bench] to be able to perform an `ed25519` +signature in 75μs on a decently fast CPU. A validator in the Cosmos Hub performs +3 sets of verifications on the signatures of the 140 validators in the Hub +in a consensus round, during block verification, when verifying the prevotes, and +when verifying the precommits. With no batching, this would be roughly `3ms` per +round. It is quite unlikely, therefore, that this accounts for any serious amount +of the ~7 seconds of block time per height in the Hub. + +This may cause slowdown when syncing, since the process needs to constantly verify +signatures. It's possible that improved signature aggregation will lead to improved +light client or other syncing performance. In general, a metric should be added +to track block rate while blocksyncing. + +#### Claim: Our use of digital signatures in the consensus protocol contributes to performance issue + +Currently, Tendermint's digital signature verification requires that all validators +receive all vote messages. Each validator must receive the complete digital signature +along with the vote message that it corresponds to. This means that all N validators +must receive messages from at least 2/3 of the N validators in each consensus +round. Given the potential for oddly shaped network topologies and the expected +variable network roundtrip times of a few hundred milliseconds in a blockchain, +it is highly likely that this amount of gossiping is leading to a significant amount +of the slowdown in the Cosmos Hub and in Tendermint consensus. + +### Tendermint Event System + +#### Claim: The event system is a bottleneck in Tendermint + +The Tendermint Event system is used to communicate and store information about +internal Tendermint execution. The system uses channels internally to send messages +to different subscribers. Sending an event [blocks on the internal channel][event-send]. +The default configuration is to [use an unbuffered channel for event publishes][event-buffer-capacity]. +Several consumers of the event system also use an unbuffered channel for reads. +An example of this is the [event indexer][event-indexer-unbuffered], which takes an +unbuffered subscription to the event system. The result is that these unbuffered readers +can cause writes to the event system to block or slow down depending on contention in the +event system. This has implications for the consensus system, which [publishes events][consensus-event-send]. +To better understand the performance of the event system, we should add metrics to track the timing of +event sends. The following metrics would be a good start for tracking this performance: + +* Time in event send, labeled by Event Type +* Time in event receive, labeled by subscriber +* Event throughput, measured in events per unit time. + +### References +[modular-hashing]: https://github.com/tendermint/tendermint/pull/6773 +[issue-2186]: https://github.com/tendermint/tendermint/issues/2186 +[issue-2187]: https://github.com/tendermint/tendermint/issues/2187 +[rfc-002]: https://github.com/tendermint/tendermint/pull/6913 +[adr-57]: https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-057-RPC.md +[issue-1319]: https://github.com/tendermint/tendermint/issues/1319 +[abci-commit-description]: https://github.com/tendermint/tendermint/blob/master/spec/abci/apps.md#commit +[abci-local-client-code]: https://github.com/tendermint/tendermint/blob/511bd3eb7f037855a793a27ff4c53c12f085b570/abci/client/local_client.go#L84 +[hub-signature]: https://github.com/cosmos/gaia/blob/0ecb6ed8a244d835807f1ced49217d54a9ca2070/docs/resources/genesis.md#consensus-parameters +[ed25519-bench]: https://github.com/oasisprotocol/curve25519-voi/blob/d2e7fc59fe38c18ca990c84c4186cba2cc45b1f9/PERFORMANCE.md +[event-send]: https://github.com/tendermint/tendermint/blob/5bd3b286a2b715737f6d6c33051b69061d38f8ef/libs/pubsub/pubsub.go#L338 +[event-buffer-capacity]: https://github.com/tendermint/tendermint/blob/5bd3b286a2b715737f6d6c33051b69061d38f8ef/types/event_bus.go#L14 +[event-indexer-unbuffered]: https://github.com/tendermint/tendermint/blob/5bd3b286a2b715737f6d6c33051b69061d38f8ef/state/indexer/indexer_service.go#L39 +[consensus-event-send]: https://github.com/tendermint/tendermint/blob/5bd3b286a2b715737f6d6c33051b69061d38f8ef/internal/consensus/state.go#L1573 +[sdk-query-fix]: https://github.com/cosmos/cosmos-sdk/pull/10045 diff --git a/docs/rfc/rfc-004-e2e-framework.rst b/docs/rfc/rfc-004-e2e-framework.rst new file mode 100644 index 000000000..8508ca173 --- /dev/null +++ b/docs/rfc/rfc-004-e2e-framework.rst @@ -0,0 +1,213 @@ +======================================== +RFC 004: E2E Test Framework Enhancements +======================================== + +Changelog +--------- + +- 2021-09-14: started initial draft (@tychoish) + +Abstract +-------- + +This document discusses a series of improvements to the e2e test framework +that we can consider during the next few releases to help boost confidence in +Tendermint releases, and improve developer efficiency. + +Background +---------- + +During the 0.35 release cycle, the E2E tests were a source of great +value, helping to identify a number of bugs before release. At the same time, +the tests were not consistently passing during this time, thereby reducing +their value, and forcing the core development team to allocate time and energy +to maintaining and chasing down issues with the e2e tests and the test +harness. The experience of this release cycle calls to mind a series of +improvements to the test framework, and this document attempts to capture +these improvements, along with motivations, and potential for impact. + +Projects +-------- + +Flexible Workload Generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Presently the e2e suite contains a single workload generation pattern, which +exists simply to ensure that the test networks have some work during their +runs. However, the shape and volume of the work is very consistent and is very +gentle to help ensure test reliability. + +We don't need a complex workload generation framework, but being able to have +a few different workload shapes available for test networks, both generated and +hand-crafted, would be useful. + +Workload patterns/configurations might include: + +- transaction targeting patterns (include light nodes, round robin, target + individual nodes) + +- variable transaction size over time. + +- transaction broadcast option (synchronously, checked, fire-and-forget, + mixed). + +- number of transactions to submit. + +- non-transaction workloads: (evidence submission, query, event subscription.) + +Configurable Generator +~~~~~~~~~~~~~~~~~~~~~~ + +The nightly e2e suite is defined by the `testnet generator +`_, +and it's difficult to add dimensions or change the focus of the test suite in +any way without modifying the implementation of the generator. If the +generator were more configurable, potentially via a file rather than in +the Go implementation, we could modify the focus of the test suite on the +fly. + +Features that we might want to configure: + +- number of test networks to generate of various topologies, to improve + coverage of different configurations. + +- test application configurations (to modify the latency of ABCI calls, etc.) + +- size of test networks. + +- workload shape and behavior. + +- initial sync and catch-up configurations. + +The workload generator currently provides runtime options for limiting the +generator to specific types of P2P stacks, and for generating multiple groups +of test cases to support parallelism. The goal is to extend this pattern and +avoid hardcoding the matrix of test cases in the generator code. Once the +testnet configuration generation behavior is configurable at runtime, +developers may be able to use the e2e framework to validate changes before +landing changes that break e2e tests a day later. + +In addition to the autogenerated suite, it might make sense to maintain a +small collection of hand-crafted cases that exercise configurations of +concern, to run as part of the nightly (or less frequent) loop. + +Implementation Plan Structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As a development team, we should determine the features should impact the e2e +testing early in the development cycle, and if we intend to modify the e2e +tests to exercise a feature, we should identify this early and begin the +integration process as early as possible. + +To facilitate this, we should adopt a practice whereby we exercise specific +features that are currently under development more rigorously in the e2e +suite, and then as development stabilizes we can reduce the number or weight +of these features in the suite. + +As of 0.35 there are essentially two end to end tests: the suite of 64 +generated test networks, and the hand crafted `ci.toml` test case. The +generated test cases help provide systemtic coverage, while the `ci` run +provides coverage for a large number of features. + +Reduce Cycle Time +~~~~~~~~~~~~~~~~~ + +One of the barriers to leveraging the e2e framework, and one of the challenges +in debugging failures, is the cycle time of running a single test iteration is +quite high: 5 minutes to build the docker image, plus the time to run the test +or tests. + +There are a number of improvements and enhancements that can reduce the cycle +time in practice: + +- reduce the amount of time required to build the docker image used in these + tests. Without the dependency on CGo, the tendermint binaries could be + (cross) compiled outside of the docker container and then injected into + them, which would take better advantage of docker's native caching, + although, without the dependency on CGo there would be no hard requirement + for the e2e tests to use docker. + +- support test parallelism. Because of the way the testnets are orchestrated + a single system can really only run one network at a time. For executions + (local or remote) with more resources, there's no reason to run a few + networks in parallel to reduce the feedback time. + +- prune testnet configurations that are unlikely to provide good signal, to + shorten the time to feedback. + +- apply some kind of tiered approach to test execution, to improve the + legibility of the test result. For example order tests by the dependency of + their features, or run test networks without perturbations before running + that configuration with perturbations, to be able to isolate the impact of + specific features. + +- orchestrate the test harness directly from go test rather than via a special + harness and shell scripts so e2e tests may more naively fit into developers + existing workflows. + +Many of these improvements, particularly, reducing the build time will also +reduce the time to get feedback during automated builds. + +Deeper Insights +~~~~~~~~~~~~~~~ + +When a test network fails, it's incredibly difficult to understand _why_ the +network failed, as the current system provides very little insight into the +system outside of the process logs. When a test network stalls or fails +developers should be able to quickly and easily get a sense of the state of +the network and all nodes. + +Improvements in persuit of this goal, include functionality that would help +node operators in production environments by improving the quality and utility +of the logging messages and other reported metrics, but also provide some +tools to collect and aggregate this data for developers in the context of test +networks. + +- Interleave messages from all nodes in the network to be able to correlate + events during the test run. + +- Collect structured metrics of the system operation (CPU/MEM/IO) during the + test run, as well as from each tendermint/application process. + +- Build (simple) tools to be able to render and summarize the data collected + during the test run to answer basic questions about test outcome. + +Flexible Assertions +~~~~~~~~~~~~~~~~~~~ + +Currently, all assertions run for every test network, which makes the +assertions pretty bland, and the framework primarily useful as a smoke-test +framework, but it might be useful to be able to write and run different +tests for different configurations. This could allow us to test outside of the +happy-path. + +In general our existing assertions occupy a fraction of the total test time, +so the relative cost of adding a few extra test assertions would be of limited +cost, and could help build confidence. + +Additional Kinds of Testing +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The existing e2e suite, exercises networks of nodes that have homogeneous +tendermint version, stable configuration, that are expected to make +progress. There are many other possible test configurations that may be +interesting to engage with. These could include dimensions, such as: + +- Multi-version testing to exercise our compatibility guarantees for networks + that might have different tendermint versions. + +- As a flavor or mult-version testing, include upgrade testing, to build + confidence in migration code and procedures. + +- Additional test applications, particularly practical-type applciations + including some that use gaiad and/or the cosmos-sdk. Test-only applications + that simulate other kinds of applications (e.g. variable application + operation latency.) + +- Tests of "non-viable" configurations that ensure that forbidden combinations + lead to halts. + +References +---------- + +- `ADR 66: End-to-End Testing <../architecture/adr-66-e2e-testing.md>`_ diff --git a/docs/rfc/rfc-005-event-system.rst b/docs/rfc/rfc-005-event-system.rst new file mode 100644 index 000000000..b4a00b43d --- /dev/null +++ b/docs/rfc/rfc-005-event-system.rst @@ -0,0 +1,122 @@ +===================== +RFC 005: Event System +===================== + +Changelog +--------- + +- 2021-09-17: Initial Draft (@tychoish) + +Abstract +-------- + +The event system within Tendermint, which supports a lot of core +functionality, also represents a major infrastructural liability. As part of +our upcoming review of the RPC interfaces and our ongoing thoughts about +stability and performance, as well as the preparation for Tendermint 1.0, we +should revisit the design and implementation of the event system. This +document discusses both the current state of the system and potential +directions for future improvement. + +Background +---------- + +Current State of Events +~~~~~~~~~~~~~~~~~~~~~~~ + +The event system makes it possible for clients, both internal and external, +to receive notifications of state replication events, such as new blocks, +new transactions, validator set changes, as well as intermediate events during +consensus. Because the event system is very cross cutting, the behavior and +performance of the event publication and subscription system has huge impacts +for all of Tendermint. + +The subscription service is exposed over the RPC interface, but also powers +the indexing (e.g. to an external database,) and is the mechanism by which +`BroadcastTxCommit` is able to wait for transactions to land in a block. + +The current pubsub mechanism relies on a couple of buffered channels, +primarily between all event creators and subscribers, but also for each +subscription. The result of this design is that, in some situations with the +right collection of slow subscription consumers the event system can put +backpressure on the consensus state machine and message gossiping in the +network, thereby causing nodes to lag. + +Improvements +~~~~~~~~~~~~ + +The current system relies on implicit, bounded queues built by the buffered channels, +and though threadsafe, can force all activity within Tendermint to serialize, +which does not need to happen. Additionally, timeouts for subscription +consumers related to the implementation of the RPC layer, may complicate the +use of the system. + +References +~~~~~~~~~~ + +- Legacy Implementation + - `publication of events `_ + - `send operation `_ + - `send loop `_ +- Related RFCs + - `RFC 002: IPC Ecosystem <./rfc-002-ipc-ecosystem.md>`_ + - `RFC 003: Performance Questions <./rfc-003-performance-questions.md>`_ + +Discussion +---------- + +Changes to Published Events +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As part of this process, the Tendermint team should do a study of the existing +event types and ensure that there are viable production use cases for +subscriptions to all event types. Instinctively it seems plausible that some +of the events may not be useable outside of tendermint, (e.g. ``TimeoutWait`` +or ``NewRoundStep``) and it might make sense to remove them. Certainly, it +would be good to make sure that we don't maintain infrastructure for unused or +un-useful message indefinitely. + +Blocking Subscription +~~~~~~~~~~~~~~~~~~~~~ + +The blocking subscription mechanism makes it possible to have *send* +operations into the subscription channel be un-buffered (the event processing +channel is still buffered.) In the blocking case, events from one subscription +can block processing that event for other non-blocking subscriptions. The main +case, it seems for blocking subscriptions is ensuring that a transaction has +been committed to a block for ``BroadcastTxCommit``. Removing blocking +subscriptions entirely, and potentially finding another way to implement +``BroadcastTxCommit``, could lead to important simplifications and +improvements to throughput without requiring large changes. + +Subscription Identification +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Before `#6386 `_, all +subscriptions were identified by the combination of a client ID and a query, +and with that change, it became possible to identify all subscription given +only an ID, but compatibility with the legacy identification means that there's a +good deal of legacy code as well as client side efficiency that could be +improved. + +Pubsub Changes +~~~~~~~~~~~~~~ + +The pubsub core should be implemented in a way that removes the possibility of +backpressure from the event system to impact the core system *or* for one +subscription to impact the behavior of another area of the +system. Additionally, because the current system is implemented entirely in +terms of a collection of buffered channels, the event system (and large +numbers of subscriptions) can be a source of memory pressure. + +These changes could include: + +- explicit cancellation and timeouts promulgated from callers (e.g. RPC end + points, etc,) this should be done using contexts. + +- subscription system should be able to spill to disk to avoid putting memory + pressure on the core behavior of the node (consensus, gossip). + +- subscriptions implemented as cursors rather than channels, with either + condition variables to simulate the existing "push" API or a client side + iterator API with some kind of long polling-type interface. diff --git a/docs/rfc/rfc-006-event-subscription.md b/docs/rfc/rfc-006-event-subscription.md new file mode 100644 index 000000000..4372f8d28 --- /dev/null +++ b/docs/rfc/rfc-006-event-subscription.md @@ -0,0 +1,204 @@ +# RFC 006: Event Subscription + +## Changelog + +- 30-Oct-2021: Initial draft (@creachadair) + +## Abstract + +The Tendermint consensus node allows clients to subscribe to its event stream +via methods on its RPC service. The ability to view the event stream is +valuable for clients, but the current implementation has some deficiencies that +make it difficult for some clients to use effectively. This RFC documents these +issues and discusses possible approaches to solving them. + + +## Background + +A running Tendermint consensus node exports a [JSON-RPC service][rpc-service] +that provides a [large set of methods][rpc-methods] for inspecting and +interacting with the node. One important cluster of these methods are the +`subscribe`, `unsubscribe`, and `unsubscribe_all` methods, which permit clients +to subscribe to a filtered stream of the [events generated by the node][events] +as it runs. + +Unlike the other methods of the service, the methods in the "event +subscription" cluster are not accessible via [ordinary HTTP GET or POST +requests][rpc-transport], but require upgrading the HTTP connection to a +[websocket][ws]. This is necessary because the `subscribe` request needs a +persistent channel to deliver results back to the client, and an ordinary HTTP +connection does not reliably persist across multiple requests. Since these +methods do not work properly without a persistent channel, they are _only_ +exported via a websocket connection, and are not routed for plain HTTP. + + +## Discussion + +There are some operational problems with the current implementation of event +subscription in the RPC service: + +- **Event delivery is not valid JSON-RPC.** When a client issues a `subscribe` + request, the server replies (correctly) with an initial empty acknowledgement + (`{}`). After that, each matching event is delivered "unsolicited" (without + another request from the client), as a separate [response object][json-response] + with the same ID as the initial request. + + This matters because it means a standard JSON-RPC client library can't + interact correctly with the event subscription mechanism. + + Even for clients that can handle unsolicited values pushed by the server, + these responses are invalid: They have an ID, so they cannot be treated as + [notifications][json-notify]; but the ID corresponds to a request that was + already completed. In practice, this means that general-purpose JSON-RPC + libraries cannot use this method correctly -- it requires a custom client. + + The Go RPC client from the Tendermint core can support this case, but clients + in other languages have no easy solution. + + This is the cause of issue [#2949][issue2949]. + +- **Subscriptions are terminated by disconnection.** When the connection to the + client is interrupted, the subscription is silently dropped. + + This is a reasonable behavior, but it matters because a client whose + subscription is dropped gets no useful error feedback, just a closed + connection. Should they try again? Is the node overloaded? Was the client + too slow? Did the caller forget to respond to pings? Debugging these kinds + of failures is unnecessarily painful. + + Websockets compound this, because websocket connections time out if no + traffic is seen for a while, and keeping them alive requires active + cooperation between the client and server. With a plain TCP socket, liveness + is handled transparently by the keepalive mechanism. On a websocket, + however, one side has to occasionally send a PING (if the connection is + otherwise idle). The other side must return a matching PONG in time, or the + connection is dropped. Apart from being tedious, this is highly susceptible + to CPU load. + + The Tendermint Go implementation automatically sends and responds to pings. + Clients in other languages (or not wanting to use the Tendermint libraries) + need to handle it explicitly. This burdens the client for no practical + benefit: A subscriber has no information about when matching events may be + available, so it shouldn't have to participate in keeping the connection + alive. + +- **Mismatched load profiles.** Most of the RPC service is mainly important for + low-volume local use, either by the application the node serves (e.g., the + ABCI methods) or by the node operator (e.g., the info methods). Event + subscription is important for remote clients, and may represent a much higher + volume of traffic. + + This matters because both are using the same JSON-RPC mechanism. For + low-volume local use, the ergonomics of JSON-RPC are a good fit: It's easy to + issue queries from the command line (e.g., using `curl`) or to write scripts + that call the RPC methods to monitor the running node. + + For high-volume remote use, JSON-RPC is not such a good fit: Even leaving + aside the non-standard delivery protocol mentioned above, the time and memory + cost of encoding event data matters for the stability of the node when there + can be potentially hundreds of subscribers. Moreover, a subscription is + long-lived compared to most RPC methods, in that it may persist as long the + node is active. + +- **Mismatched security profiles.** The RPC service exports several methods + that should not be open to arbitrary remote callers, both for correctness + reasons (e.g., `remove_tx` and `broadcast_tx_*`) and for operational + stability reasons (e.g., `tx_search`). A node may still need to expose + events, however, to support UI tools. + + This matters, because all the methods share the same network endpoint. While + it is possible to block the top-level GET and POST handlers with a proxy, + exposing the `/websocket` handler exposes not _only_ the event subscription + methods, but the rest of the service as well. + +### Possible Improvements + +There are several things we could do to improve the experience of developers +who need to subscribe to events from the consensus node. These are not all +mutually exclusive. + +1. **Split event subscription into a separate service**. Instead of exposing + event subscription on the same endpoint as the rest of the RPC service, + dedicate a separate endpoint on the node for _only_ event subscription. The + rest of the RPC services (_sans_ events) would remain as-is. + + This would make it easy to disable or firewall outside access to sensitive + RPC methods, without blocking access to event subscription (and vice versa). + This is probably worth doing, even if we don't take any of the other steps + described here. + +2. **Use a different protocol for event subscription.** There are various ways + we could approach this, depending how much we're willing to shake up the + current API. Here are sketches of a few options: + + - Keep the websocket, but rework the API to be more JSON-RPC compliant, + perhaps by converting event delivery into notifications. This is less + up-front change for existing clients, but retains all of the existing + implementation complexity, and doesn't contribute much toward more serious + performance and UX improvements later. + + - Switch from websocket to plain HTTP, and rework the subscription API to + use a more conventional request/response pattern instead of streaming. + This is a little more up-front work for existing clients, but leverages + better library support for clients not written in Go. + + The protocol would become more chatty, but we could mitigate that with + batching, and in return we would get more control over what to do about + slow clients: Instead of simply silently dropping them, as we do now, we + could drop messages and signal the client that they missed some data ("M + dropped messages since your last poll"). + + This option is probably the best balance between work, API change, and + benefit, and has a nice incidental effect that it would be easier to debug + subscriptions from the command-line, like the other RPC methods. + + - Switch to gRPC: Preserves a persistent connection and gives us a more + efficient binary wire format (protobuf), at the cost of much more work for + clients and harder debugging. This may be the best option if performance + and server load are our top concerns. + + Given that we are currently using JSON-RPC, however, I'm not convinced the + costs of encoding and sending messages on the event subscription channel + are the limiting factor on subscription efficiency, however. + +3. **Delegate event subscriptions to a proxy.** Give responsibility for + managing event subscription to a proxy that runs separately from the node, + and switch the node to push events to the proxy (like a webhook) instead of + serving subscribers directly. This is more work for the operator (another + process to configure and run) but may scale better for big networks. + + I mention this option for completeness, but making this change would be a + fairly substantial project. If we want to consider shifting responsibility + for event subscription outside the node anyway, we should probably be more + systematic about it. For a more principled approach, see point (4) below. + +4. **Move event subscription downstream of indexing.** We are already planning + to give applications more control over event indexing. By extension, we + might allow the application to also control how events are filtered, + queried, and subscribed. Having the application control these concerns, + rather than the node, might make life easier for developers building UI and + tools for that application. + + This is a much larger change, so I don't think it is likely to be practical + in the near-term, but it's worth considering as a broader option. Some of + the existing code for filtering and selection could be made more reusable, + so applications would not need to reinvent everything. + + +## References + +- [Tendermint RPC service][rpc-service] +- [Tendermint RPC routes][rpc-methods] +- [Discussion of the event system][events] +- [Discussion about RPC transport options][rpc-transport] (from RFC 002) +- [RFC 6455: The websocket protocol][ws] +- [JSON-RPC 2.0 Specification](https://www.jsonrpc.org/specification) + +[rpc-service]: https://docs.tendermint.com/master/rpc/ +[rpc-methods]: https://github.com/tendermint/tendermint/blob/master/internal/rpc/core/routes.go#L12 +[events]: ./rfc-005-event-system.rst +[rpc-transport]: ./rfc-002-ipc-ecosystem.md#rpc-transport +[ws]: https://datatracker.ietf.org/doc/html/rfc6455 +[json-response]: https://www.jsonrpc.org/specification#response_object +[json-notify]: https://www.jsonrpc.org/specification#notification +[issue2949]: https://github.com/tendermint/tendermint/issues/2949 diff --git a/docs/rfc/rfc-007-deterministic-proto-bytes.md b/docs/rfc/rfc-007-deterministic-proto-bytes.md new file mode 100644 index 000000000..0b55c2228 --- /dev/null +++ b/docs/rfc/rfc-007-deterministic-proto-bytes.md @@ -0,0 +1,140 @@ +# RFC 007 : Deterministic Proto Byte Serialization + +## Changelog + +- 09-Dec-2021: Initial draft (@williambanfield). + +## Abstract + +This document discusses the issue of stable byte-representation of serialized messages +within Tendermint and describes a few possible routes that could be taken to address it. + +## Background + +We use the byte representations of wire-format proto messages to produce +and verify hashes of data within the Tendermint codebase as well as for +producing and verifying cryptographic signatures over these signed bytes. + +The protocol buffer [encoding spec][proto-spec-encoding] does not guarantee that the byte representation +of a protocol buffer message will be the same between two calls to an encoder. +While there is a mode to force the encoder to produce the same byte representation +of messages within a single binary, these guarantees are not good enough for our +use case in Tendermint. We require multiple different versions of a binary running +Tendermint to be able to inter-operate. Additionally, we require that multiple different +systems written in _different languages_ be able to participate in different aspects +of the protocols of Tendermint and be able to verify the integrity of the messages +they each produce. + +While this has not yet created a problem that we know of in a running network, we should +make sure to provide stronger guarantees around the serialized representation of the messages +used within the Tendermint consensus algorithm to prevent any issue from occurring. + + +## Discussion + +Proto has the following points of variability that can produce non-deterministic byte representation: + +1. Encoding order of fields within a message. + +Proto allows fields to be encoded in any order and even be repeated. + +2. Encoding order of elements of a repeated field. + +`repeated` fields in a proto message can be serialized in any order. + +3. Presence or absence of default values. + +Types in proto have defined default values similar to Go's zero values. +Writing or omitting a default value are both legal ways of encoding a wire message. + +4. Serialization of 'unknown' fields. + +Unknown fields can be present when a message is created by a binary with a newer +version of the proto that contains fields that the deserializer in a different +binary does not yet know about. Deserializers in binaries that do not know about the field +will maintain the bytes of the unknown field but not place them into the deserialized structure. + +We have a few options to consider when producing this stable representation. + +### Options for deterministic byte representation + +#### Use only compliant serializers and constrain field usage + +According to [Cosmos-SDK ADR-27][cosmos-sdk-adr-27], when message types obey a simple +set of rules, gogoproto produces a consistent byte representation of serialized messages. +This seems promising, although more research is needed to guarantee gogoproto always +produces a consistent set of bytes on serialized messages. This would solve the problem +within Tendermint as written in Go, but would require ensuring that there are similar +serializers written in other languages that produce the same output as gogoproto. + +#### Reorder serialized bytes to ensure determinism. + +The serialized form of a proto message can be transformed into a canonical representation +by applying simple rules to the serialized bytes. Re-ordering the serialized bytes +would allow Tendermint to produce a canonical byte representation without having to +simultaneously maintain a custom proto marshaller. + +This could be implemented as a function in many languages that performed the following +producing bytes to sign or hashing: + +1. Does not add any of the data from unknown fields into the type to hash. + +Tendermint should not run into a case where it needs to verify the integrity of +data with unknown fields for the following reasons: + +The purpose of checking hash equality within Tendermint is to ensure that +its local copy of data matches the data that the network agreed on. There should +therefore not be a case where a process is checking hash equality using data that it did not expect +to receive. What the data represent may be opaque to the process, such as when checking the +transactions in a block, _but the process will still have expected to receive this data_, +despite not understanding what their internal structure is. It's not clear what it would +mean to verify that a block contains data that a process does not know about. + +The same reasoning applies for signature verification within Tendermint. Processes +verify that a digital signature signed over a set of bytes by locally reconstructing the +data structure that the digital signature signed using the process's local data. + +2. Reordered all message fields to be in tag-sorted order. + +Tag-sorting top-level fields will place all fields of the same tag in a adjacent +to eachother within the serialized representation. + +3. Reordered the contents of all `repeated` fields to be in lexicographically sorted order. + +`repeated` fields will appear in a message as having the same tag but will contain different +contents. Therefore, lexicographical sorting will produce a stable ordering of +fields with the same tag. + +4. Deleted all default values from the byte representation. + +Encoders can include default values or omit them. Most encoders appear to omit them +but we may wish to delete them just to be safe. + +5. Recursively performed these operations on any length-delimited subfields. + +Length delimited fields may contain messages, strings, or just bytes. However, +it's not possible to know what data is being represented by such a field. +A 'string' may happen to have the same structure as an embedded message and we cannot +disambiguate. For this reason, we must apply these same rules to all subfields that +may contain messages. Because we cannot know if we have totally mangled the interior 'string' +or not, this data should never be deserialized or used for anything beyond hashing. + +A **prototype** implementation by @creachadair of this can be found in [the wirepb repo][wire-pb]. +This could be implemented in multiple languages more simply than ensuring that there are +canonical proto serializers that match in each language. + +### Future work + +We should add clear documentation to the Tendermint codebase every time we +compare hashes of proto messages or use proto serialized bytes to produces a +digital signatures that we have been careful to ensure that the hashes are performed +properly. + +### References + +[proto-spec-encoding]: https://developers.google.com/protocol-buffers/docs/encoding +[spec-issue]: https://github.com/tendermint/tendermint/issues/5005 +[cosmos-sdk-adr-27]: https://github.com/cosmos/cosmos-sdk/blob/master/docs/architecture/adr-027-deterministic-protobuf-serialization.md +[cer-proto-3]: https://github.com/regen-network/canonical-proto3 +[wire-pb]: https://github.com/creachadair/wirepb + diff --git a/docs/rfc/rfc-008-do-not-panic.md b/docs/rfc/rfc-008-do-not-panic.md new file mode 100644 index 000000000..ec8c08f5e --- /dev/null +++ b/docs/rfc/rfc-008-do-not-panic.md @@ -0,0 +1,139 @@ +# RFC 008: Don't Panic + +## Changelog + +- 2021-12-17: initial draft (@tychoish) + +## Abstract + +Today, the Tendermint core codebase has panics in a number of cases as +a response to exceptional situations. These panics complicate testing, +and might make tendermint components difficult to use as a library in +some circumstances. This document outlines a project of converting +panics to errors and describes the situations where its safe to +panic. + +## Background + +Panics in Go are a great mechanism for aborting the current execution +for truly exceptional situations (e.g. memory errors, data corruption, +processes initialization); however, because they resemble exceptions +in other languages, it can be easy to over use them in the +implementation of software architectures. This certainly happened in +the history of Tendermint, and as we embark on the project of +stabilizing the package, we find ourselves in the right moment to +reexamine our use of panics, and largely where panics happen in the +code base. + +There are still some situations where panics are acceptable and +desireable, but it's important that Tendermint, as a project, comes to +consensus--perhaps in the text of this document--on the situations +where it is acceptable to panic. + +### References + +- [Defer Panic and Recover](https://go.dev/blog/defer-panic-and-recover) +- [Why Go gets exceptions right](https://dave.cheney.net/tag/panic) +- [Don't panic](https://dave.cheney.net/practical-go/presentations/gophercon-singapore-2019.html#_dont_panic) + +## Discussion + +### Acceptable Panics + +#### Initialization + +It is unambiguously safe (and desireable) to panic in `init()` +functions in response to any kind of error. These errors are caught by +tests, and occur early enough in process initialization that they +won't cause unexpected runtime crashes. + +Other code that is called early in process initialization MAY panic, +in some situations if it's not possible to return an error or cause +the process to abort early, although these situations should be +vanishingly slim. + +#### Data Corruption + +If Tendermint code encounters an inconsistency that could be +attributed to data corruption or a logical impossibility it is safer +to panic and crash the process than continue to attempt to make +progress in these situations. + +Examples including reading data out of the storage engine that +is invalid or corrupt, or encountering an ambiguous situation where +the process should halt. Generally these forms of corruption are +detected after interacting with a trusted but external data source, +and reflect situations where the author thinks its safer to terminate +the process immediately rather than allow execution to continue. + +#### Unrecoverable Consensus Failure + +In general, a panic should be used in the case of unrecoverable +consensus failures. If a process detects that the network is +behaving in an incoherent way and it does not have a clearly defined +and mechanism for recovering, the process should panic. + +#### Static Validity + +It is acceptable to panic for invariant violations, within a library +or package, in situations that should be statically impossible, +because there is no way to make these kinds of assertions at compile +time. + +For example, type-asserting `interface{}` values returned by +`container/list` and `container/heap` (and similar), is acceptable, +because package authors should have exclusive control of the inputs to +these containers. Packages should not expose the ability to add +arbitrary values to these data structures. + +#### Controlled Panics Within Libraries + +In some algorithms with highly recursive structures or very nested +call patterns, using a panic, in combination with conditional recovery +handlers results in more manageable code. Ultimately this is a limited +application, and implementations that use panics internally should +only recover conditionally, filtering out panics rather than ignoring +or handling all panics. + +#### Request Handling + +Code that handles responses to incoming/external requests +(e.g. `http.Handler`) should avoid panics, but practice this isn't +totally possible, and it makes sense that request handlers have some +kind of default recovery mechanism that will prevent one request from +terminating a service. + +### Unacceptable Panics + +In **no** other situation is it acceptable for the code to panic: + +- there should be **no** controlled panics that callers are required + to handle across library/package boundaries. +- callers of library functions should not expect panics. +- ensuring that arbitrary go routines can't panic. +- ensuring that there are no arbitrary panics in core production code, + espically code that can run at any time during the lifetime of a + process. +- all test code and fixture should report normal test assertions with + a mechanism like testify's `require` assertion rather than calling + panic directly. + +The goal of this increased "panic rigor" is to ensure that any escaped +panic is reflects a fixable bug in Tendermint. + +### Removing Panics + +The process for removing panics involve a few steps, and will be part +of an ongoing process of code modernization: + +- converting existing explicit panics to errors in cases where it's + possible to return an error, the errors can and should be handled, and returning + an error would not lead to data corruption or cover up data + corruption. + +- increase rigor around operations that can cause runtime errors, like + type assertions, nil pointer errors, array bounds access issues, and + either avoid these situations or return errors where possible. + +- remove generic panic handlers which could cover and hide known + panics. diff --git a/docs/rfc/rfc-009-consensus-parameter-upgrades.md b/docs/rfc/rfc-009-consensus-parameter-upgrades.md new file mode 100644 index 000000000..d5077840d --- /dev/null +++ b/docs/rfc/rfc-009-consensus-parameter-upgrades.md @@ -0,0 +1,128 @@ +# RFC 009 : Consensus Parameter Upgrade Considerations + +## Changelog + +- 06-Jan-2011: Initial draft (@williambanfield). + +## Abstract + +This document discusses the challenges of adding additional consensus parameters +to Tendermint and proposes a few solutions that can enable addition of consensus +parameters in a backwards-compatible way. + +## Background + +This section provides an overview of the issues of adding consensus parameters +to Tendermint. + +### Hash Compatibility + +Tendermint produces a hash of a subset of the consensus parameters. The values +that are hashed currently are the `BlockMaxGas` and the `BlockMaxSize`. These +are currently in the [HashedParams struct][hashed-params]. This hash is included +in the block and validators use it to validate that their local view of the consensus +parameters matches what the rest of the network is configured with. + +Any new consensus parameters added to Tendermint should be included in this +hash. This presents a challenge for verification of historical blocks when consensus +parameters are added. If a network produced blocks with a version of Tendermint that +did not yet have the new consensus parameters, the parameter hash it produced will +not reference the new parameters. Any nodes joining the network with the newer +version of Tendermint will have the new consensus parameters. Tendermint will need +to handle this case so that new versions of Tendermint with new consensus parameters +can still validate old blocks correctly without having to do anything overly complex +or hacky. + +### Allowing Developer-Defined Values and the `EndBlock` Problem + +When new consensus parameters are added, application developers may wish to set +values for them so that the developer-defined values may be used as soon as the +software upgrades. We do not currently have a clean mechanism for handling this. + +Consensus parameter updates are communicated from the application to Tendermint +within `EndBlock` of some height `H` and take effect at the next height, `H+1`. +This means that for updates that add a consensus parameter, there is a single +height where the new parameters cannot take effect. The parameters did not exist +in the version of the software that emitted the `EndBlock` response for height `H-1`, +so they cannot take effect at height `H`. The first height that the updated params +can take effect is height `H+1`. As of now, height `H` must run with the defaults. + +## Discussion + +### Hash Compatibility + +This section discusses possible solutions to the problem of maintaining backwards-compatibility +of hashed parameters while adding new parameters. + +#### Never Hash Defaults + +One solution to the problem of backwards-compatibility is to never include parameters +in the hash if the are using the default value. This means that blocks produced +before the parameters existed will have implicitly been created with the defaults. +This works because any software with newer versions of Tendermint must be using the +defaults for new parameters when validating old blocks since the defaults can not +have been updated until a height at which the parameters existed. + +#### Only Update HashedParams on Hash-Breaking Releases + +An alternate solution to never hashing defaults is to not update the hashed +parameters on non-hash-breaking releases. This means that when new consensus +parameters are added to Tendermint, there may be a release that makes use of the +parameters but does not verify that they are the same across all validators by +referencing them in the hash. This seems reasonably safe given the fact that +only a very far subset of the consensus parameters are currently verified at all. + +#### Version The Consensus Parameter Hash Scheme + +The upcoming work on [soft upgrades](https://github.com/tendermint/spec/pull/222) +proposes applying different hashing rules depending on the active block version. +The consensus parameter hash could be versioned in the same way. When different +block versions are used, a different set of consensus parameters will be included +in the hash. + +### Developer Defined Values + +This section discusses possible solutions to the problem of allowing application +developers to define values for the new parameters during the upgrade that adds +the parameters. + +#### Using `InitChain` for New Values + +One solution to the problem of allowing application developers to define values +for new consensus parameters is to call the `InitChain` ABCI method on application +startup and fetch the value for any new consensus parameters. The [response object][init-chain-response] +contains a field for `ConsensusParameter` updates so this may serve as a natural place +to put this logic. + +This poses a few difficulties. Nodes replaying old blocks while running new +software do not ever call `InitChain` after the initial time. They will therefore +not have a way to determine that the parameters changed at some height by using a +call to `InitChain`. The `EndBlock` response is how parameter changes at a height +are currently communicated to Tendermint and conflating these cases seems risky. + +#### Force Defaults For Single Height + +An alternate option is to not use `InitChain` and instead require chains to use the +default values of the new parameters for a single height. + +As documented in the upcoming [ADR-74][adr-74], popular chains often simply use the default +values. Additionally, great care is being taken to ensure that logic governed by upcoming +consensus parameters is not liveness-breaking. This means that, at worst-case, +chains will experience a single slow height while waiting for the new values to +by applied. + +#### Add a new `UpgradeChain` method + +An additional method for allowing chains to update the consensus parameters that +do not yet exist is to add a new `UpgradeChain` method to `ABCI`. The upgrade chain +method would be called when the chain detects that the version of block that it +is about to produce does not match the previous block. This method would be called +after `EndBlock` and would return the set of consensus parameters to use at the +next height. It would therefore give an application the chance to set the new +consensus parameters before running a height with these new parameter. + +### References + +[hashed-params]: https://github.com/tendermint/tendermint/blob/0ae974e63911804d4a2007bd8a9b3ad81d6d2a90/types/params.go#L49 +[init-chain-response]: https://github.com/tendermint/tendermint/blob/0ae974e63911804d4a2007bd8a9b3ad81d6d2a90/abci/types/types.pb.go#L1616 +[adr-74]: https://github.com/tendermint/tendermint/pull/7503 diff --git a/docs/rfc/rfc-010-p2p-light-client.rst b/docs/rfc/rfc-010-p2p-light-client.rst new file mode 100644 index 000000000..b5f465589 --- /dev/null +++ b/docs/rfc/rfc-010-p2p-light-client.rst @@ -0,0 +1,145 @@ +================================== +RFC 010: Peer to Peer Light Client +================================== + +Changelog +--------- + +- 2022-01-21: Initial draft (@tychoish) + +Abstract +-------- + +The dependency on access to the RPC system makes running or using the light +client more complicated than it should be, because in practice node operators +choose to restrict access to these end points (often correctly.) There is no +deep dependency for the light client on the RPC system, and there is a +persistent notion that "make a p2p light client" is a solution to this +operational limitation. This document explores the implications and +requirements of implementing a p2p-based light client, as well as the +possibilities afforded by this implementation. + +Background +---------- + +High Level Design +~~~~~~~~~~~~~~~~~ + +From a high level, the light client P2P implementation, is relatively straight +forward, but is orthogonal to the P2P-backed statesync implementation that +took place during the 0.35 cycle. The light client only really needs to be +able to request (and receive) a `LightBlock` at a given height. To support +this, a new Reactor would run on every full node and validator which would be +able to service these requests. The workload would be entirely +request-response, and the implementation of the reactor would likely be very +straight forward, and the implementation of the provider is similarly +relatively simple. + +The complexity of the project focuses around peer discovery, handling when +peers disconnect from the light clients, and how to change the current P2P +code to appropriately handle specialized nodes. + +I believe it's safe to assume that much of the current functionality of the +current ``light`` mode would *not* need to be maintained: there is no need to +proxy the RPC endpoints over the P2P layer and there may be no need to run a +node/process for the p2p light client (e.g. all use of this will be as a +client.) + +The ability to run light clients using the RPC system will continue to be +maintained. + +LibP2P +~~~~~~ + +While some aspects of the P2P light client implementation are orthogonal to +LibP2P project, it's useful to think about the ways that these efforts may +combine or interact. + +We expect to be able to leverage libp2p tools to provide some kind of service +discovery for tendermint-based networks. This means that it will be possible +for the p2p stack to easily identify specialized nodes, (e.g. light clients) +thus obviating many of the design challenges with providing this feature in +the context of the current stack. + +Similarly, libp2p makes it possible for a project to be able back their non-Go +light clients, without the major task of first implementing Tendermint's p2p +connection handling. We should identify if there exist users (e.g. the go IBC +relayer, it's maintainers, and operators) who would be able to take advantage +of p2p light client, before switching to libp2p. To our knowledge there are +limited implementations of this p2p protocol (a simple implementation without +secret connection support exists in rust but it has not been used in +production), and it seems unlikely that a team would implement this directly +ahead of its impending removal. + +User Cases +~~~~~~~~~~ + +This RFC makes a few assumptions about the use cases and users of light +clients in tendermint. + +The most active and delicate use cases for light clients is in the +implementation of the IBC relayer. Thus, we expect that providing P2P light +clients might increase the reliability of relayers and reduce the cost of +running a relayer, because relayer operators won't have to decide between rely +on public RPC endpoints (unreliable) or running their own full nodes +(expensive.) This also assumes that there are *no* other uses of the RPC in +the relayer, and unless the relayers have the option of dropping all RPC use, +it's unclear if a P2P light client will actually be able to successfully +remove the dependency on the RPC system. + +Given that the primary relayer implementation is Hermes (rust,) it might be +safe to deliver a version of Tendermint that adds a light client rector in +the full nodes, but that does not provide an implementation of a Go light +client. This either means that the rust implementation would need support for +the legacy P2P connection protocol or wait for the libp2p implementation. + +Client side light client (e.g. wallets, etc.) users may always want to use (a +subset) of the RPC rather than connect to the P2P network for an ephemeral +use. + +Discussion +---------- + +Implementation Questions +~~~~~~~~~~~~~~~~~~~~~~~~ + +Most of the complication in the is how to have a long lived light client node +that *only* runs the light client reactor, as this raises a few questions: + +- would users specify a single P2P node to connect to when creating a light + client or would they also need/want to discover peers? + + - **answer**: most light client use cases won't care much about selecting + peers (and those that do can either disable PEX and specify persistent + peers, *or* use the RPC light client.) + +- how do we prevent full nodes and validators from allowing their peer slots, + which are typically limited, from filling with light clients? If + light-clients aren't limited, how do we prevent light clients from consuming + resources on consensus nodes? + + - **answer**: I think we can institute an internal cap on number of light + client connections to accept and also elide light client nodes from PEX + (pre-libp2p, if we implement this.) I believe that libp2p should provide + us with the kind of service discovery semantics for network connectivity + that would obviate this issue. + +- when a light client disconnects from its peers will it need to reset its + internal state (cache)? does this change if it connects to the same peers? + + - **answer**: no, the internal state only needs to be reset if the light + client detects an invalid block or other divergence, and changing + witnesses--which will be more common with a p2p light client--need not + invalidate the cache. + +These issues are primarily present given that the current peer management later +does not have a particularly good service discovery mechanism nor does it have +a very sophisticated way of identifying nodes of different types or modes. + +Report Evidence +~~~~~~~~~~~~~~~ + +The current light client implementation currently has the ability to report +observed evidence. Either the notional light client reactor needs to be able +to handle these kinds of requests *or* all light client nodes need to also run +the evidence reactor. This could be configured at runtime. diff --git a/docs/rfc/rfc-011-delete-gas.md b/docs/rfc/rfc-011-delete-gas.md new file mode 100644 index 000000000..a4e643ef2 --- /dev/null +++ b/docs/rfc/rfc-011-delete-gas.md @@ -0,0 +1,162 @@ +# RFC 011: Remove Gas From Tendermint + +## Changelog + +- 03-Feb-2022: Initial draft (@williambanfield). +- 10-Feb-2022: Update in response to feedback (@williambanfield). +- 11-Feb-2022: Add reflection on MaxGas during consensus (@williambanfield). + +## Abstract + +In the v0.25.0 release, Tendermint added a mechanism for tracking 'Gas' in the mempool. +At a high level, Gas allows applications to specify how much it will cost the network, +often in compute resources, to execute a given transaction. While such a mechanism is common +in blockchain applications, it is not generalizable enough to be a maintained as a part +of Tendermint. This RFC explores the possibility of removing the concept of Gas from +Tendermint while still allowing applications the power to control the contents of +blocks to achieve similar goals. + +## Background + +The notion of Gas was included in the original Ethereum whitepaper and exists as +an important feature of the Ethereum blockchain. + +The [whitepaper describes Gas][eth-whitepaper-messages] as an Anti-DoS mechanism. The Ethereum Virtual Machine +provides a Turing complete execution platform. Without any limitations, malicious +actors could waste computation resources by directing the EVM to perform large +or even infinite computations. Gas serves as a metering mechanism to prevent this. + +Gas appears to have been added to Tendermint multiple times, initially as part of +a now defunct `/vm` package, and in its most recent iteration [as part of v0.25.0][gas-add-pr] +as a mechanism to limit the transactions that will be included in the block by an additional +parameter. + +Gas has gained adoption within the Cosmos ecosystem [as part of the Cosmos SDK][cosmos-sdk-gas]. +The SDK provides facilities for tracking how much 'Gas' a transaction is expected to take +and a mechanism for tracking how much gas a transaction has already taken. + +Non-SDK applications also make use of the concept of Gas. Anoma appears to implement +[a gas system][anoma-gas] to meter the transactions it executes. + +While the notion of gas is present in projects that make use of Tendermint, it is +not a concern of Tendermint's. Tendermint's value and goal is producing blocks +via a distributed consensus algorithm. Tendermint relies on the application specific +code to decide how to handle the transactions Tendermint has produced (or if the +application wants to consider them at all). Gas is an application concern. + +Our implementation of Gas is not currently enforced by consensus. Our current validation check that +occurs during block propagation does not verify that the block is under the configured `MaxGas`. +Ensuring that the transactions in a proposed block do not exceed `MaxGas` would require +input from the application during propagation. The `ProcessProposal` method introduced +as part of ABCI++ would enable such input but would further entwine Tendermint and +the application. The issue of checking `MaxGas` during block propagation is important +because it demonstrates that the feature as it currently exists is not implemented +as fully as it perhaps should be. + +Our implementation of Gas is causing issues for node operators and relayers. At +the moment, transactions that overflow the configured 'MaxGas' can be silently rejected +from the mempool. Overflowing MaxGas is the _only_ way that a transaction can be considered +invalid that is not directly a result of failing the `CheckTx`. Operators, and the application, +do not know that a transaction was removed from the mempool for this reason. A stateless check +of this nature is exactly what `CheckTx` exists for and there is no reason for the mempool +to keep track of this data separately. A special [MempoolError][add-mempool-error] field +was added in v0.35 to communicate to clients that a transaction failed after `CheckTx`. +While this should alleviate the pain for operators wishing to understand if their +transaction was included in the mempool, it highlights that the abstraction of +what is included in the mempool is not currently well defined. + +Removing Gas from Tendermint and the mempool would allow for the mempool to be a better +abstraction: any transaction that arrived at `CheckTx` and passed the check will either be +a candidate for a later block or evicted after a TTL is reached or to make room for +other, higher priority transactions. All other transactions are completely invalid and can be discarded forever. + +Removing gas will not be completely straightforward. It will mean ensuring that +equivalent functionality can be implemented outside of the mempool using the mempool's API. + +## Discussion + +This section catalogs the functionality that will need to exist within the Tendermint +mempool to allow Gas to be removed and replaced by application-side bookkeeping. + +### Requirement: Provide Mempool Tx Sorting Mechanism + +Gas produces a market for inclusion in a block. On many networks, a [gas fee][cosmos-sdk-fees] is +included in pending transactions. This fee indicates how much a user is willing to +pay per unit of execution and the fees are distributed to validators. + +Validators wishing to extract higher gas fees are incentivized to include transactions +with the highest listed gas fees into each block. This produces a natural ordering +of the pending transactions. Applications wishing to implement a gas mechanism need +to be able to order the transactions in the mempool. This can trivially be accomplished +by sorting transactions using the `priority` field available to applications as part of +v0.35's `ResponseCheckTx` message. + +### Requirement: Allow Application-Defined Block Resizing + +When creating a block proposal, Tendermint pulls a set of possible transactions out of +the mempool to include in the next block. Tendermint uses MaxGas to limit the set of transactions +it pulls out of the mempool fetching a set of transactions whose sum is less than MaxGas. + +By removing gas tracking from Tendermint's mempool, Tendermint will need to provide a way for +applications to determine an acceptable set of transactions to include in the block. + +This is what the new ABCI++ `PrepareProposal` method is useful for. Applications +that wish to limit the contents of a block by an application-defined limit may +do so by removing transactions from the proposal it is passed during `PrepareProposal`. +Applications wishing to reach parity with the current Gas implementation may do +so by creating an application-side limit: filtering out transactions from +`PrepareProposal` the cause the proposal the exceed the maximum gas. Additionally, +applications can currently opt to have all transactions in the mempool delivered +during `PrepareProposal` by passing `-1` for `MaxGas` and `MaxBytes` into +[ReapMaxBytesMaxGas][reap-max-bytes-max-gas]. + +### Requirement: Handle Transaction Metadata + +Moving the gas mechanism into applications adds an additional piece of complexity +to applications. The application must now track how much gas it expects a transaction +to consume. The mempool currently handles this bookkeeping responsibility and uses the estimated +gas to determine the set of transactions to include in the block. In order to task +the application with keeping track of this metadata, we should make it easier for the +application to do so. In general, we'll want to keep only one copy of this type +of metadata in the program at a time, either in the application or in Tendermint. + +The following sections are possible solutions to the problem of storing transaction +metadata without duplication. + +#### Metadata Handling: EvictTx Callback + +A possible approach to handling transaction metadata is by adding a new `EvictTx` +ABCI method. Whenever the mempool is removing a transaction, either because it has +reached its TTL or because it failed `RecheckTx`, `EvictTx` would be called with +the transaction hash. This would indicate to the application that it could free any +metadata it was storing about the transaction such as the computed gas fee. + +Eviction callbacks are pretty common in caching systems, so this would be very +well-worn territory. + +#### Metadata Handling: Application-Specific Metadata Field(s) + +An alternative approach to handling transaction metadata would be would be the +addition of a new application-metadata field in the `ResponseCheckTx`. This field +would be a protocol buffer message whose contents were entirely opaque to Tendermint. +The application would be responsible for marshalling and unmarshalling whatever data +it stored in this field. During `PrepareProposal`, the application would be passed +this metadata along with the transaction, allowing the application to use it to perform +any necessary filtering. + +If either of these proposed metadata handling techniques are selected, it's likely +useful to enable applications to gossip metadata along with the transaction it is +gossiping. This could easily take the form of an opaque proto message that is +gossiped along with the transaction. + +## References + +[eth-whitepaper-messages]: https://ethereum.org/en/whitepaper/#messages-and-transactions +[gas-add-pr]: https://github.com/tendermint/tendermint/pull/2360 +[cosmos-sdk-gas]: https://github.com/cosmos/cosmos-sdk/blob/c00cedb1427240a730d6eb2be6f7cb01f43869d3/docs/basics/gas-fees.md +[cosmos-sdk-fees]: https://github.com/cosmos/cosmos-sdk/blob/c00cedb1427240a730d6eb2be6f7cb01f43869d3/docs/basics/tx-lifecycle.md#gas-and-fees +[anoma-gas]: https://github.com/anoma/anoma/blob/6974fe1532a59db3574fc02e7f7e65d1216c1eb2/docs/src/specs/ledger.md#transaction-execution +[cosmos-sdk-fee]: https://github.com/cosmos/cosmos-sdk/blob/c00cedb1427240a730d6eb2be6f7cb01f43869d3/types/tx/tx.pb.go#L780-L794 +[issue-7750]: https://github.com/tendermint/tendermint/issues/7750 +[reap-max-bytes-max-gas]: https://github.com/tendermint/tendermint/blob/1ac58469f32a98f1c0e2905ca1773d9eac7b7103/internal/mempool/types.go#L45 +[add-mempool-error]: https://github.com/tendermint/tendermint/blob/205bfca66f6da1b2dded381efb9ad3792f9404cf/rpc/coretypes/responses.go#L239 diff --git a/docs/rfc/rfc-012-custom-indexing.md b/docs/rfc/rfc-012-custom-indexing.md new file mode 100644 index 000000000..9dc9bdbd6 --- /dev/null +++ b/docs/rfc/rfc-012-custom-indexing.md @@ -0,0 +1,352 @@ +# RFC 012: Event Indexing Revisited + +## Changelog + +- 11-Feb-2022: Add terminological notes. +- 10-Feb-2022: Updated from review feedback. +- 07-Feb-2022: Initial draft (@creachadair) + +## Abstract + +A Tendermint node allows ABCI events associated with block and transaction +processing to be "indexed" into persistent storage. The original Tendermint +implementation provided a fixed, built-in [proprietary indexer][kv-index] for +such events. + +In response to user requests to customize indexing, [ADR 065][adr065] +introduced an "event sink" interface that allows developers (at least in +theory) to plug in alternative index storage. + +Although ADR-065 was a good first step toward customization, its implementation +model does not satisfy all the user requirements. Moreover, this approach +leaves some existing technical issues with indexing unsolved. + +This RFC documents these concerns, and discusses some potential approaches to +solving them. This RFC does _not_ propose a specific technical decision. It is +meant to unify and focus some of the disparate discussions of the topic. + + +## Background + +We begin with some important terminological context. The term "event" in +Tendermint can be confusing, as the same word is used for multiple related but +distinct concepts: + +1. **ABCI Events** refer to the key-value metadata attached to blocks and + transactions by the application. These values are represented by the ABCI + `Event` protobuf message type. + +2. **Consensus Events** refer to the data published by the Tendermint node to + its pubsub bus in response to various consensus state transitions and other + important activities, such as round updates, votes, transaction delivery, + and block completion. + +This confusion is compounded because some "consensus event" values also have +"ABCI event" metadata attached to them. Notably, block and transaction items +typically have ABCI metadata assigned by the application. + +Indexers and RPC clients subscribed to the pubsub bus receive **consensus +events**, but they identify which ones to care about using query expressions +that match against the **ABCI events** associated with them. + +In the discussion that follows, we will use the term **event item** to refer to +a datum published to or received from the pubsub bus, and **ABCI event** or +**event metadata** to refer to the key/value annotations. + +**Indexing** in this context means recording the association between certain +ABCI metadata and the blocks or transactions they're attached to. The ABCI +metadata typically carry application-specific details like sender and recipient +addresses, catgory tags, and so forth, that are not part of consensus but are +used by UI tools to find and display transactions of interest. + +The consensus node records the blocks and transactions as part of its block +store, but does not persist the application metadata. Metadata persistence is +the task of the indexer, which can be (optionally) enabled by the node +operator. + +### History + +The [original indexer][kv-index] built in to Tendermint stored index data in an +embedded [`tm-db` database][tmdb] with a proprietary key layout. +In [ADR 065][adr065], we noted that this implementation has both performance +and scaling problems under load. Moreover, the only practical way to query the +index data is via the [query filter language][query] used for event +subscription. [Issue #1161][i1161] appears to be a motivational context for that ADR. + +To mitigate both of these concerns, we introduced the [`EventSink`][esink] +interface, combining the original transaction and block indexer interfaces +along with some service plumbing. Using this interface, a developer can plug +in an indexer that uses a more efficient storage engine, and provides a more +expressive query language. As a proof-of-concept, we built a [PostgreSQL event +sink][psql] that exports data to a [PostgreSQL database][postgres]. + +Although this approach addressed some of the immediate concerns, there are +several issues for custom indexing that have not been fully addressed. Here we +will discuss them in more detail. + +For further context, including links to user reports and related work, see also +the [Pluggable custom event indexing tracking issue][i7135] issue. + +### Issue 1: Tight Coupling + +The `EventSink` interface supports multiple implementations, but plugging in +implementations still requires tight integration with the node. In particular: + +- Any custom indexer must either be written in Go and compiled in to the + Tendermint binary, or the developer must write a Go shim to communicate with + the implementation and build that into the Tendermint binary. + +- This means to support a custom indexer, it either has to be integrated into + the Tendermint core repository, or every installation that uses that indexer + must fetch or build a patched version of Tendermint. + +The problem with integrating indexers into Tendermint Core is that every user +of Tendermint Core takes a dependency on all supported indexers, including +those they never use. Even if the unused code is disabled with build tags, +users have to remember to do this or potentially be exposed to security issues +that may arise in any of the custom indexers. This is a risk for Tendermint, +which is a trust-critical component of all applications built on it. + +The problem with _not_ integrating indexers into Tendermint Core is that any +developer who wants to use a particular indexer must now fetch or build a +patched version of the core code that includes the custom indexer. Besides +being inconvenient, this makes it harder for users to upgrade their node, since +they need to either re-apply their patches directly or wait for an intermediary +to do it for them. + +Even for developers who have written their applications in Go and link with the +consensus node directly (e.g., using the [Cosmos SDK][sdk]), these issues add a +potentially significant complication to the build process. + +### Issue 2: Legacy Compatibility + +The `EventSink` interface retains several limitations of the original +proprietary indexer. These include: + +- The indexer has no control over which event items are reported. Only the + exact block and transaction events that were reported to the original indexer + are reported to a custom indexer. + +- The interface requires the implementation to define methods for the legacy + search and query API. This requirement comes from the integation with the + [event subscription RPC API][event-rpc], but actually supporting these + methods is not trivial. + +At present, only the original KV indexer implements the query methods. Even the +proof-of-concept PostgreSQL implementation simply reports errors for all calls +to these methods. + +Even for a plugin written in Go, implementing these methods "correctly" would +require parsing and translating the custom query language over whatever storage +platform the indexer uses. + +For a plugin _not_ written in Go, even beyond the cost of integration the +developer would have to re-implement the entire query language. + +### Issue 3: Indexing Delays Consensus + +Within the node, indexing hooks in to the same internal pubsub dispatcher that +is used to export event items to the [event subscription RPC API][event-rpc]. +In contrast with RPC subscribers, however, indexing is a "privileged" +subscriber: If an RPC subscriber is "too slow", the node may terminate the +subscription and disconnect the client. That means that RPC subscribers may +lose (miss) event items. The indexer, however, is "unbuffered", and the +publisher will never drop or disconnect from it. If the indexer is slow, the +publisher will block until it returns, to ensure that no event items are lost. + +In practice, this means that the performance of the indexer has a direct effect +on the performance of the consensus node: If the indexer is slow or stalls, it +will slow or halt the progress of consensus. Users have already reported this +problem even with the built-in indexer (see, for example, [#7247][i7247]). +Extending this concern to arbitrary user-defined custom indexers gives that +risk a much larger surface area. + + +## Discussion + +It is not possible to simultaneously guarantee that publishing event items will +not delay consensus, and also that all event items of interest are always +completely indexed. + +Therefore, our choice is between eliminating delay (and minimizing loss) or +eliminating loss (and minimizing delay). Currently, we take the second +approach, which has led to user complaints about consensus delays due to +indexing and subscription overhead. + +- If we agree that consensus performance supersedes index completeness, our + design choices are to constrain the likelihood and frequency of missing event + items. + +- If we decide that consensus performance is more important than index + completeness, our option is to minimize overhead on the event delivery path + and document that indexer plugins constrain the rate of consensus. + +Since we have user reports requesting both properties, we have to choose one or +the other. Since the primary job of the consensus engine is to correctly, +robustly, reliablly, and efficiently replicate application state across the +network, I believe the correct choice is to favor consensus performance. + +An important consideration for this decision is that a node does not index +application metadata separately: If indexing is disabled, there is no built-in +mechanism to go back and replay or reconstruct the data that an indexer would +have stored. The node _does_ store the blockchain itself (i.e., the blocks and +their transactions), so potentially some use cases currently handled by the +indexer could be handled by the node. For example, allowing clients to ask +whether a given transaction ID has been committed to a block could in principle +be done without an indexer, since it does not depend on application metadata. + +Inevitably, a question will arise whether we could implement both strategies +and toggle between them with a flag. That would be a worst-case scenario, +requiring us to maintain the complexity of two very-different operational +concerns. If our goal is that Tendermint should be as simple, efficient, and +trustworthy as posible, there is not a strong case for making these options +configurable: We should pick a side and commit to it. + +### Design Principles + +Although there is no unique "best" solution to the issues described above, +there are some specific principles that a solution should include: + +1. **A custom indexer should not require integration into Tendermint core.** A + developer or node operator can create, build, deploy, and use a custom + indexer with a stock build of the Tendermint consensus node. + +2. **Custom indexers cannot stall consensus.** An indexer that is slow or + stalls cannot slow down or prevent core consensus from making progress. + + The plugin interface must give node operators control over the tolerances + for acceptable indexer performance, and the means to detect when indexers + are falling outside those tolerances, but indexer failures should "fail + safe" with respect to consensus (even if that means the indexer may miss + some data, in sufficiently-extreme circumstances). + +3. **Custom indexers control which event items they index.** A custom indexer + is not limited to only the current transaction and block events, but can + observe any event item published by the node. + +4. **Custom indexing is forward-compatible.** Adding new event item types or + metadata to the consensus node should not require existing custom indexers + to be rebuilt or modified, unless they want to take advantage of the new + data. + +5. **Indexers are responsible for answering queries.** An indexer plugin is not + required to support the legacy query filter language, nor to be compatible + with the legacy RPC endpoints for accessing them. Any APIs for clients to + query a custom index are the responsibility of the indexer, not the node. + +### Open Questions + +Given the constraints outlined above, there are important design questions we +must answer to guide any specific changes: + +1. **What is an acceptable probability that, given sufficiently extreme + operational issues, an indexer might miss some number of events?** + + There are two parts to this question: One is what constitutes an extreme + operational problem, the other is how likely we are to miss some number of + events items. + + - If the consensus is that no event item must ever be missed, no matter how + bad the operational circumstances, then we _must_ accept that indexing can + slow or halt consensus arbitrarily. It is impossible to guarantee complete + index coverage without potentially unbounded delays. + + - Otherwise, how much data can we afford to lose and how often? For example, + if we can ensure no event item will be lost unless the indexer halts for + at least five minutes, is that acceptable? What probabilities and time + ranges are reasonable for real production environments? + +2. **What level of operational overhead is acceptable to impose on node + operators to support indexing?** + + Are node operators willing to configure and run custom indexers as sidecar + type processes alongside a node? How much indexer setup above and beyond the + work of setting up the underlying node in isolation is tractable in + production networks? + + The answer to this question also informs the question of whether we should + keep an "in-process" indexing option, and to what extent that option needs + to satisfy the suggested design principles. + + Relatedly, to what extent do we need to be concerned about the cost of + encoding and sending event items to an external process (e.g., as JSON blobs + or protobuf wire messages)? Given that the node already encodes event items + as JSON for subscription purposes, the overhead would be negligible for the + node itself, but the indexer would have to decode to process the results. + +3. **What (if any) query APIs does the consensus node need to export, + independent of the indexer implementation?** + + One typical example is whether the node should be able to answer queries + like "is this transaction ID in a block?" Currently, a node cannot answer + this query _unless_ it runs the built-in KV indexer. Does the node need to + continue to support that query even for nodes that disable the KV indexer, + or which use a custom indexer? + +### Informal Design Intent + +The design principles described above implicate several components of the +Tendermint node, beyond just the indexer. In the context of [ADR 075][adr075], +we are re-working the RPC event subscription API to improve some of the UX +issues discussed above for RPC clients. It is our expectation that a solution +for pluggable custom indexing will take advantage of some of the same work. + +On that basis, the design approach I am considering for custom indexing looks +something like this (subject to refinement): + +1. A custom indexer runs as a separate process from the node. + +2. The indexer subscribes to event items via the ADR 075 events API. + + This means indexers would receive event payloads as JSON rather than + protobuf, but since we already have to support JSON encoding for the RPC + interface anyway, that should not increase complexity for the node. + +3. The existing PostgreSQL indexer gets reworked to have this form, and no + longer built as part of the Tendermint core binary. + + We can retain the code in the core repository as a proof-of-concept, or + perhaps create a separate repository with contributed indexers and move it + there. + +4. (Possibly) Deprecate and remove the legacy KV indexer, or disable it by + default. If we decide to remove it, we can also remove the legacy RPC + endpoints for querying the KV indexer. + + If we plan to do this, we should also investigate providing a way for + clients to query whether a given transaction ID has landed in a block. That + serves a common need, and currently _only_ works if the KV indexer is + enabled, but could be addressed more simply using the other data a node + already has stored, without having to answer more general queries. + + +## References + +- [ADR 065: Custom Event Indexing][adr065] +- [ADR 075: RPC Event Subscription Interface][adr075] +- [Cosmos SDK][sdk] +- [Event subscription RPC][event-rpc] +- [KV transaction indexer][kv-index] +- [Pluggable custom event indexing][i7135] (#7135) +- [PostgreSQL event sink][psql] + - [PostgreSQL database][postgres] +- [Query filter language][query] +- [Stream events to postgres for indexing][i1161] (#1161) +- [Unbuffered event subscription slow down the consensus][i7247] (#7247) +- [`EventSink` interface][esink] +- [`tm-db` library][tmdb] + +[adr065]: https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-065-custom-event-indexing.md +[adr075]: https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-075-rpc-subscription.md +[esink]: https://pkg.go.dev/github.com/tendermint/tendermint/internal/state/indexer#EventSink +[event-rpc]: https://docs.tendermint.com/master/rpc/#/Websocket/subscribe +[i1161]: https://github.com/tendermint/tendermint/issues/1161 +[i7135]: https://github.com/tendermint/tendermint/issues/7135 +[i7247]: https://github.com/tendermint/tendermint/issues/7247 +[kv-index]: https://github.com/tendermint/tendermint/blob/master/internal/state/indexer/tx/kv +[postgres]: https://postgresql.org/ +[psql]: https://github.com/tendermint/tendermint/blob/master/internal/state/indexer/sink/psql +[psql]: https://github.com/tendermint/tendermint/blob/master/internal/state/indexer/sink/psql +[query]: https://pkg.go.dev/github.com/tendermint/tendermint/internal/pubsub/query/syntax +[sdk]: https://github.com/cosmos/cosmos-sdk +[tmdb]: https://pkg.go.dev/github.com/tendermint/tm-db#DB diff --git a/docs/rfc/rfc-013-abci++.md b/docs/rfc/rfc-013-abci++.md new file mode 100644 index 000000000..6e83c9aa2 --- /dev/null +++ b/docs/rfc/rfc-013-abci++.md @@ -0,0 +1,253 @@ +# RFC 013: ABCI++ + +## Changelog + +- 2020-01-11: initialized +- 2022-02-11: Migrate RFC to tendermint repo (Originally [RFC 004](https://github.com/tendermint/spec/pull/254)) + +## Author(s) + +- Dev (@valardragon) +- Sunny (@sunnya97) + +## Context + +ABCI is the interface between the consensus engine and the application. +It defines when the application can talk to consensus during the execution of a blockchain. +At the moment, the application can only act at one phase in consensus, immediately after a block has been finalized. + +This restriction on the application prohibits numerous features for the application, including many scalability improvements that are now better understood than when ABCI was first written. +For example, many of the scalability proposals can be boiled down to "Make the miner / block proposers / validators do work, so the network does not have to". +This includes optimizations such as tx-level signature aggregation, state transition proofs, etc. +Furthermore, many new security properties cannot be achieved in the current paradigm, as the application cannot enforce validators do more than just finalize txs. +This includes features such as threshold cryptography, and guaranteed IBC connection attempts. +We propose introducing three new phases to ABCI to enable these new features, and renaming the existing methods for block execution. + +#### Prepare Proposal phase + +This phase aims to allow the block proposer to perform more computation, to reduce load on all other full nodes, and light clients in the network. +It is intended to enable features such as batch optimizations on the transaction data (e.g. signature aggregation, zk rollup style validity proofs, etc.), enabling stateless blockchains with validator provided authentication paths, etc. + +This new phase will only be executed by the block proposer. The application will take in the block header and raw transaction data output by the consensus engine's mempool. It will then return block data that is prepared for gossip on the network, and additional fields to include into the block header. + +#### Process Proposal Phase + +This phase aims to allow applications to determine validity of a new block proposal, and execute computation on the block data, prior to the blocks finalization. +It is intended to enable applications to reject block proposals with invalid data, and to enable alternate pipelined execution models. (Such as Ethereum-style immediate execution) + +This phase will be executed by all full nodes upon receiving a block, though on the application side it can do more work in the even that the current node is a validator. + +#### Vote Extension Phase + +This phase aims to allow applications to require their validators do more than just validate blocks. +Example usecases of this include validator determined price oracles, validator guaranteed IBC connection attempts, and validator based threshold crypto. + +This adds an app-determined data field that every validator must include with their vote, and these will thus appear in the header. + +#### Rename {BeginBlock, [DeliverTx], EndBlock} to FinalizeBlock + +The prior phases gives the application more flexibility in their execution model for a block, and they obsolete the current methods for how the consensus engine relates the block data to the state machine. Thus we refactor the existing methods to better reflect what is happening in the new ABCI model. + +This rename doesn't on its own enable anything new, but instead improves naming to clarify the expectations from the application in this new communication model. The existing ABCI methods `BeginBlock, [DeliverTx], EndBlock` are renamed to a single method called `FinalizeBlock`. + +#### Summary + +We include a more detailed list of features / scaling improvements that are blocked, and which new phases resolve them at the end of this document. + + +On the top is the existing definition of ABCI, and on the bottom is the proposed ABCI++. + +## Proposal + +Below we suggest an API to add these three new phases. +In this document, sometimes the final round of voting is referred to as precommit for clarity in how it acts in the Tendermint case. + +### Prepare Proposal + +*Note, APIs in this section will change after Vote Extensions, we list the adjusted APIs further in the proposal.* + +The Prepare Proposal phase allows the block proposer to perform application-dependent work in a block, to lower the amount of work the rest of the network must do. This enables batch optimizations to a block, which has been empirically demonstrated to be a key component for scaling. This phase introduces the following ABCI method + +```rust +fn PrepareProposal(Block) -> BlockData +``` + +where `BlockData` is a type alias for however data is internally stored within the consensus engine. In Tendermint Core today, this is `[]Tx`. + +The application may read the entire block proposal, and mutate the block data fields. Mutated transactions will still get removed from the mempool later on, as the mempool rechecks all transactions after a block is executed. + +The `PrepareProposal` API will be modified in the vote extensions section, for allowing the application to modify the header. + +### Process Proposal + +The Process Proposal phase sends the block data to the state machine, prior to running the last round of votes on the state machine. This enables features such as allowing validators to reject a block according to whether state machine deems it valid, and changing block execution pipeline. + +We introduce three new methods, + +```rust +fn VerifyHeader(header: Header, isValidator: bool) -> ResponseVerifyHeader {...} +fn ProcessProposal(block: Block) -> ResponseProcessProposal {...} +fn RevertProposal(height: usize, round: usize) {...} +``` + +where + +```rust +struct ResponseVerifyHeader { + accept_header: bool, + evidence: Vec +} +struct ResponseProcessProposal { + accept_block: bool, + evidence: Vec +} +``` + +Upon receiving a block header, every validator runs `VerifyHeader(header, isValidator)`. The reason for why `VerifyHeader` is split from `ProcessProposal` is due to the later sections for Preprocess Proposal and Vote Extensions, where there may be application dependent data in the header that must be verified before accepting the header. +If the returned `ResponseVerifyHeader.accept_header` is false, then the validator must precommit nil on this block, and reject all other precommits on this block. `ResponseVerifyHeader.evidence` is appended to the validators local `EvidencePool`. + +Upon receiving an entire block proposal (in the current implementation, all "block parts"), every validator runs `ProcessProposal(block)`. If the returned `ResponseProcessProposal.accept_block` is false, then the validator must precommit nil on this block, and reject all other precommits on this block. `ResponseProcessProposal.evidence` is appended to the validators local `EvidencePool`. + +Once a validator knows that consensus has failed to be achieved for a given block, it must run `RevertProposal(block.height, block.round)`, in order to signal to the application to revert any potentially mutative state changes it may have made. In Tendermint, this occurs when incrementing rounds. + +**RFC**: How do we handle the scenario where honest node A finalized on round x, and honest node B finalized on round x + 1? (e.g. when 2f precommits are publicly known, and a validator precommits themself but doesn't broadcast, but they increment rounds) Is this a real concern? The state root derived could change if everyone finalizes on round x+1, not round x, as the state machine can depend non-uniformly on timestamp. + +The application is expected to cache the block data for later execution. + +The `isValidator` flag is set according to whether the current node is a validator or a full node. This is intended to allow for beginning validator-dependent computation that will be included later in vote extensions. (An example of this is threshold decryptions of ciphertexts.) + +### DeliverTx rename to FinalizeBlock + +After implementing `ProcessProposal`, txs no longer need to be delivered during the block execution phase. Instead, they are already in the state machine. Thus `BeginBlock, DeliverTx, EndBlock` can all be replaced with a single ABCI method for `ExecuteBlock`. Internally the application may still structure its method for executing the block as `BeginBlock, DeliverTx, EndBlock`. However, it is overly restrictive to enforce that the block be executed after it is finalized. There are multiple other, very reasonable pipelined execution models one can go for. So instead we suggest calling this succession of methods `FinalizeBlock`. We propose the following API + +Replace the `BeginBlock, DeliverTx, EndBlock` ABCI methods with the following method + +```rust +fn FinalizeBlock() -> ResponseFinalizeBlock +``` + +where `ResponseFinalizeBlock` has the following API, in terms of what already exists + +```rust +struct ResponseFinalizeBlock { + updates: ResponseEndBlock, + tx_results: Vec +} +``` + +`ResponseEndBlock` should then be renamed to `ConsensusUpdates` and `ResponseDeliverTx` should be renamed to `ResponseTx`. + +### Vote Extensions + +The Vote Extensions phase allow applications to force their validators to do more than just validate within consensus. This is done by allowing the application to add more data to their votes, in the final round of voting. (Namely the precommit) +This additional application data will then appear in the block header. + +First we discuss the API changes to the vote struct directly + +```rust +fn ExtendVote(height: u64, round: u64) -> (UnsignedAppVoteData, SelfAuthenticatingAppData) +fn VerifyVoteExtension(signed_app_vote_data: Vec, self_authenticating_app_vote_data: Vec) -> bool +``` + +There are two types of data that the application can enforce validators to include with their vote. +There is data that the app needs the validator to sign over in their vote, and there can be self-authenticating vote data. Self-authenticating here means that the application upon seeing these bytes, knows its valid, came from the validator and is non-malleable. We give an example of each type of vote data here, to make their roles clearer. + +- Unsigned app vote data: A use case of this is if you wanted validator backed oracles, where each validator independently signs some oracle data in their vote, and the median of these values is used on chain. Thus we leverage consensus' signing process for convenience, and use that same key to sign the oracle data. +- Self-authenticating vote data: A use case of this is in threshold random beacons. Every validator produces a threshold beacon share. This threshold beacon share can be verified by any node in the network, given the share and the validators public key (which is not the same as its consensus public key). However, this decryption share will not make it into the subsequent block's header. They will be aggregated by the subsequent block proposer to get a single random beacon value that will appear in the subsequent block's header. Everyone can then verify that this aggregated value came from the requisite threshold of the validator set, without increasing the bandwidth for full nodes or light clients. To achieve this goal, the self-authenticating vote data cannot be signed over by the consensus key along with the rest of the vote, as that would require all full nodes & light clients to know this data in order to verify the vote. + +The `CanonicalVote` struct will acommodate the `UnsignedAppVoteData` field by adding another string to its encoding, after the `chain-id`. This should not interfere with existing hardware signing integrations, as it does not affect the constant offset for the `height` and `round`, and the vote size does not have an explicit upper bound. (So adding this unsigned app vote data field is equivalent from the HSM's perspective as having a superlong chain-ID) + +**RFC**: Please comment if you think it will be fine to have elongate the message the HSM signs, or if we need to explore pre-hashing the app vote data. + +The flow of these methods is that when a validator has to precommit, Tendermint will first produce a precommit canonical vote without the application vote data. It will then pass it to the application, which will return unsigned application vote data, and self authenticating application vote data. It will bundle the `unsigned_application_vote_data` into the canonical vote, and pass it to the HSM to sign. Finally it will package the self-authenticating app vote data, and the `signed_vote_data` together, into one final Vote struct to be passed around the network. + +#### Changes to Prepare Proposal Phase + +There are many use cases where the additional data from vote extensions can be batch optimized. +This is mainly of interest when the votes include self-authenticating app vote data that be batched together, or the unsigned app vote data is the same across all votes. +To allow for this, we change the PrepareProposal API to the following + +```rust +fn PrepareProposal(Block, UnbatchedHeader) -> (BlockData, Header) +``` + +where `UnbatchedHeader` essentially contains a "RawCommit", the `Header` contains a batch-optimized `commit` and an additional "Application Data" field in its root. This will involve a number of changes to core data structures, which will be gone over in the ADR. +The `Unbatched` header and `rawcommit` will never be broadcasted, they will be completely internal to consensus. + +#### Inter-process communication (IPC) effects + +For brevity in exposition above, we did not discuss the trade-offs that may occur in interprocess communication delays that these changs will introduce. +These new ABCI methods add more locations where the application must communicate with the consensus engine. +In most configurations, we expect that the consensus engine and the application will be either statically or dynamically linked, so all communication is a matter of at most adjusting the memory model the data is layed out within. +This memory model conversion is typically considered negligible, as delay here is measured on the order of microseconds at most, whereas we face milisecond delays due to cryptography and network overheads. +Thus we ignore the overhead in the case of linked libraries. + +In the case where the consensus engine and the application are ran in separate processes, and thus communicate with a form of Inter-process communication (IPC), the delays can easily become on the order of miliseconds based upon the data sent. Thus its important to consider whats happening here. +We go through this phase by phase. + +##### Prepare proposal IPC overhead + +This requires a round of IPC communication, where both directions are quite large. Namely the proposer communicating an entire block to the application. +However, this can be mitigated by splitting up `PrepareProposal` into two distinct, async methods, one for the block IPC communication, and one for the Header IPC communication. + +Then for chains where the block data does not depend on the header data, the block data IPC communication can proceed in parallel to the prior block's voting phase. (As a node can know whether or not its the leader in the next round) + +Furthermore, this IPC communication is expected to be quite low relative to the amount of p2p gossip time it takes to send the block data around the network, so this is perhaps a premature concern until more sophisticated block gossip protocols are implemented. + +##### Process Proposal IPC overhead + +This phase changes the amount of time available for the consensus engine to deliver a block's data to the state machine. +Before, the block data for block N would be delivered to the state machine upon receiving a commit for block N and then be executed. +The state machine would respond after executing the txs and before prevoting. +The time for block delivery from the consensus engine to the state machine after this change is the time of receiving block proposal N to the to time precommit on proposal N. +It is expected that this difference is unimportant in practice, as this time is in parallel to one round of p2p communication for prevoting, which is expected to be significantly less than the time for the consensus engine to deliver a block to the state machine. + +##### Vote Extension IPC overhead + +This has a small amount of data, but does incur an IPC round trip delay. This IPC round trip delay is pretty negligible as compared the variance in vote gossip time. (the IPC delay is typically on the order of 10 microseconds) + +## Status + +Proposed + +## Consequences + +### Positive + +- Enables a large number of new features for applications +- Supports both immediate and delayed execution models +- Allows application specific data from each validator +- Allows for batch optimizations across txs, and votes + +### Negative + +- This is a breaking change to all existing ABCI clients, however the application should be able to have a thin wrapper to replicate existing ABCI behavior. + - PrepareProposal - can be a no-op + - Process Proposal - has to cache the block, but can otherwise be a no-op + - Vote Extensions - can be a no-op + - Finalize Block - Can black-box call BeginBlock, DeliverTx, EndBlock given the cached block data + +- Vote Extensions adds more complexity to core Tendermint Data Structures +- Allowing alternate alternate execution models will lead to a proliferation of new ways for applications to violate expected guarantees. + +### Neutral + +- IPC overhead considerations change, but mostly for the better + +## References + +Reference for IPC delay constants: + +### Short list of blocked features / scaling improvements with required ABCI++ Phases + +| Feature | PrepareProposal | ProcessProposal | Vote Extensions | +| :--- | :---: | :---: | :---: | +| Tx based signature aggregation | X | | | +| SNARK proof of valid state transition | X | | | +| Validator provided authentication paths in stateless blockchains | X | | | +| Immediate Execution | | X | | +| Simple soft forks | | X | | +| Validator guaranteed IBC connection attempts | | | X | +| Validator based price oracles | | | X | +| Immediate Execution with increased time for block execution | X | X | X | +| Threshold Encrypted txs | X | X | X | diff --git a/docs/rfc/rfc-014-semantic-versioning.md b/docs/rfc/rfc-014-semantic-versioning.md new file mode 100644 index 000000000..0119901b1 --- /dev/null +++ b/docs/rfc/rfc-014-semantic-versioning.md @@ -0,0 +1,94 @@ +# RFC 014: Semantic Versioning + +## Changelog + +- 2021-11-19: Initial Draft +- 2021-02-11: Migrate RFC to tendermint repo (Originally [RFC 006](https://github.com/tendermint/spec/pull/365)) + +## Author(s) + +- Callum Waters @cmwaters + +## Context + +We use versioning as an instrument to hold a set of promises to users and signal when such a set changes and how. In the conventional sense of a Go library, major versions signal that the public Go API’s have changed in a breaking way and thus require the users of such libraries to change their usage accordingly. Tendermint is a bit different in that there are multiple users: application developers (both in-process and out-of-process), node operators, and external clients. More importantly, both how these users interact with Tendermint and what's important to these users differs from how users interact and what they find important in a more conventional library. + +This document attempts to encapsulate the discussions around versioning in Tendermint and draws upon them to propose a guide to how Tendermint uses versioning to make promises to its users. + +For a versioning policy to make sense, we must also address the intended frequency of breaking changes. The strictest guarantees in the world will not help users if we plan to break them with every release. + +Finally I would like to remark that this RFC only addresses the "what", as in what are the rules for versioning. The "how" of Tendermint implementing the versioning rules we choose, will be addressed in a later RFC on Soft Upgrades. + +## Discussion + +We first begin with a round up of the various users and a set of assumptions on what these users expect from Tendermint in regards to versioning: + +1. **Application Developers**, those that use the ABCI to build applications on top of Tendermint, are chiefly concerned with that API. Breaking changes will force developers to modify large portions of their codebase to accommodate for the changes. Some ABCI changes such as introducing priority for the mempool don't require any effort and can be lazily adopted whilst changes like ABCI++ may force applications to redesign their entire execution system. It's also worth considering that the API's for go developers differ to developers of other languages. The former here can use the entire Tendermint library, most notably the local RPC methods, and so the team must be wary of all public Go API's. +2. **Node Operators**, those running node infrastructure, are predominantly concerned with downtime, complexity and frequency of upgrading, and avoiding data loss. They may be also concerned about changes that may break the scripts and tooling they use to supervise their nodes. +3. **External Clients** are those that perform any of the following: + - consume the RPC endpoints of nodes like `/block` + - subscribe to the event stream + - make queries to the indexer + + This set are concerned with chain upgrades which will impact their ability to query state and block data as well as broadcast transactions. Examples include wallets and block explorers. + +4. **IBC module and relayers**. The developers of IBC and consumers of their software are concerned about changes that may affect a chain's ability to send arbitrary messages to another chain. Specifically, these users are affected by any breaking changes to the light client verification algorithm. + +Although we present them here as having different concerns, in a broader sense these user groups share a concern for the end users of applications. A crucial principle guiding this RFC is that **the ability for chains to provide continual service is more important than the actual upgrade burden put on the developers of these chains**. This means some extra burden for application developers is tolerable if it minimizes or substantially reduces downtime for the end user. + +### Modes of Interprocess Communication + +Tendermint has two primary mechanisms to communicate with other processes: RPC and P2P. The division marks the boundary between the internal and external components of the network: + +- The P2P layer is used in all cases that nodes (of any type) need to communicate with one another. +- The RPC interface is for any outside process that wants to communicate with a node. + +The design principle here is that **communication via RPC is to a trusted source** and thus the RPC service prioritizes inspection rather than verification. The P2P interface is the primary medium for verification. + +As an example, an in-browser light client would verify headers (and perhaps application state) via the p2p layer, and then pass along information on to the client via RPC (or potentially directly via a separate API). + +The main exceptions to this are the IBC module and relayers, which are external to the node but also require verifiable data. Breaking changes to the light client verification path mean that all neighbouring chains that are connected will no longer be able to verify state transitions and thus pass messages back and forward. + +## Proposal + +Tendermint version labels will follow the syntax of [Semantic Versions 2.0.0](https://semver.org/) with a major, minor and patch version. The version components will be interpreted according to these rules: + +For the entire cycle of a **major version** in Tendermint: + +- All blocks and state data in a blockchain can be queried. All headers can be verified even across minor version changes. Nodes can both block sync and state sync from genesis to the head of the chain. +- Nodes in a network are able to communicate and perform BFT state machine replication so long as the agreed network version is the lowest of all nodes in a network. For example, nodes using version 1.5.x and 1.2.x can operate together so long as the network version is 1.2 or lower (but still within the 1.x range). This rule essentially captures the concept of network backwards compatibility. +- Node RPC endpoints will remain compatible with existing external clients: + - New endpoints may be added, but old endpoints may not be removed. + - Old endpoints may be extended to add new request and response fields, but requests not using those fields must function as before the change. +- Migrations should be automatic. Upgrading of one node can happen asynchronously with respect to other nodes (although agreement of a network-wide upgrade must still occur synchronously via consensus). + +For the entire cycle of a **minor version** in Tendermint: + +- Public Go API's, for example in `node` or `abci` packages will not change in a way that requires any consumer (not just application developers) to modify their code. +- No breaking changes to the block protocol. This means that all block related data structures should not change in a way that breaks any of the hashes, the consensus engine or light client verification. +- Upgrades between minor versions may not result in any downtime (i.e., no migrations are required), nor require any changes to the config files to continue with the existing behavior. A minor version upgrade will require only stopping the existing process, swapping the binary, and starting the new process. + +A new **patch version** of Tendermint will only contain bug fixes and updates that impact the security and stability of Tendermint. + +These guarantees will come into effect at release 1.0. + +## Status + +Proposed + +## Consequences + +### Positive + +- Clearer communication of what versioning means to us and the effect they have on our users. + +### Negative + +- Can potentially incur greater engineering effort to uphold and follow these guarantees. + +### Neutral + +## References + +- [SemVer](https://semver.org/) +- [Tendermint Tracking Issue](https://github.com/tendermint/tendermint/issues/5680) diff --git a/docs/rfc/rfc-015-abci++-tx-mutation.md b/docs/rfc/rfc-015-abci++-tx-mutation.md new file mode 100644 index 000000000..3c7854ed3 --- /dev/null +++ b/docs/rfc/rfc-015-abci++-tx-mutation.md @@ -0,0 +1,261 @@ +# RFC 015: ABCI++ TX Mutation + +## Changelog + +- 23-Feb-2022: Initial draft (@williambanfield). +- 28-Feb-2022: Revised draft (@williambanfield). + +## Abstract + +A previous version of the ABCI++ specification detailed a mechanism for proposers to replace transactions +in the proposed block. This scheme required the proposer to construct new transactions +and mark these new transactions as replacing other removed transactions. The specification +was ambiguous as to how the replacement may be communicated to peer nodes. +This RFC discusses issues with this mechanism and possible solutions. + +## Background + +### What is the proposed change? + +A previous version of the ABCI++ specification proposed mechanisms for adding, removing, and replacing +transactions in a proposed block. To replace a transaction, the application running +`ProcessProposal` could mark a transaction as replaced by other application-supplied +transactions by returning a new transaction marked with the `ADDED` flag setting +the `new_hashes` field of the removed transaction to contain the list of transaction hashes +that replace it. In that previous specification for ABCI++, the full use of the +`new_hashes` field is left somewhat ambiguous. At present, these hashes are not +gossiped and are not eventually included in the block to signal replacement to +other nodes. The specification did indicate that the transactions specified in +the `new_hashes` field will be removed from the mempool but it's not clear how +peer nodes will learn about them. + +### What systems would be affected by adding transaction replacement? + +The 'transaction' is a central building block of a Tendermint blockchain, so adding +a mechanism for transaction replacement would require changes to many aspects of Tendermint. + +The following is a rough list of the functionality that this mechanism would affect: + +#### Transaction indexing + +Tendermint's indexer stores transactions and transaction results using the hash of the executed +transaction [as the key][tx-result-index] and the ABCI results and transaction bytes as the value. + +To allow transaction replacement, the replaced transactions would need to stored as well in the +indexer, likely as a mapping of original transaction to list of transaction hashes that replaced +the original transaction. + +#### Transaction inclusion proofs + +The result of a transaction query includes a Merkle proof of the existence of the +transaction in the block chain. This [proof is built][inclusion-proof] as a merkle tree +of the hashes of all of the transactions in the block where the queried transaction was executed. + +To allow transaction replacement, these proofs would need to be updated to prove +that a replaced transaction was included by replacement in the block. + +#### RPC-based transaction query parameters and results + +Tendermint's RPC allows clients to retrieve information about transactions via the +`/tx_search` and `/tx` RPC endpoints. + +RPC query results containing replaced transactions would need to be updated to include +information on replaced transactions, either by returning results for all of the replaced +transactions, or by including a response with just the hashes of the replaced transactions +which clients could proceed to query individually. + +#### Mempool transaction removal + +Additional logic would need to be added to the Tendermint mempool to clear out replaced +transactions after each block is executed. Tendermint currently removes executed transactions +from the mempool, so this would be a pretty straightforward change. + +## Discussion + +### What value may be added to Tendermint by introducing transaction replacement? + +Transaction replacement would would enable applications to aggregate or disaggregate transactions. + +For aggregation, a set of transactions that all related work, such as transferring +tokens between the same two accounts, could be replaced with a single transaction, +i.e. one that transfers a single sum from one account to the other. +Applications that make frequent use of aggregation may be able to achieve a higher throughput. +Aggregation would decrease the space occupied by a single client-submitted transaction in the block, allowing +more client-submitted transactions to be executed per block. + +For disaggregation, a very complex transaction could be split into multiple smaller transactions. +This may be useful if an application wishes to perform more fine-grained indexing on intermediate parts +of a multi-part transaction. + +### Drawbacks to transaction replacement + +Transaction replacement would require updating and shimming many of the places that +Tendermint records and exposes information about executed transactions. While +systems within Tendermint could be updated to account for transaction replacement, +such a system would leave new issues and rough edges. + +#### No way of guaranteeing correct replacement + +If a user issues a transaction to the network and the transaction is replaced, the +user has no guarantee that the replacement was correct. For example, suppose a set of users issue +transactions A, B, and C and they are all aggregated into a new transaction, D. +There is nothing guaranteeing that D was constructed correctly from the inputs. +The only way for users to ensure D is correct would be if D contained all of the +information of its constituent transactions, in which case, nothing is really gained by the replacement. + +#### Replacement transactions not signed by submitter + +Abstractly, Tendermint simply views transactions as a ball of bytes and therefore +should be fine with replacing one for another. However, many applications require +that transactions submitted to the chain be signed by some private key to authenticate +and authorize the transaction. Replaced transactions could not be signed by the +submitter, only by the application node. Therefore, any use of transaction replacement +could not contain authorization from the submitter and would either need to grant +application-submitted transactions power to perform application logic on behalf +of a user without their consent. + +Granting this power to application-submitted transactions would be very dangerous +and therefore might not be of much value to application developers. +Transaction replacement might only be really safe in the case of application-submitted +transactions or for transactions that require no authorization. For such transactions, +it's quite not quite clear what the utility of replacement is: the application can already +generate any transactions that it wants. The fact that such a transaction was a replacement +is not particularly relevant to participants in the chain since the application is +merely replacing its own transactions. + +#### New vector for censorship + +Depending on the implementation, transaction replacement may allow a node signal +to the rest of the chain that some transaction should no longer be considered for execution. +Honest nodes will use the replacement mechanism to signal that a transaction has been aggregated. +Malicious nodes will be granted a new vector for censoring transactions. +There is no guarantee that a replaced transactions is actually executed at all. +A malicious node could censor a transaction by simply listing it as replaced. +Honest nodes seeing the replacement would flush the transaction from their mempool +and not execute or propose it it in later blocks. + +### Transaction tracking implementations + +This section discusses possible ways to flesh out the implementation of transaction replacement. +Specifically, this section proposes a few alternative ways that Tendermint blockchains could +track and store transaction replacements. + +#### Include transaction replacements in the block + +One option to track transaction replacement is to include information on the +transaction replacement within the block. An additional structure may be added +the block of the following form: + +```proto +message Block { +... + repeated Replacement replacements = 5; +} + +message Replacement { + bytes included_tx_key = 1; + repeated bytes replaced_txs_keys = 2; +} +``` + +Applications executing `PrepareProposal` would return the list of replacements and +Tendermint would include an encoding of these replacements in the block that is gossiped +and committed. + +Tendermint's transaction indexing would include a new mapping for each replaced transaction +key to the committed transaction. +Transaction inclusion proofs would be updated to include these additional new transaction +keys in the Merkle tree and queries for transaction hashes that were replaced would return +information indicating that the transaction was replaced along with the hash of the +transaction that replaced it. + +Block validation of gossiped blocks would be updated to check that each of the +`included_txs_key` matches the hash of some transaction in the proposed block. + +Implementing the changes described in this section would allow Tendermint to gossip +and index transaction replacements as part of block propagation. These changes would +still require the application to certify that the replacements were valid. This +validation may be performed in one of two ways: + +1. **Applications optimistically trust that the proposer performed a legitimate replacement.** + +In this validation scheme, applications would not verify that the substitution +is valid during consensus and instead simply trust that the proposer is correct. +This would have the drawback of allowing a malicious proposer to remove transactions +it did not want executed. + +2. **Applications completely validate transaction replacement.** + +In this validation scheme, applications that allow replacement would check that +each listed replaced transaction was correctly reflected in the replacement transaction. +In order to perform such validation, the node would need to have the replaced transactions +locally. This could be accomplished one of a few ways: by querying the mempool, +by adding an additional p2p gossip channel for transaction replacements, or by including the replaced transactions +in the block. Replacement validation via mempool querying would require the node +to have received all of the replaced transactions in the mempool which is far from +guaranteed. Adding an additional gossip channel would make gossiping replaced transactions +a requirement for consensus to proceed, since all nodes would need to receive all replacement +messages before considering a block valid. Finally, including replaced transactions in +the block seems to obviate any benefit gained from performing a transaction replacement +since the replaced transaction and the original transactions would now both appear in the block. + +#### Application defined transaction replacement + +An additional option for allowing transaction replacement is to leave it entirely as a responsibility +of the application. The `PrepareProposal` ABCI++ call allows for applications to add +new transactions to a proposed block. Applications that wished to implement a transaction +replacement mechanism would be free to do so without the newly defined `new_hashes` field. +Applications wishing to implement transaction replacement would add the aggregated +transactions in the `PrepareProposal` response, and include one additional bookkeeping +transaction that listed all of the replacements, with a similar scheme to the `new_hashes` +field described in ABCI++. This new bookkeeping transaction could be used by the +application to determine which transactions to clear from the mempool in future calls +to `CheckTx`. + +The meaning of any transaction in the block is completely opaque to Tendermint, +so applications performing this style of replacement would not be able to have the replacement +reflected in any most of Tendermint's transaction tracking mechanisms, such as transaction indexing +and the `/tx` endpoint. + +#### Application defined Tx Keys + +Tendermint currently uses cryptographic hashes, SHA256, as a key for each transaction. +As noted in the section on systems that would require changing, this key is used +to identify the transaction in the mempool, in the indexer, and within the RPC system. + +An alternative approach to allowing `ProcessProposal` to specify a set of transaction +replacements would be instead to allow the application to specify an additional key or set +of keys for each transaction during `ProcessProposal`. This new `secondary_keys` set +would be included in the block and therefore gossiped during block propagation. +Additional RPC endpoints could be exposed to query by the application-defined keys. + +Applications wishing to implement replacement would leverage this new field by providing the +replaced transaction hashes as the `secondary_keys` and checking their validity during +`ProcessProposal`. During `RecheckTx` the application would then be responsible for +clearing out transactions that matched the `secondary_keys`. + +It is worth noting that something like this would be possible without `secondary_keys`. +An application wishing to implement a system like this one could define a replacement +transaction, as discussed in the section on application-defined transaction replacement, +and use a custom [ABCI event type][abci-event-type] to communicate that the replacement should +be indexed within Tendermint's ABCI event indexing. + +### Complexity to value-add tradeoff + +It is worth remarking that adding a system like this may introduce a decent amount +of new complexity into Tendermint. An approach that leaves much of the replacement +logic to Tendermint would require altering the core transaction indexing and querying +data. In many of the cases listed, a system for transaction replacement is possible +without explicitly defining it as part of `PrepareProposal`. Since applications +can now add transactions during `PrepareProposal` they can and should leverage this +functionality to include additional bookkeeping transactions in the block. It may +be worth encouraging applications to discover new and interesting ways to leverage this +power instead of immediately solving the problem for them. + +### References + +[inclusion-proof]: https://github.com/tendermint/tendermint/blob/0fcfaa4568cb700e27c954389c1fcd0b9e786332/types/tx.go#L67 +[tx-serach-result]: https://github.com/tendermint/tendermint/blob/0fcfaa4568cb700e27c954389c1fcd0b9e786332/rpc/coretypes/responses.go#L267 +[tx-rpc-func]: https://github.com/tendermint/tendermint/blob/0fcfaa4568cb700e27c954389c1fcd0b9e786332/internal/rpc/core/tx.go#L21 +[tx-result-index]: https://github.com/tendermint/tendermint/blob/0fcfaa4568cb700e27c954389c1fcd0b9e786332/internal/state/indexer/tx/kv/kv.go#L90 +[abci-event-type]: https://github.com/tendermint/tendermint/blob/0fcfaa4568cb700e27c954389c1fcd0b9e786332/abci/types/types.pb.go#L3168 diff --git a/docs/rfc/rfc-016-node-architecture.md b/docs/rfc/rfc-016-node-architecture.md new file mode 100644 index 000000000..29098d297 --- /dev/null +++ b/docs/rfc/rfc-016-node-architecture.md @@ -0,0 +1,83 @@ +# RFC 016: Node Architecture + +## Changelog + +- April 8, 2022: Initial draft (@cmwaters) +- April 15, 2022: Incorporation of feedback + +## Abstract + +The `node` package is the entry point into the Tendermint codebase, used both by the command line and programatically to create the nodes that make up a network. The package has suffered the most from the evolution of the codebase, becoming bloated as developers clipped on their bits of code here and there to get whatever feature they wanted working. + +The decisions made at the node level have the biggest impact to simplifying the protocols within them, unlocking better internal designs and making Tendermint more intuitive to use and easier to understand from the outside. Work, in minor increments, has already begun on this section of the codebase. This document exists to spark forth the necessary discourse in a few related areas that will help the team to converge on the long term makeup of the node. + +## Discussion + +The following is a list of points of discussion around the architecture of the node: + +### Dependency Tree + +The node object is currently stuffed with every component that possibly exists within Tendermint. In the constructor, all objects are built and interlaid with one another in some awkward dance. My guiding principle is that the node should only be made up of the components that it wants to have direct control of throughout its life. The node is a service which currently has the purpose of starting other services up in a particular order and stopping them all when commanded to do so. However, there are many services which are not direct dependents i.e. the mempool and evidence services should only be working when the consensus service is running. I propose to form more of a hierarchical structure of dependents which forces us to be clear about the relations that one component has to the other. More concretely, I propose the following dependency tree: + +![node dependency tree](./images/node-dependency-tree.svg) + +Many of the further discussion topics circle back to this representation of the node. + +It's also important to distinguish two dimensions which may require different characteristics of the architecture. There is the starting and stopping of services and their general lifecycle management. What is the correct order of operations to starting a node for example. Then there is the question of the needs of the service during actual operation. Then there is the question of what resources each service needs access to during its operation. Some need to publish events, others need access to data stores, and so forth. + +An alternative model and one that perhaps better suits the latter of these dimensions is the notion of an internal message passing system. Either the events bus or p2p layer could serve as a viable transport. This would essentially allow all services to communicate with any other service and could perhaps provide a solution to the coordination problem (presented below) without a centralized coordinator. The other main advantage is that such a system would be more robust to disruptions and changes to the code which may make a hierarchical structure quickly outdated and suboptimal. The addition of message routing is an added complexity to implement, will increase the degree of asynchronicity in the system and may make it harder to debug problems that are across multiple services. + +### Coordination of State Advancing Mechanisms + +Advancement of state in Tendermint is simply defined in heights: If the node is at height n, how does it get to height n + 1 and so on. Based on this definition we have three components that help a node to advance in height: consensus, statesync and blocksync. The way these components behave currently is very tightly coupled to one another with references passed back and forth. My guiding principle is that each of these should be able to operate completely independently of each other, e.g. a node should be able to run solely blocksync indefinitely. There have been several ideas suggested towards improving this flow. I've been leaning strongly towards a centralized system, whereby an orchestrator (in this case the node) decides what services to start and stop. +In a decentralized message passing system, individual services make their decision based upon a "global" shared state i.e. if my height is less that 10 below the average peer height, I as consensus, should stop (knowing that blocksync has the same condition for starting). As the example illustrates, each mechanism will still need to be aware of the presence of other mechanisms. + +Both centralized and decentralized systems rely on the communication of the nodes current height and a judgement on the height of the head of the chain. The latter, working out the head of the chain, is quite a difficult challenge as their is nothing preventing the node from acting maliciously and providing a different height. Currently both blocksync, consensus (and to a certain degree statesync), have parallel systems where peers communicate their height. This could be streamlined with the consensus (or even the p2p layer), broadcasting peer heights and either the node or the other state advancing mechanisms acting accordingly. + +Currently, when a node starts, it turns on every service that it is attached to. This means that while a node is syncing up by requesting blocks, it is also receiving transactions and votes, as well as snapshot and block requests. This is a needless use of bandwidth. An implementation of an orchestrator, regardless of whether the system is heirachical or not, should look to be able to open and close channels dynamically and effectively broadcast which services it is running. Integrating this with service discovery may also lead to a better serivce to peers. + +The orchestrator allows for some deal of variablity in how a node is constructed. Does it just run blocksync, shadowing the head of the chain and be highly available for querying. Does it rely on state sync at all? An important question that arises from this dynamicism is we ideally want to encourage nodes to provide as much of their resources as possible so that their is a healthy amount of providers to consumers. Do we make all services compulsory or allow for them to be disabled? Arguably it's possible that a user forks the codebase and rips out the blocksync code because they want to reduce bandwidth so this is more a question of how easy do we want to make this for users. + +### Block Executor + +The block executor is an important component that is currently used by both consensus and blocksync to execute transactions and update application state. Principally, I think it should be the only component that can write (and possibly even read) the block and state stores, and we should clean up other direct dependencies on the storage engine if we can. This would mean: + +- The reactors Consensus, BlockSync and StateSync should all import the executor for advancing state ie. `ApplyBlock` and `BootstrapState`. +- Pruning should also be a concern of the block executor as well as `FinalizeBlock` and `Commit`. This can simplify consensus to focus just on the consensus part. + +### The Interprocess communication systems: RPC, P2P, ABCI, and Events + +The schematic supplied above shows the relations between the different services, the node, the block executor, and the storage layer. Represented as colored dots are the components responsible for different roles of interprocess communication (IPC). These components permeate throughout the code base, seeping into most services. What can provide powerful functionality on one hand can also become a twisted vine, creating messy corner cases and convoluting the protocols themselves. A lot of the thinking around +how we want our IPC systens to function has been summarised in this [RFC](./rfc-002-ipc-ecosystem.md). In this section, I'd like to focus the reader on the relation between the IPC and the node structure. An issue that has frequently risen is that the RPC has control of the components where it strikes me as being more logical for the component to dictate the information that is emitted/available and the knobs it wishes to expose. The RPC is also inextricably tied to the node instance and has situations where it is passed pointers directly to the storage engine and other components. + +I am currently convinced of the approach that the p2p layer takes and would like to see other IPC components follow suit. This would mean that the RPC and events system would be constructed in the node yet would pass the adequate methods to register endpoints and topics to the sub components. For example, + +```go +// Methods from the RPC and event bus that would be passed into the constructor of components like "consensus" +// NOTE: This is a hypothetical construction to convey the idea. An actual implementation may differ. +func RegisterRoute(path string, handler func(http.ResponseWriter, *http.Request)) + +func RegisterTopic(name string) EventPublisher + +type EventPublisher func (context.Context, types.EventData, []abci.Event) +``` + +This would give the components control to the information they want to expose and keep all relevant logic within that package. It accomodates more to a dynamic system where services can switch on and off. Each component would also receive access to the logger and metrics system for introspection and debuggability. + +#### IPC Rubric + +I'd like to aim to reach a state where we as a team have either an implicit or explicit rubric which can determine, in the event of some new need to communicate information, what tool it should use for doing this. In the case of inter node communication, this is obviously the p2p stack (with perhaps the exception of the light client). Metrics and logging also have clear usage patterns. RPC and the events system are less clear. The RPC is used for debugging data and fine tuned operator control as it is for general public querying and transaction submission. The RPC is also known to have been plumbed back into the application for historical queries. The events system, similarly, is used for consuming transaction events as it is for the testing of consensus state transitions. + +Principally, I think we should look to change our language away from what the actual transport is and more towards what it's being used for and to whom. We call it a peer to peer layer and not the underlying tcp connection. In the same way, we should look to split RPC into an operator interface (RPC Internal), a public interface (RPC External) and a bidirectional ABCI. + +### Seperation of consumers and suppliers + +When a service such as blocksync is turned on, it automatically begins requesting blocks to verify and apply them as it also tries to serve them to other peers catching up. We should look to distinguish these two aspects: supplying of information and consuming of information in many of these components. More concretely, I'd suggest: + +- The blocksync and statesync service, i.e. supplying information for those trying to catch up should only start running once a node has caught up i.e. after running the blocksync and/or state sync *processes* +- The blocksync and state sync processes have defined termination clauses that inform the orchestrator when they are done and where they finished. + - One way of achieving this would be that every process both passes and returns the `State` object + - In some cases, a node may specify that it wants to run blocksync indefinitely. +- The mempool should also indicate whether it wants to receive transactions or to send them only (one-directional mempool) +- Similarly, the light client itself only requests information whereas the light client service (currently part of state sync) can do both. +- This distinction needs to be communicated in the p2p layer handshake itself but should also be changeable over the lifespan of the connection. diff --git a/docs/rfc/rfc-017-abci++-vote-extension-propag.md b/docs/rfc/rfc-017-abci++-vote-extension-propag.md new file mode 100644 index 000000000..15d08f7ba --- /dev/null +++ b/docs/rfc/rfc-017-abci++-vote-extension-propag.md @@ -0,0 +1,571 @@ +# RFC 017: ABCI++ Vote Extension Propagation + +## Changelog + +- 11-Apr-2022: Initial draft (@sergio-mena). +- 15-Apr-2022: Addressed initial comments. First complete version (@sergio-mena). +- 09-May-2022: Addressed all outstanding comments. + +## Abstract + +According to the +[ABCI++ specification](https://github.com/tendermint/tendermint/blob/4743a7ad0/spec/abci%2B%2B/README.md) +(as of 11-Apr-2022), a validator MUST provide a signed vote extension for each non-`nil` precommit vote +of height *h* that it uses to propose a block in height *h+1*. When a validator is up to +date, this is easy to do, but when a validator needs to catch up this is far from trivial as this data +cannot be retrieved from the blockchain. + +This RFC presents and compares the different options to address this problem, which have been proposed +in several discussions by the Tendermint Core team. + +## Document Structure + +The RFC is structured as follows. In the [Background](#background) section, +subsections [Problem Description](#problem-description) and [Cases to Address](#cases-to-address) +explain the problem at hand from a high level perspective, i.e., abstracting away from the current +Tendermint implementation. In contrast, subsection +[Current Catch-up Mechanisms](#current-catch-up-mechanisms) delves into the details of the current +Tendermint code. + +In the [Discussion](#discussion) section, subsection [Solutions Proposed](#solutions-proposed) is also +worded abstracting away from implementation details, whilst subsections +[Feasibility of the Proposed Solutions](#feasibility-of-the-proposed-solutions) and +[Current Limitations and Possible Implementations](#current-limitations-and-possible-implementations) +analize the viability of one of the proposed solutions in the context of Tendermint's architecture +based on reactors. Finally, [Formalization Work](#formalization-work) briefly discusses the work +still needed demonstrate the correctness of the chosen solution. + +The high level subsections are aimed at readers who are familiar with consensus algorithms, in +particular with the one described in the Tendermint (white paper), but who are not necessarily +acquainted with the details of the Tendermint codebase. The other subsections, which go into +implementation details, are best understood by engineers with deep knowledge of the implementation of +Tendermint's blocksync and consensus reactors. + +## Background + +### Basic Definitions + +This document assumes that all validators have equal voting power for the sake of simplicity. This is done +without loss of generality. + +There are two types of votes in Tendermint: *prevotes* and *precommits*. Votes can be `nil` or refer to +a proposed block. This RFC focuses on precommits, +also known as *precommit votes*. In this document we sometimes call them simply *votes*. + +Validators send precommit votes to their peer nodes in *precommit messages*. According to the +[ABCI++ specification](https://github.com/tendermint/tendermint/blob/4743a7ad0/spec/abci%2B%2B/README.md), +a precommit message MUST also contain a *vote extension*. +This mandatory vote extension can be empty, but MUST be signed with the same key as the precommit +vote (i.e., the sending validator's). +Nevertheless, the vote extension is signed independently from the vote, so a vote can be separated from +its extension. +The reason for vote extensions to be mandatory in precommit messages is that, otherwise, a (malicious) +node can omit a vote extension while still providing/forwarding/sending the corresponding precommit vote. + +The validator set at height *h* is denoted *valseth*. A *commit* for height *h* consists of more +than *2nh/3* precommit votes voting for a block *b*, where *nh* denotes the size of +*valseth*. A commit does not contain `nil` precommit votes, and all votes in it refer to the +same block. An *extended commit* is a *commit* where every precommit vote has its respective vote extension +attached. + +### Problem Description + +In the version of [ABCI](https://github.com/tendermint/spec/blob/4fb99af/spec/abci/README.md) present up to +Tendermint v0.35, for any height *h*, a validator *v* MUST have the decided block *b* and a commit for +height *h* in order to decide at height *h*. Then, *v* just needs a commit for height *h* to propose at +height *h+1*, in the rounds of *h+1* where *v* is a proposer. + +In [ABCI++](https://github.com/tendermint/tendermint/blob/4743a7ad0/spec/abci%2B%2B/README.md), +the information that a validator *v* MUST have to be able to decide in *h* does not change with +respect to pre-existing ABCI: the decided block *b* and a commit for *h*. +In contrast, for proposing in *h+1*, a commit for *h* is not enough: *v* MUST now have an extended +commit. + +When a validator takes an active part in consensus at height *h*, it has all the data it needs in memory, +in its consensus state, to decide on *h* and propose in *h+1*. Things are not so easy in the cases when +*v* cannot take part in consensus because it is late (e.g., it falls behind, it crashes +and recovers, or it just starts after the others). If *v* does not take part, it cannot actively +gather precommit messages (which include vote extensions) in order to decide. +Before ABCI++, this was not a problem: full nodes are supposed to persist past blocks in the block store, +so other nodes would realise that *v* is late and send it the missing decided block at height *h* and +the corresponding commit (kept in block *h+1*) so that *v* can catch up. +However, we cannot apply this catch-up technique for ABCI++, as the vote extensions, which are part +of the needed *extended commit* are not part of the blockchain. + +### Cases to Address + +Before we tackle the description of the possible cases we need to address, let us describe the following +incremental improvement to the ABCI++ logic. Upon decision, a full node persists (e.g., in the block +store) the extended commit that allowed the node to decide. For the moment, let us assume the node only +needs to keep its *most recent* extended commit, and MAY remove any older extended commits from persistent +storage. +This improvement is so obvious that all solutions described in the [Discussion](#discussion) section use +it as a building block. Moreover, it completely addresses by itself some of the cases described in this +subsection. + +We now describe the cases (i.e. possible *runs* of the system) that have been raised in different +discussions and need to be addressed. They are (roughly) ordered from easiest to hardest to deal with. + +- **(a)** *Happy path: all validators advance together, no crash*. + + This case is included for completeness. All validators have taken part in height *h*. + Even if some of them did not manage to send a precommit message for the decided block, they all + receive enough precommit messages to be able to decide. As vote extensions are mandatory in + precommit messages, every validator *v* trivially has all the information, namely the decided block + and the extended commit, needed to propose in height *h+1* for the rounds in which *v* is the + proposer. + + No problem to solve here. + +- **(b)** *All validators advance together, then all crash at the same height*. + + This case has been raised in some discussions, the main concern being whether the vote extensions + for the previous height would be lost across the network. With the improvement described above, + namely persisting the latest extended commit at decision time, this case is solved. + When a crashed validator recovers, it recovers the last extended commit from persistent storage + and handshakes with the Application. + If need be, it also reconstructs messages for the unfinished height + (including all precommits received) from the WAL. + Then, the validator can resume where it was at the time of the crash. Thus, as extensions are + persisted, either in the WAL (in the form of received precommit messages), or in the latest + extended commit, the only way that vote extensions needed to start the next height could be lost + forever would be if all validators crashed and never recovered (e.g. disk corruption). + Since a *correct* node MUST eventually recover, this violates Tendermint's assumption of more than + *2nh/3* correct validators for every height *h*. + + No problem to solve here. + +- **(c)** *Lagging majority*. + + Let us assume the validator set does not change between *h* and *h+1*. + It is not possible by the nature of the Tendermint algorithm, which requires more + than *2nh/3* precommit votes for some round of height *h* in order to make progress. + So, only up to *nh/3* validators can lag behind. + + On the other hand, for the case where there are changes to the validator set between *h* and + *h+1* please see case (d) below, where the extreme case is discussed. + +- **(d)** *Validator set changes completely between* h *and* h+1. + + If sets *valseth* and *valseth+1* are disjoint, + more than *2nh/3* of validators in height *h* should + have actively participated in conensus in *h*. So, as of height *h*, only a minority of validators + in *h* can be lagging behind, although they could all lag behind from *h+1* on, as they are no + longer validators, only full nodes. This situation falls under the assumptions of case (h) below. + + As for validators in *valseth+1*, as they were not validators as of height *h*, they + could all be lagging behind by that time. However, by the time *h* finishes and *h+1* begins, the + chain will halt until more than *2nh+1/3* of them have caught up and started consensus + at height *h+1*. If set *valseth+1* does not change in *h+2* and subsequent + heights, only up to *nh+1/3* validators will be able to lag behind. Thus, we have + converted this case into case (h) below. + +- **(e)** *Enough validators crash to block the rest*. + + In this case, blockchain progress halts, i.e. surviving full nodes keep increasing rounds + indefinitely, until some of the crashed validators are able to recover. + Those validators that recover first will handshake with the Application and recover at the height + they crashed, which is still the same the nodes that did not crash are stuck in, so they don't need + to catch up. + Further, they had persisted the extended commit for the previous height. Nothing to solve. + + For those validators recovering later, we are in case (h) below. + +- **(f)** *Some validators crash, but not enough to block progress*. + + When the correct processes that crashed recover, they handshake with the Application and resume at + the height they were at when they crashed. As the blockchain did not stop making progress, the + recovered processes are likely to have fallen behind with respect to the progressing majority. + + At this point, the recovered processes are in case (h) below. + +- **(g)** *A new full node starts*. + + The reasoning here also applies to the case when more than one full node are starting. + When the full node starts from scratch, it has no state (its current height is 0). Ignoring + statesync for the time being, the node just needs to catch up by applying past blocks one by one + (after verifying them). + + Thus, the node is in case (h) below. + +- **(h)** *Advancing majority, lagging minority* + + In this case, some nodes are late. More precisely, at the present time, a set of full nodes, + denoted *Lhp*, are falling behind + (e.g., temporary disconnection or network partition, memory thrashing, crashes, new nodes) + an arbitrary + number of heights: + between *hs* and *hp*, where *hs < hp*, and + *hp* is the highest height + any correct full node has reached so far. + + The correct full nodes that reached *hp* were able to decide for *hp-1*. + Therefore, less than *nhp-1/3* validators of *hp-1* can be part + of *Lhp*, since enough up-to-date validators needed to actively participate + in consensus for *hp-1*. + + Since, at the present time, + no node in *Lhp* took part in any consensus between + *hs* and *hp-1*, + the reasoning above can be extended to validator set changes between *hs* and + *hp-1*. This results in the following restriction on the full nodes that can be part of *Lhp*. + + - ∀ *h*, where *hs ≤ h < hp*, + | *valseth* ∩ *Lhp* | *< nh/3* + + If this property does not hold for a particular height *h*, where + *hs ≤ h < hp*, Tendermint could not have progressed beyond *h* and + therefore no full node could have reached *hp* (a contradiction). + + These lagging nodes in *Lhp* need to catch up. They have to obtain the + information needed to make + progress from other nodes. For each height *h* between *hs* and *hp-2*, + this includes the decided block for *h*, and the + precommit votes also for *deciding h* (which can be extracted from the block at height *h+1*). + + At a given height *hc* (where possibly *hc << hp*), + a full node in *Lhp* will consider itself *caught up*, based on the + (maybe out of date) information it is getting from its peers. Then, the node needs to be ready to + propose at height *hc+1*, which requires having received the vote extensions for + *hc*. + As the vote extensions are *not* stored in the blocks, and it is difficult to have strong + guarantees on *when* a late node considers itself caught up, providing the late node with the right + vote extensions for the right height poses a problem. + +At this point, we have described and compared all cases raised in discussions leading up to this +RFC. The list above aims at being exhaustive. The analysis of each case included above makes all of +them converge into case (h). + +### Current Catch-up Mechanisms + +We now briefly describe the current catch-up mechanisms in the reactors concerned in Tendermint. + +#### Statesync + +Full nodes optionally run statesync just after starting, when they start from scratch. +If statesync succeeds, an Application snapshot is installed, and Tendermint jumps from height 0 directly +to the height the Application snapshop represents, without applying the block of any previous height. +Some light blocks are received and stored in the block store for running light-client verification of +all the skipped blocks. Light blocks are incomplete blocks, typically containing the header and the +canonical commit but, e.g., no transactions. They are stored in the block store as "signed headers". + +The statesync reactor is not really relevant for solving the problem discussed in this RFC. We will +nevertheless mention it when needed; in particular, to understand some corner cases. + +#### Blocksync + +The blocksync reactor kicks in after start up or recovery (and, optionally, after statesync is done) +and sends the following messages to its peers: + +- `StatusRequest` to query the height its peers are currently at, and +- `BlockRequest`, asking for blocks of heights the local node is missing. + +Using `BlockResponse` messages received from peers, the blocksync reactor validates each received +block using the block of the following height, saves the block in the block store, and sends the +block to the Application for execution. + +If blocksync has validated and applied the block for the height *previous* to the highest seen in +a `StatusResponse` message, or if no progress has been made after a timeout, the node considers +itself as caught up and switches to the consensus reactor. + +#### Consensus Reactor + +The consensus reactor runs the full Tendermint algorithm. For a validator this means it has to +propose blocks, and send/receive prevote/precommit messages, as mandated by Tendermint, before it can +decide and move on to the next height. + +If a full node that is running the consensus reactor falls behind at height *h*, when a peer node +realises this it will retrieve the canonical commit of *h+1* from the block store, and *convert* +it into a set of precommit votes and will send those to the late node. + +## Discussion + +### Solutions Proposed + +These are the solutions proposed in discussions leading up to this RFC. + +- **Solution 0.** *Vote extensions are made **best effort** in the specification*. + + This is the simplest solution, considered as a way to provide vote extensions in a simple enough + way so that it can be part of v0.36. + It consists in changing the specification so as to not *require* that precommit votes used upon + `PrepareProposal` contain their corresponding vote extensions. In other words, we render vote + extensions optional. + There are strong implications stemming from such a relaxation of the original specification. + + - As a vote extension is signed *separately* from the vote it is extending, an intermediate node + can now remove (i.e., censor) vote extensions from precommit messages at will. + - Further, there is no point anymore in the spec requiring the Application to accept a vote extension + passed via `VerifyVoteExtension` to consider a precommit message valid in its entirety. Remember + this behavior of `VerifyVoteExtension` is adding a constraint to Tendermint's conditions for + liveness. + In this situation, it is better and simpler to just drop the vote extension rejected by the + Application via `VerifyVoteExtension`, but still consider the precommit vote itself valid as long + as its signature verifies. + +- **Solution 1.** *Include vote extensions in the blockchain*. + + Another obvious solution, which has somehow been considered in the past, is to include the vote + extensions and their signatures in the blockchain. + The blockchain would thus include the extended commit, rather than a regular commit, as the structure + to be canonicalized in the next block. + With this solution, the current mechanisms implemented both in the blocksync and consensus reactors + would still be correct, as all the information a node needs to catch up, and to start proposing when + it considers itself as caught-up, can now be recovered from past blocks saved in the block store. + + This solution has two main drawbacks. + + - As the block format must change, upgrading a chain requires a hard fork. Furthermore, + all existing light client implementations will stop working until they are upgraded to deal with + the new format (e.g., how certain hashes calculated and/or how certain signatures are checked). + For instance, let us consider IBC, which relies on light clients. An IBC connection between + two chains will be broken if only one chain upgrades. + - The extra information (i.e., the vote extensions) that is now kept in the blockchain is not really + needed *at every height* for a late node to catch up. + - This information is only needed to be able to *propose* at the height the validator considers + itself as caught-up. If a validator is indeed late for height *h*, it is useless (although + correct) for it to call `PrepareProposal`, or `ExtendVote`, since the block is already decided. + - Moreover, some use cases require pretty sizeable vote extensions, which would result in an + important waste of space in the blockchain. + +- **Solution 2.** *Skip* propose *step in Tendermint algorithm*. + + This solution consists in modifying the Tendermint algorithm to skip the *send proposal* step in + heights where the node does not have the required vote extensions to populate the call to + `PrepareProposal`. The main idea behind this is that it should only happen when the validator is late + and, therefore, up-to-date validators have already proposed (and decided) for that height. + A small variation of this solution is, rather than skipping the *send proposal* step, the validator + sends a special *empty* or *bottom* (⊥) proposal to signal other nodes that it is not ready to propose + at (any round of) the current height. + + The appeal of this solution is its simplicity. A possible implementation does not need to extend + the data structures, or change the current catch-up mechanisms implemented in the blocksync or + in the consensus reactor. When we lack the needed information (vote extensions), we simply rely + on another correct validator to propose a valid block in other rounds of the current height. + + However, this solution can be attacked by a byzantine node in the network in the following way. + Let us consider the following scenario: + + - all validators in *valseth* send out precommit messages, with vote extensions, + for height *h*, round 0, roughly at the same time, + - all those precommit messages contain non-`nil` precommit votes, which vote for block *b* + - all those precommit messages sent in height *h*, round 0, and all messages sent in + height *h*, round *r > 0* get delayed indefinitely, so, + - all validators in *valseth* keep waiting for enough precommit + messages for height *h*, round 0, needed for deciding in height *h* + - an intermediate (malicious) full node *m* manages to receive block *b*, and gather more than + *2nh/3* precommit messages for height *h*, round 0, + - one way or another, the solution should have either (a) a mechanism for a full node to *tell* + another full node it is late, or (b) a mechanism for a full node to conclude it is late based + on other full nodes' messages; any of these mechanisms should, at the very least, + require the late node receiving the decided block and a commit (not necessarily an extended + commit) for *h*, + - node *m* uses the gathered precommit messages to build a commit for height *h*, round 0, + - in order to convince full nodes that they are late, node *m* either (a) *tells* them they + are late, or (b) shows them it (i.e. *m*) is ahead, by sending them block *b*, along with the + commit for height *h*, round 0, + - all full nodes conclude they are late from *m*'s behavior, and use block *b* and the commit for + height *h*, round 0, to decide on height *h*, and proceed to height *h+1*. + + At this point, *all* full nodes, including all validators in *valseth+1*, have advanced + to height *h+1* believing they are late, and so, expecting the *hypothetical* leading majority of + validators in *valseth+1* to propose for *h+1*. As a result, the blockhain + grinds to a halt. + A (rather complex) ad-hoc mechanism would need to be carried out by node operators to roll + back all validators to the precommit step of height *h*, round *r*, so that they can regenerate + vote extensions (remember vote extensions are non-deterministic) and continue execution. + +- **Solution 3.** *Require extended commits to be available at switching time*. + + This one is more involved than all previous solutions, and builds on an idea present in Solution 2: + vote extensions are actually not needed for Tendermint to make progress as long as the + validator is *certain* it is late. + + We define two modes. The first is denoted *catch-up mode*, and Tendermint only calls + `FinalizeBlock` for each height when in this mode. The second is denoted *consensus mode*, in + which the validator considers itself up to date and fully participates in consensus and calls + `PrepareProposal`/`ProcessProposal`, `ExtendVote`, and `VerifyVoteExtension`, before calling + `FinalizeBlock`. + + The catch-up mode does not need vote extension information to make progress, as all it needs is the + decided block at each height to call `FinalizeBlock` and keep the state-machine replication making + progress. The consensus mode, on the other hand, does need vote extension information when + starting every height. + + Validators are in consensus mode by default. When a validator in consensus mode falls behind + for whatever reason, e.g. cases (b), (d), (e), (f), (g), or (h) above, we introduce the following + key safety property: + + - for every height *hp*, a full node *f* in *hp* refuses to switch to catch-up + mode **until** there exists a height *h'* such that: + - *p* has received and (light-client) verified the blocks of + all heights *h*, where *hp ≤ h ≤ h'* + - it has received an extended commit for *h'* and has verified: + - the precommit vote signatures in the extended commit + - the vote extension signatures in the extended commit: each is signed with the same + key as the precommit vote it extends + + If the condition above holds for *hp*, namely receiving a valid sequence of blocks in + the *f*'s future, and an extended commit corresponding to the last block in the sequence, then + node *f*: + + - switches to catch-up mode, + - applies all blocks between *hp* and *h'* (calling `FinalizeBlock` only), and + - switches back to consensus mode using the extended commit for *h'* to propose in the rounds of + *h' + 1* where it is the proposer. + + This mechanism, together with the invariant it uses, ensures that the node cannot be attacked by + being fed a block without extensions to make it believe it is late, in a similar way as explained + for Solution 2. + +### Feasibility of the Proposed Solutions + +Solution 0, besides the drawbacks described in the previous section, provides guarantees that are +weaker than the rest. The Application does not have the assurance that more than *2nh/3* vote +extensions will *always* be available when calling `PrepareProposal` at height *h+1*. +This level of guarantees is probably not strong enough for vote extensions to be useful for some +important use cases that motivated them in the first place, e.g., encrypted mempool transactions. + +Solution 1, while being simple in that the changes needed in the current Tendermint codebase would +be rather small, is changing the block format, and would therefore require all blockchains using +Tendermint v0.35 or earlier to hard-fork when upgrading to v0.36. + +Since Solution 2 can be attacked, one might prefer Solution 3, even if it is more involved +to implement. Further, we must elaborate on how we can turn Solution 3, described in abstract +terms in the previous section, into a concrete implementation compatible with the current +Tendermint codebase. + +### Current Limitations and Possible Implementations + +The main limitations affecting the current version of Tendermint are the following. + +- The current version of the blocksync reactor does not use the full + [light client verification](https://github.com/tendermint/tendermint/blob/4743a7ad0/spec/light-client/README.md) + algorithm to validate blocks coming from other peers. +- The code being structured into the blocksync and consensus reactors, only switching from the + blocksync reactor to the consensus reactor is supported; switching in the opposite direction is + not supported. Alternatively, the consensus reactor could have a mechanism allowing a late node + to catch up by skipping calls to `PrepareProposal`/`ProcessProposal`, and + `ExtendVote`/`VerifyVoteExtension` and only calling `FinalizeBlock` for each height. + Such a mechanism does not exist at the time of writing this RFC. + +The blocksync reactor featuring light client verification is being actively worked on (tentatively +for v0.37). So it is best if this RFC does not try to delve into that problem, but just makes sure +its outcomes are compatible with that effort. + +In subsection [Cases to Address](#cases-to-address), we concluded that we can focus on +solving case (h) in theoretical terms. +However, as the current Tendermint version does not yet support switching back to blocksync once a +node has switched to consensus, we need to split case (h) into two cases. When a full node needs to +catch up... + +- **(h.1)** ... it has not switched yet from the blocksync reactor to the consensus reactor, or + +- **(h.2)** ... it has already switched to the consensus reactor. + +This is important in order to discuss the different possible implementations. + +#### Base Implementation: Persist and Propagate Extended Commit History + +In order to circumvent the fact that we cannot switch from the consensus reactor back to blocksync, +rather than just keeping the few most recent extended commits, nodes will need to keep +and gossip a backlog of extended commits so that the consensus reactor can still propose and decide +in out-of-date heights (even if those proposals will be useless). + +The base implementation - for which an experimental patch exists - consists in the conservative +approach of persisting in the block store *all* extended commits for which we have also stored +the full block. Currently, when statesync is run at startup, it saves light blocks. +This base implementation does not seek +to receive or persist extended commits for those light blocks as they would not be of any use. + +Then, we modify the blocksync reactor so that peers *always* send requested full blocks together +with the corresponding extended commit in the `BlockResponse` messages. This guarantees that the +block store being reconstructed by blocksync has the same information as that of peers that are +up to date (at least starting from the latest snapshot applied by statesync before starting blocksync). +Thus, blocksync has all the data it requires to switch to the consensus reactor, as long as one of +the following exit conditions are met: + +- The node is still at height 0 (where no commit or extended commit is needed) +- The node has processed at least 1 block in blocksync + +The second condition is needed in case the node has installed an Application snapshot during statesync. +If that is the case, at the time blocksync starts, the block store only has the data statesync has saved: +light blocks, and no extended commits. +Hence we need to blocksync at least one block from another node, which will be sent with its corresponding extended commit, before we can switch to consensus. + +As a side note, a chain might be started at a height *hi > 0*, all other heights +*h < hi* being non-existent. In this case, the chain is still considered to be at height 0 before +block *hi* is applied, so the first condition above allows the node to switch to consensus even +if blocksync has not processed any block (which is always the case if all nodes are starting from scratch). + +When a validator falls behind while having already switched to the consensus reactor, a peer node can +simply retrieve the extended commit for the required height from the block store and reconstruct a set of +precommit votes together with their extensions and send them in the form of precommit messages to the +validator falling behind, regardless of whether the peer node holds the extended commit because it +actually participated in that consensus and thus received the precommit messages, or it received the extended commit via a `BlockResponse` message while running blocksync. + +This solution requires a few changes to the consensus reactor: + +- upon saving the block for a given height in the block store at decision time, save the + corresponding extended commit as well +- in the catch-up mechanism, when a node realizes that another peer is more than 2 heights + behind, it uses the extended commit (rather than the canoncial commit as done previously) to + reconstruct the precommit votes with their corresponding extensions + +The changes to the blocksync reactor are more substantial: + +- the `BlockResponse` message is extended to include the extended commit of the same height as + the block included in the response (just as they are stored in the block store) +- structure `bpRequester` is likewise extended to hold the received extended commits coming in + `BlockResponse` messages +- method `PeekTwoBlocks` is modified to also return the extended commit corresponding to the first block +- when successfully verifying a received block, the reactor saves its corresponding extended commit in + the block store + +The two main drawbacks of this base implementation are: + +- the increased size taken by the block store, in particular with big extensions +- the increased bandwith taken by the new format of `BlockResponse` + +#### Possible Optimization: Pruning the Extended Commit History + +If we cannot switch from the consensus reactor back to the blocksync reactor we cannot prune the extended commit backlog in the block store without sacrificing the implementation's correctness. The asynchronous +nature of our distributed system model allows a process to fall behing an arbitrary number of +heights, and thus all extended commits need to be kept *just in case* a node that late had +previously switched to the consensus reactor. + +However, there is a possibility to optimize the base implementation. Every time we enter a new height, +we could prune from the block store all extended commits that are more than *d* heights in the past. +Then, we need to handle two new situations, roughly equivalent to cases (h.1) and (h.2) described above. + +- (h.1) A node starts from scratch or recovers after a crash. In thisy case, we need to modify the + blocksync reactor's base implementation. + - when receiving a `BlockResponse` message, it MUST accept that the extended commit set to `nil`, + - when sending a `BlockResponse` message, if the block store contains the extended commit for that + height, it MUST set it in the message, otherwise it sets it to `nil`, + - the exit conditions used for the base implementation are no longer valid; the only reliable exit + condition now consists in making sure that the last block processed by blocksync was received with + the corresponding commit, and not `nil`; this extended commit will allow the node to switch from + the blocksync reactor to the consensus reactor and immediately act as a proposer if required. +- (h.2) A node already running the consensus reactor falls behind beyond *d* heights. In principle, + the node will be stuck forever as no other node can provide the vote extensions it needs to make + progress (they all have pruned the corresponding extended commit). + However we can manually have the node crash and recover as a workaround. This effectively converts + this case into (h.1). + +### Formalization Work + +A formalization work to show or prove the correctness of the different use cases and solutions +presented here (and any other that may be found) needs to be carried out. +A question that needs a precise answer is how many extended commits (one?, two?) a node needs +to keep in persistent memory when implementing Solution 3 described above without Tendermint's +current limitations. +Another important invariant we need to prove formally is that the set of vote extensions +required to make progress will always be held somewhere in the network. + +## References + +- [ABCI++ specification](https://github.com/tendermint/tendermint/blob/4743a7ad0/spec/abci%2B%2B/README.md) +- [ABCI as of v0.35](https://github.com/tendermint/spec/blob/4fb99af/spec/abci/README.md) +- [Vote extensions issue](https://github.com/tendermint/tendermint/issues/8174) +- [Light client verification](https://github.com/tendermint/tendermint/blob/4743a7ad0/spec/light-client/README.md) diff --git a/docs/rfc/rfc-018-bls-agg-exploration.md b/docs/rfc/rfc-018-bls-agg-exploration.md new file mode 100644 index 000000000..70ca171a0 --- /dev/null +++ b/docs/rfc/rfc-018-bls-agg-exploration.md @@ -0,0 +1,555 @@ +# RFC 018: BLS Signature Aggregation Exploration + +## Changelog + +- 01-April-2022: Initial draft (@williambanfield). +- 15-April-2022: Draft complete (@williambanfield). + +## Abstract + +## Background + +### Glossary + +The terms that are attached to these types of cryptographic signing systems +become confusing quickly. Different sources appear to use slightly different +meanings of each term and this can certainly add to the confusion. Below is +a brief glossary that may be helpful in understanding the discussion that follows. + +* **Short Signature**: A signature that does not vary in length with the +number of signers. +* **Multi-Signature**: A signature generated over a single message +where, given the message and signature, a verifier is able to determine that +all parties signed the message. May be short or may vary with the number of signers. +* **Aggregated Signature**: A _short_ signature generated over messages with +possibly different content where, given the messages and signature, a verifier +should be able to determine that all the parties signed the designated messages. +* **Threshold Signature**: A _short_ signature generated from multiple signers +where, given a message and the signature, a verifier is able to determine that +a large enough share of the parties signed the message. The identities of the +parties that contributed to the signature are not revealed. +* **BLS Signature**: An elliptic-curve pairing-based signature system that +has some nice properties for short multi-signatures. May stand for +*Boneh-Lynn-Schacham* or *Barreto-Lynn-Scott* depending on the context. A +BLS signature is type of signature scheme that is distinct from other forms +of elliptic-curve signatures such as ECDSA and EdDSA. +* **Interactive**: Cryptographic scheme where parties need to perform one or +more request-response cycles to produce the cryptographic material. For +example, an interactive signature scheme may require the signer and the +verifier to cooperate to create and/or verify the signature, rather than a +signature being created ahead of time. +* **Non-interactive**: Cryptographic scheme where parties do not need to +perform any request-response cycles to produce the cryptographic material. + +### Brief notes on pairing-based elliptic-curve cryptography + +Pairing-based elliptic-curve cryptography is quite complex and relies on several +types of high-level math. Cryptography, in general, relies on being able to find +problems with an asymmetry between the difficulty of calculating the solution +and verifying that a given solution is correct. + +Pairing-based cryptography works by operating on mathematical functions that +satisfy the property of **bilinear mapping**. This property is satisfied for +functions `e` with values `P`, `Q`, `R` and `S` where `e(P, Q + R) = e(P, Q) * e(P, R)` +and `e(P + S, Q) = e(P, Q) * e(S, Q)`. The most familiar example of this is +exponentiation. Written in common notation, `g^P*(Q+R) = g^(P*Q) * g^(P*R)` for +some value `g`. + +Pairing-based elliptic-curve cryptography creates a bilinear mapping using +elliptic curves over a finite field. With some original curve, you can define two groups, +`G1` and `G2` which are points of the original curve _modulo_ different values. +Finally, you define a third group `Gt`, where points from `G1` and `G2` satisfy +the property of bilinearity with `Gt`. In this scheme, the function `e` takes +as inputs points in `G1` and `G2` and outputs values in `Gt`. Succintly, given +some point `P` in `G1` and some point `Q` in `G1`, `e(P, Q) = C` where `C` is in `Gt`. +You can efficiently compute the mapping of points in `G1` and `G2` into `Gt`, +but you cannot efficiently determine what points were summed and paired to +produce the value in `Gt`. + +Functions are then defined to map digital signatures, messages, and keys into +and out of points of `G1` or `G2` and signature verification is the process +of calculating if a set of values representing a message, public key, and digital +signature produce the same value in `Gt` through `e`. + +Signatures can be created as either points in `G1` with public keys being +created as points in `G2` or vice versa. For the case of BLS12-381, the popular +curve used, points in `G1` are represented with 48 bytes and points in `G2` are +represented with 96 bytes. It is up to the implementer of the cryptosystem to +decide which should be larger, the public keys or the signatures. + +BLS signatures rely on pairing-based elliptic-curve cryptography to produce +various types of signatures. For a more in-depth but still high level discussion +pairing-based elliptic-curve cryptography, see Vitalik Buterin's post on +[Exploring Elliptic Curve Pairings][vitalik-pairing-post]. For much more in +depth discussion, see the specific paper on BLS12-381, [Short signatures from + the Weil Pairing][bls-weil-pairing] and +[Compact Multi-Signatures for Smaller Blockchains][multi-signatures-smaller-blockchains]. + +### Adoption + +BLS signatures have already gained traction within several popular projects. + +* Algorand is working on an implementation. +* [Zcash][zcash-adoption] has adopted BLS12-381 into the protocol. +* [Ethereum 2.0][eth-2-adoption] has adopted BLS12-381 into the protocol. +* [Chia Network][chia-adoption] has adopted BLS for signing blocks. +* [Ostracon][line-ostracon-pr], a fork of Tendermint has adopted BLS for signing blocks. + +### What systems may be affected by adding aggregated signatures? + +#### Gossip + +Gossip could be updated to aggregate vote signatures during a consensus round. +This appears to be of frankly little utility. Creating an aggregated signature +incurs overhead, so frequently re-aggregating may incur a significant +overhead. How costly this is is still subject to further investigation and +performance testing. + +Even if vote signatures were aggregated before gossip, each validator would still +need to receive and verify vote extension data from each (individual) peer validator in +order for consensus to proceed. That displaces any advantage gained by aggregating signatures across the vote message in the presence of vote extensions. + +#### Block Creation + +When creating a block, the proposer may create a small set of short +multi-signatures and attach these to the block instead of including one +signature per validator. + +#### Block Verification + +Currently, we verify each validator signature using the public key associated +with that validator. With signature aggregation, verification of blocks would +not verify many signatures individually, but would instead check the (single) +multi-signature using the public keys stored by the validator. This would also +require a mechanism for indicating which validators are included in the +aggregated signature. + +#### IBC Relaying + +IBC would no longer need to transmit a large set of signatures when +updating state. These state updates do not happen for every IBC packet, only +when changing an IBC light client's view of the counterparty chain's state. +General [IBC packets][ibc-packet] only contain enough information to correctly +route the data to the counterparty chain. + +IBC does persist commit signatures to the chain in these `MsgUpdateClient` +message when updating state. This message would no longer need the full set +of unique signatures and would instead only need one signature for all of the +data in the header. + +Adding BLS signatures would create a new signature type that must be +understood by the IBC module and by the relayers. For some operations, such +as state updates, the set of data written into the chain and received by the +IBC module could be slightly smaller. + +## Discussion + +### What are the proposed benefits to aggregated signatures? + +#### Reduce Block Size + +At the moment, a commit contains a 64-byte (512-bit) signature for each validator +that voted for the block. For the Cosmos Hub, which has 175 validators in the +active set, this amounts to about 11 KiB per block. That gives an upper bound of +around 113 GiB over the lifetime of the chain's 10.12M blocks. (Note, the Hub has +increased the number of validators in the active set over time so the total +signature size over the history of the chain is likely somewhat less than that). + +Signature aggregation would only produce two signatures for the entire block. +One for the yeas and one for the nays. Each BLS aggregated signature is 48 +bytes, per the [IETF standard of BLS signatures][bls-ietf-ecdsa-compare]. +Over the lifetime of the same Cosmos Hub chain, that would amount to about 1 +GB, a savings of 112 GB. While that is a large factor of reduction it's worth +bearing in mind that, at [GCP's cost][gcp-storage-pricing] of $.026 USD per GB, +that is a total savings of around $2.50 per month. + +#### Reduce Signature Creation and Verification Time + +From the [IETF draft standard on BLS Signatures][bls-ietf], BLS signatures can be +created in 370 microseconds and verified in 2700 microseconds. Our current +[Ed25519 implementation][voi-ed25519-perf] was benchmarked locally to take +13.9 microseconds to produce a signature and 2.03 milliseconds to batch verify +128 signatures, which is slightly fewer than the 175 in the Hub. blst, a popular +implementation of BLS signature aggregation was benchmarked to perform verification +on 100 signatures in 1.5 milliseconds [when run locally][blst-verify-bench] +on an 8 thread machine and pre-aggregated public keys. It is worth noting that +the `ed25519` library verification time grew steadily with the number of signatures, +whereas the bls library verification time remains constant. This is because the +number of operations used to verify a signature does not grow at all with the +number of signatures included in the aggregate signature (as long as the signers +signed over the same message data as is the case in Tendermint). + +It is worth noting that this would also represent a _degredation_ in signature +verification time for chains with small validator sets. When batch verifying +only 32 signatures, our ed25519 library takes .57 milliseconds, whereas BLS +would still require the same 1.5 milliseconds. + +For massive validator sets, blst dominates, taking the same 1.5 milliseconds to +check an aggregated signature from 1024 validators versus our ed25519 library's +13.066 milliseconds to batch verify a set of that size. + +#### Reduce Light-Client Verification Time + +The light client aims to be a faster and lighter-weight way to verify that a +block was voted on by a Tendermint network. The light client fetches +Tendermint block headers and commit signatures, performing public key +verification to ensure that the associated validator set signed the block. +Reducing the size of the commit signature would allow the light client to fetch +block data more quickly. + +Additionally, the faster signature verification times of BLS signatures mean +that light client verification would proceed more quickly. + +However, verification of an aggregated signature is all-or-nothing. The verifier +cannot check that some singular signer had a signature included in the block. +Instead, the verifier must use all public keys to check if some signature +was included. This does mean that any light client implementation must always +be able to fetch all public keys for any height instead of potentially being +able to check if some singular validator's key signed the block. + +#### Reduce Gossip Bandwidth + +##### Vote Gossip + +It is possible to aggregate subsets of signatures during voting, so that the +network need not gossip all *n* validator signatures to all *n* validators. +Theoretically, subsets of the signatures could be aggregated during consensus +and vote messages could carry those aggregated signatures. Implementing this +would certainly increase the complexity of the gossip layer but could possibly +reduce the total number of signatures required to be verified by each validator. + +##### Block Gossip + +A reduction in the block size as a result of signature aggregation would +naturally lead to a reduction in the bandwidth required to gossip a block. +Each validator would only send and receive the smaller aggregated signatures +instead of the full list of multi-signatures as we have them now. + +### What are the drawbacks to aggregated signatures? + +#### Heterogeneous key types cannot be aggregated + +Aggregation requires a specific signature algorithm, and our legacy signing schemes +cannot be aggregated. In practice, this means that aggregated signatures could +be created for a subset of validators using BLS signatures, and validators +with other key types (such as Ed25519) would still have to be be separately +propagated in blocks and votes. + +#### Many HSMs do not support aggregated signatures + +**Hardware Signing Modules** (HSM) are a popular way to manage private keys. +They provide additional security for key management and should be used when +possible for storing highly sensitive private key material. + +Below is a list of popular HSMs along with their support for BLS signatures. + +* YubiKey + * [No support][yubi-key-bls-support] +* Amazon Cloud HSM + * [No support][cloud-hsm-support] +* Ledger + * [Lists support for the BLS12-381 curve][ledger-bls-announce] + +I cannot find support listed for Google Cloud, although perhaps it exists. + +## Feasibility of implementation + +This section outlines the various hurdles that would exist to implementing BLS +signature aggregation into Tendermint. It aims to demonstrate that we _could_ +implement BLS signatures but that it would incur risk and require breaking changes for a +reasonably unclear benefit. + +### Can aggregated signatures be added as soft-upgrades? + +In my estimation, yes. With the implementation of proposer-based timestamps, +all validators now produce signatures on only one of two messages: + +1. A [CanonicalVote][canonical-vote-proto] where the BlockID is the hash of the block or +2. A `CanonicalVote` where the `BlockID` is nil. + +The block structure can be updated to perform hashing and validation in a new +way as a soft upgrade. This would look like adding a new section to the [Block.Commit][commit-proto] structure +alongside the current `Commit.Signatures` field. This new field, tentatively named +`AggregatedSignature` would contain the following structure: + +```proto +message AggregatedSignature { + // yeas is a BitArray representing which validators in the active validator + // set issued a 'yea' vote for the block. + tendermint.libs.bits.BitArray yeas = 1; + + // absent is a BitArray representing which validators in the active + // validator set did not issue votes for the block. + tendermint.libs.bits.BitArray absent = 2; + + // yea_signature is an aggregated signature produced from all of the vote + // signatures for the block. + repeated bytes yea_signature = 3; + + // nay_signature is an aggregated signature produced from all of the vote + // signatures from votes for 'nil' for this block. + // nay_signature should be made from all of the validators that were both not + // in the 'yeas' BitArray and not in the 'absent' BitArray. + repeated bytes nay_signature = 4; +} +``` + +Adding this new field as a soft upgrade would mean hashing this data structure +into the blockID along with the old `Commit.Signatures` when both are present +as well as ensuring that the voting power represented in the new +`AggregatedSignature` and `Signatures` field was enough to commit the block +during block validation. One can certainly imagine other possible schemes for +implementing this but the above should serve as a simple enough proof of concept. + +### Implementing vote-time and commit-time signature aggregation separately + +Implementing aggregated BLS signatures as part of the block structure can easily be +achieved without implementing any 'vote-time' signature aggregation. +The block proposer would gather all of the votes, complete with signatures, +as it does now, and produce a set of aggregate signatures from all of the +individual vote signatures. + +Implementing 'vote-time' signature aggregation cannot be achieved without +also implementing commit-time signature aggregation. This is because such +signatures cannot be dis-aggregated into their constituent pieces. Therefore, +in order to implement 'vote-time' signature aggregation, we would need to +either first implement 'commit-time' signature aggregation, or implement both +'vote-time' signature aggregation while also updating the block creation and +verification protocols to allow for aggregated signatures. + +### Updating IBC clients + +In order for IBC clients to function, they must be able to perform light-client +verification of blocks on counterparty chains. Because BLS signatures are not +currently part of light-clients, chains that transmit messages over IBC +cannot update to using BLS signatures without their counterparties first +being upgraded to parse and verify BLS. If chains upgrade without their +counterparties first updating, they will lose the ability to interoperate with +non-updated chains. + +### New attack surfaces + +BLS signatures and signature aggregation comes with a new set of attack surfaces. +Additionally, it's not clear that all possible major attacks are currently known +on the BLS aggregation schemes since new ones have been discovered since the ietf +draft standard was written. The known attacks are manageable and are listed below. +Our implementation would need to prevent against these but this does not appear +to present a significant hurdle to implementation. + +#### Rogue key attack prevention + +Generating an aggregated signature requires guarding against what is called +a [rogue key attack][bls-ietf-terms]. A rogue key attack is one in which a +malicious actor can craft an _aggregate_ key that can produce signatures that +appear to include a signature from a private key that the malicious actor +does not actually know. In Tendermint terms, this would look like a Validator +producing a vote signed by both itself and some other validator where the other +validator did not actually produce the vote itself. + +The main mechanisms for preventing this require that each entity prove that it +can can sign data with just their private key. The options involve either +ensuring that each entity sign a _different_ message when producing every +signature _or_ producing a [proof of possession][bls-ietf-pop] (PoP) when announcing +their key to the network. + +A PoP is a message that demonstrates ownership of a private +key. A simple scheme for PoP is one where the entity announcing +its new public key to the network includes a digital signature over the bytes +of the public key generated using the associated private key. Everyone receiving +the public key and associated proof-of-possession can easily verify the +signature and be sure the entity owns the private key. + +This PoP scheme suits the Tendermint use case quite well since +validator keys change infrequently so the associated PoPs would not be onerous +to produce, verify, and store. Using this scheme allows signature verification +to proceed more quickly, since all signatures are over identical data and +can therefore be checked using an aggregated public key instead of one at a +time, public key by public key. + +#### Summing Zero Attacks + +[Summing zero attacks][summing-zero-paper] are attacks that rely on using the '0' point of an +elliptic curve. For BLS signatures, if the point 0 is chosen as the private +key, then the 0 point will also always be the public key and all signatures +produced by the key will also be the 0 point. This is easy enough to +detect when verifying each signature individually. + +However, because BLS signature aggregation creates an aggregated signature and +an aggregated public key, a set of colluding signers can create a pair or set +of signatures that are non-zero but which aggregate ("sum") to 0. The signatures that sum zero along with the +summed public key of the colluding signers will verify any message. This would +allow the colluding signers to sign any block or message with the same signature. +This would be reasonably easy to detect and create evidence for because, in +all other cases, the same signature should not verify more than message. It's +not exactly clear how such an attack would advantage the colluding validators +because the normal mechanisms of evidence gathering would still detect the +double signing, regardless of the signatures on both blocks being identical. + +### Backwards Compatibility + +Backwards compatibility is an important consideration for signature verification. +Specifically, it is important to consider whether chains using current versions +of IBC would be able to interact with chains adopting BLS. + +Because the `Block` shared by IBC and Tendermint is produced and parsed using +protobuf, new structures can be added to the Block without breaking the +ability of legacy users to parse the new structure. Breaking changes between +current users of IBC and new Tendermint blocks only occur if data that is +relied upon by the current users is no longer included in the current fields. + +For the case of BLS aggregated signatures, a new `AggregatedSignature` field +can therefore be added to the `Commit` field without breaking current users. +Current users will be broken when counterparty chains upgrade to the new version +and _begin using_ BLS signatures. Once counterparty chains begin using BLS +signatures, the BlockID hashes will include hashes of the `AggregatedSignature` +data structure that the legacy users will not be able to compute. Additionally, +the legacy software will not be able to parse and verify the signatures to +ensure that a supermajority of validators from the counterparty chain signed +the block. + +### Library Support + +Libraries for BLS signature creation are limited in number, although active +development appears to be ongoing. Cryptographic algorithms are difficult to +implement correctly and correctness issues are extremely serious and dangerous. +No further exploration of BLS should be undertaken without strong assurance of +a well-tested library with continuing support for creating and verifying BLS +signatures. + +At the moment, there is one candidate, `blst`, that appears to be the most +mature and well vetted. While this library is undergoing continuing auditing +and is supported by funds from the Ethereum foundation, adopting a new cryptographic +library presents some serious risks. Namely, if the support for the library were +to be discontinued, Tendermint may become saddled with the requirement of supporting +a very complex piece of software or force a massive ecosystem-wide migration away +from BLS signatures. + +This is one of the more serious reasons to avoid adopting BLS signatures at this +time. There is no gold standard library. Some projects look promising, but no +project has been formally verified with a long term promise of being supported +well into the future. + +#### Go Standard Library + +The Go Standard library has no implementation of BLS signatures. + +#### BLST + +[blst][blst], or 'blast' is an implementation of BLS signatures written in C +that provides bindings into Go as part of the repository. This library is +actively undergoing formal verification by Galois and previously received an +initial audit by NCC group, a firm I'd never heard of. + +`blst` is [targeted for use in prysm][prysm-blst], the golang implementation of Ethereum 2.0. + +#### Gnark-Crypto + +[Gnark-Crypto][gnark] is a Go-native implementation of elliptic-curve pairing-based +cryptography. It is not audited and is documented as 'as-is', although +development appears to be active so formal verification may be forthcoming. + +#### CIRCL + +[CIRCL][circl] is a go-native implementation of several cryptographic primitives, +bls12-381 among them. The library is written and maintained by Cloudflare and +appears to receive frequent contributions. However, it lists itself as experimental +and urges users to take caution before using it in production. + +### Added complexity to light client verification + +Implementing BLS signature aggregation in Tendermint would pose issues for the +light client. The light client currently validates a subset of the signatures +on a block when performing the verification algorithm. This is no longer possible +with an aggregated signature. Aggregated signature verification is all-or-nothing. +The light client could no longer check that a subset of validators from some +set of validators is represented in the signature. Instead, it would need to create +a new aggregated key with all the stated signers for each height it verified where +the validator set changed. + +This means that the speed advantages gained by using BLS cannot be fully realized +by the light client since the client needs to perform the expensive operation +of re-aggregating the public key. Aggregation is _not_ constant time in the +number of keys and instead grows linearly. When [benchmarked locally][blst-verify-bench-agg], +blst public key aggregation of 128 keys took 2.43 milliseconds. This, along with +the 1.5 milliseconds to verify a signature would raise light client signature +verification time to 3.9 milliseconds, a time above the previously mentioned +batch verification time using our ed25519 library of 2.0 milliseconds. + +Schemes to cache aggregated subsets of keys could certainly cut this time down at the +cost of adding complexity to the light client. + +### Added complexity to evidence handling + +Implementing BLS signature aggregation in Tendermint would add complexity to +the evidence handling within Tendermint. Currently, the light client can submit +evidence of a fork attempt to the chain. This evidence consists of the set of +validators that double-signed, including their public keys, with the conflicting +block. + +We can quickly check that the listed validators double signed by verifying +that each of their signatures are in the submitted conflicting block. A BLS +signature scheme would change this by requiring the light client to submit +the public keys of all of the validators that signed the conflicting block so +that the aggregated signature may be checked against the full signature set. +Again, aggregated signature verification is all-or-nothing, so without all of +the public keys, we cannot verify the signature at all. These keys would be +retrievable. Any party that wanted to create a fork would want to convince a +network that its fork is legitimate, so it would need to gossip the public keys. +This does not hamper the feasibility of implementing BLS signature aggregation +into Tendermint, but does represent yet another piece of added complexity to +the associated protocols. + +## Open Questions + +* *Q*: Can you aggregate Ed25519 signatures in Tendermint? + * There is a suggested scheme in github issue [7892][suggested-ed25519-agg], +but additional rigor would be required to fully verify its correctness. + +## Current Consideration + +Adopting a signature aggregation scheme presents some serious risks and costs +to the Tendermint project. It requires multiple backwards-incompatible changes +to the code, namely a change in the structure of the block and a new backwards-incompatible +signature and key type. It risks adding a new signature type for which new attack +types are still being discovered _and_ for which no industry standard, battle-tested +library yet exists. + +The gains boasted by this new signing scheme are modest: Verification time is +marginally faster and block sizes shrink by a few kilobytes. These are relatively +minor gains in exchange for the complexity of the change and the listed risks of the technology. +We should take a wait-and-see approach to BLS signature aggregation, monitoring +the up-and-coming projects and consider implementing it as the libraries and +standards develop. + +### References + +[line-ostracon-repo]: https://github.com/line/ostracon +[line-ostracon-pr]: https://github.com/line/ostracon/pull/117 +[mit-BLS-lecture]: https://youtu.be/BFwc2XA8rSk?t=2521 +[gcp-storage-pricing]: https://cloud.google.com/storage/pricing#north-america_2 +[yubi-key-bls-support]: https://github.com/Yubico/yubihsm-shell/issues/66 +[cloud-hsm-support]: https://docs.aws.amazon.com/cloudhsm/latest/userguide/pkcs11-key-types.html +[bls-ietf]: https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-bls-signature-04 +[bls-ietf-terms]: https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-bls-signature-04#section-1.3 +[bls-ietf-pop]: https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-bls-signature-04#section-3.3 +[multi-signatures-smaller-blockchains]: https://eprint.iacr.org/2018/483.pdf +[ibc-tendermint]: https://github.com/cosmos/ibc/tree/master/spec/client/ics-007-tendermint-client +[zcash-adoption]: https://github.com/zcash/zcash/issues/2502 +[chia-adoption]: https://github.com/Chia-Network/chia-blockchain#chia-blockchain +[bls-ietf-ecdsa-compare]: https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-bls-signature-04#section-1.1 +[voi-ed25519-perf]: https://github.com/williambanfield/curve25519-voi/blob/benchmark/primitives/ed25519/PERFORMANCE.txt#L79 +[blst-verify-bench]: https://github.com/williambanfield/blst/blame/bench/bindings/go/PERFORMANCE.md#L9 +[blst-verify-bench-agg]: https://github.com/williambanfield/blst/blame/bench/bindings/go/PERFORMANCE.md#L23 +[vitalik-pairing-post]: https://medium.com/@VitalikButerin/exploring-elliptic-curve-pairings-c73c1864e627 +[ledger-bls-announce]: https://www.ledger.com/first-ever-firmware-update-coming-to-the-ledger-nano-x +[commit-proto]: https://github.com/tendermint/tendermint/blob/be7cb50bb3432ee652f88a443e8ee7b8ef7122bc/proto/tendermint/types/types.proto#L121 +[canonical-vote-proto]: https://github.com/tendermint/tendermint/blob/be7cb50bb3432ee652f88a443e8ee7b8ef7122bc/spec/core/encoding.md#L283 +[blst]: https://github.com/supranational/blst +[prysm-blst]: https://github.com/prysmaticlabs/prysm/blob/develop/go.mod#L75 +[gnark]: https://github.com/ConsenSys/gnark-crypto/ +[eth-2-adoption]: https://notes.ethereum.org/@GW1ZUbNKR5iRjjKYx6_dJQ/Skxf3tNcg_ +[bls-weil-pairing]: https://www.iacr.org/archive/asiacrypt2001/22480516.pdf +[summing-zero-paper]: https://eprint.iacr.org/2021/323.pdf +[circl]: https://github.com/cloudflare/circl +[light-client-evidence]: https://github.com/tendermint/tendermint/blob/a6fd1fe20116d4b1f7e819cded81cece8e5c1ac7/types/evidence.go#L245 +[suggested-ed25519-agg]: https://github.com/tendermint/tendermint/issues/7892 diff --git a/docs/rfc/rfc-019-config-version.md b/docs/rfc/rfc-019-config-version.md new file mode 100644 index 000000000..3dfd7840b --- /dev/null +++ b/docs/rfc/rfc-019-config-version.md @@ -0,0 +1,400 @@ +# RFC 019: Configuration File Versioning + +## Changelog + +- 19-Apr-2022: Initial draft (@creachadair) +- 20-Apr-2022: Updates from review feedback (@creachadair) + +## Abstract + +Updating configuration settings is an essential part of upgrading an existing +node to a new version of the Tendermint software. Unfortunately, it is also +currently a very manual process. This document discusses some of the history of +changes to the config format, actions we've taken to improve the tooling for +configuration upgrades, and additional steps we may want to consider. + +## Background + +A Tendermint node reads configuration settings at startup from a TOML formatted +text file, typically named `config.toml`. The contents of this file are defined +by the [`github.com/tendermint/tendermint/config`][config-pkg]. + +Although many settings in this file remain valid from one version of Tendermint +to the next, new versions of Tendermint often add, update, and remove settings. +These changes often require manual intervention by operators who are upgrading +their nodes. + +I propose we should provide better tools and documentation to help operators +make configuration changes correctly during version upgrades. Ideally, as much +as possible of any configuration file update should be automated, and where +that is not possible or practical, we should provide clear, explicit directions +for what steps need to be taken manually. Moreover, when the node discovers +incorrect or invalid configuration, we should improve the diagnostics it emits +so that the operator can quickly and easily find the relevant documentation, +without having to grep through source code. + +## Discussion + +By convention, we are supposed to document required changes to the config file +in the `UPGRADING.md` file for the release that introduces them. Although we +have mostly done this, the level of detail in the upgrading instructions is +often insufficient for an operator to correctly update their file. + +The updates vary widely in complexity: Operators may need to add new required +settings, update obsolete values for existing settings, move or rename existing +settings within the file, or remove obsolete settings (which are thus invalid). +Here are a few examples of each of these cases: + +- **New required settings:** Tendermint v0.35 added a new top-level `mode` + setting that determines whether a node runs as a validator, a full node, or a + seed node. The default value is `"full"`, which means the operator of a + validator must manually add `mode = "validator"` (or set the `--mode` flag on + the command line) for their node to come up in the correct mode. + +- **Updated obsolete values:** Tendermint v0.35 removed support for versions + `"v1"` and `"v2"` of the blocksync (formerly "fastsync") protocol, requiring + any node using either of those values to update to `"v0"`. + +- **Moved/renamed settings:** Version v0.34 moved the top-level `pprof_laddr` + setting under the `[rpc]` section. + + Version v0.35 renamed every setting in the file from `snake_case` to + `kebab-case`, moved the top-level `fast_sync` setting into the `[blocksync]` + section as (itself renamed from `[fastsync]`), and moved all the top-level + `priv-validator-*` settings under a new `[priv-validator]` section with their + prefix trimmed off. + +- **Removed obsolete settings:** Version v0.34 removed the `index_all_keys` and + `index_keys` settings from the `[tx_index]` section; version v0.35 removed + the `wal-dir` setting from the `[mempool]` section, and version v0.36 removed + the `[blocksync]` section entirely. + +While many of these changes are mentioned in the config section of the upgrade +instructions, some are not mentioned at all, or are hidden in other parts of +the doc. For instance, the v0.34 `pprof_laddr` change was documented only as an +RPC flag change. (A savvy reader might realize that the flag `--rpc.pprof_laddr` +implies a corresponding config section, but it omits the related detail that +there was a top-level setting that's been renamed). The lesson here is not +that the docs are bad, but to point out that prose is not the most efficient +format to convey detailed changes like this. The upgrading instructions are +still valuable for the human reader to understand what to expect. + +### Concrete Steps + +As part of the v0.36 development cycle, we spent some time reverse-engineering +the configuration changes since the v0.34 release and built an experimental +command-line tool called [`confix`][confix], whose job it is to automatically +update the settings in a `config.toml` file to the latest version. We also +backported a version of this tool into the v0.35.x branch at release v0.35.4. + +This tool should work fine for configuration files created by Tendermint v0.34 +and later, but does not (yet) know how to handle changes from prior versions of +Tendermint. Part of the difficulty for older versions is simply logistical: To +figure out which changes to apply, we need to understand something about the +version that made the file, as well as the version we're converting it to. + +> **Discussion point:** In the future we might want to consider incorporating +> this into the node CLI directly, but we're keeping it separate for now until +> we can get some feedback from operators. + +For the experiment, we handled this by carefully searching the history of +config format changes for shibboleths to bound the version: For example, the +`[fastsync]` section was added in Tendermint v0.32 and renamed `[blocksync]` in +Tendermint v0.35. So if we see a `[fastsync]` section, we have some confidence +that the file was created by v0.32, v0.33, or v0.34. + +But such signals are delicate: The `[blocksync]` section was removed in v0.36, +so if we do not find `[fastsync]`, we cannot conclude from that alone that the +file is from v0.31 or earlier -- we have to look for corroborating details. +While such "sniffing" tactics are fine for an experiment, they aren't as robust +as we might like. + +This is especially relevant for configuration files that may have already been +manually upgraded across several versions by the time we are asked to update +them again. Another related concern is that we'd like to make sure conversion +is idempotent, so that it would be safe to rerun the tool over an +already-converted file without breaking anything. + +### Config Versioning + +One obvious tactic we could use for future releases is add a version marker to +the config file. This would give tools like `confix` (and the node itself) a +way to calibrate their expectations. Rather than being a version for the file +itself, however, this version marker would indicate which version of Tendermint +is needed to read the file. + +Provisionally, this might look something like: + +```toml +# THe minimum version of Tendermint compatible with the contents of +# this configuration file. +config-version = 'v0.35' +``` + +When initializing a new node, Tendermint would populate this field with its own +version (e.g., `v0.36`). When conducting an upgrade, tools like `confix` can +then use this to decide which conversions are valid, and then update the value +accordingly. After converting a file marked `'v0.35'` to`'v0.37'`, the +conversion tool sets the file's `config-version` to reflect its compatibility. + +> **Discussion point:** This example presumes we would keep config files +> compatible within a given release cycle, e.g., all of v0.36.x. We could also +> use patch numbers here, if we think there's some reason to permit changes +> that would require config file edits at that granularity. I don't think we +> should, but that's a design question to consider. + +Upon seeing an up-to-date version marker, the conversion tool can simply exit +with a diagnostic like "this file is already up-to-date", rather than sniffing +the keyspace and potentially introducing errors. In addition, this would let a +tool detect config files that are _newer_ than the one it understands, and +issue a safe diagnostic rather than doing something wrong. Plus, besides +avoiding potentially unsafe conversions, this would also serve as +human-readable documentation that the file is up-to-date for a given version. + +Adding a config version would not address the problem of how to convert files +created by older versions of Tendermint, but it would at least help us build +more robust config tooling going forward. + +### Stability and Change + +In light of the discussion so far, it is natural to examine why we make so many +changes to the configuration file from one version to the next, and whether we +could reduce friction by being more conservative about what we make +configurable, what config changes we make over time, and how we roll them out. + +Some changes, like renaming everything from snake case to kebab case, are +entirely gratuitous. We could safely agree not to make those kinds of changes. +Apart from that obvious case, however, many other configuration settings +provide value to node operators in cases where there is no simple, universal +setting that matches every application. + +Taking a high-level view, there are several broad reasons why we might want to +make changes to configuration settings: + +- **Lessons learned:** Configuration settings are a good way to try things out + in production, before making more invasive changes to the consensus protocol. + + For example, up until Tendermint v0.35, consensus timeouts were specified as + per-node configuration settings (e.g., `timeout-precommit` et al.). This + allowed operators to tune these values for the needs of their network, but + had the downside that individually-misconfigured nodes could stall consensus. + + Based on that experience, these timeouts have been deprecated in Tendermint + v0.36 and converted to consensus parameters, to be consistent across all + nodes in the network. + +- **Migration & experimentation:** Introducing new features and updating old + features can complicate migration for existing users of the software. + Temporary or "experimental" configuration settings can be a valuable way to + mitigate that friction. + + For example, Tendermint v0.36 introduces a new RPC event subscription + endpoint (see [ADR 075][adr075]) that will eventually replace the existing + webwocket-based interface. To give users time to migrate, v0.36 adds an + `experimental-disable-websocket` setting, defaulted to `false`, that allows + operators to selectively disable the websocket API for testing purposes + during the conversion. This setting is designed to be removed in v0.37, when + the old interface is no longer supported. + +- **Ongoing maintenance:** Sometimes configuration settings become obsolete, + and the cost of removing them trades off against the potential risks of + leaving a non-functional or deprecated knob hooked up indefinitely. + + For example, Tendermint v0.35 deprecated two alternate implementations of the + blocksync protocol, one of which was deleted entirely (`v1`) and one of which + was scheduled for removal (`v2`). The `blocksync.version` setting, which had + been added as a migration aid, became obsolete and needed to be updated. + + Despite our best intentions, sometimes engineering designs do not work out. + It's just as important to leave room to back out of changes we have since + reconsidered, as it is to support migrations forward onto new and improved + code. + +- **Clarity and legibility:** Besides configuring the software, another + important purpose of a config file is to document intent for the humans who + operate and maintain the software. Operators need adjust settings to keep the + node running, and developers need to know what options were in use when + something goes wrong so they can diagnose and fix bugs. The legibility of a + config file as a _human_ artifact is also thus important. + + For example, Tendermint v0.35 moved settings related to validator private + keys from the top-level section of the configuration file to their own + designated `[priv-validator]` section. Although this change did not make any + difference to the meaning of those settings, it made the organization of the + file easier to understand, and allowed the names of the individual settings + to be simplified (e.g., `priv-validator-key-file` became simply `key-file` in + the new section). + + Although such changes are "gratuitous" with respect to the software, there is + often value in making things more legible for the humans. While there is no + simple rule to define the line, the Potter Stewart principle can be used with + due care. + +Keeping these examples in mind, we can and should take reasonable steps to +avoid churn in the configuration file across versions where we can. However, we +must also accept that part of the reason for _having_ a config file is to allow +us flexibility elsewhere in the design. On that basis, we should not attempt +to be too dogmatic about config changes either. Unlike changes in the block +protocol, for example, which affect every user of every network that adopts +them, config changes are relatively self-contained. + +There are few guiding principles I think we can use to strike a sensible +balance: + +1. **No gratuitous changes.** Aesthetic changes that do not enhance legibility, + avert confusion, or clarity documentation, should be entirely avoided. + +2. **Prefer mechanical changes.** Whenever it is practical, change settings in + a way that can be updated by a tool without operator judgement. This implies + finding safe, universal defaults for new settings, and not changing the + default values of existing settings. + + Even if that means we have to make multiple changes (e.g., add a new setting + in the current version, deprecate the old one, and remove the old one in the + next version) it's preferable if we can mechanize each step. + +3. **Clearly signal intent.** When adding temporary or experimental settings, + they should be clearly named and documented as such. Use long names and + suggestive prefixes (e.g., `experimental-*`) so that they stand out when + read in the config file or printed in logs. + + Relatedly, using temporary or experimental settings should cause the + software to emit diagnostic logs at runtime. These log messages should be + easy to grep for, and should contain pointers to more complete documentation + (say, issue numbers or URLs) that the operator can read, as well as a hint + about when the setting is expected to become invalid. For example: + + ``` + WARNING: Websocket RPC access is deprecated and will be removed in + Tendermint v0.37. See https://tinyurl.com/adr075 for more information. + ``` + +4. **Consider both directions.** When adding a configuration setting, take some + time during the implementation process to think about how the setting could + be removed, as well as how it will be rolled out. This applies even for + settings we imagine should be permanent. Experience may cause is to rethink + our original design intent more broadly than we expected. + + This does not mean we have to spend a long time picking nits over the design + of every setting; merely that we should convince ourselves we _could_ undo + it without making too big a mess later. Even a little extra effort up front + can sometimes save a lot. + +## References + +- [Tendermint `config` package][config-pkg] +- [`confix` command-line tool][confix] +- [`condiff` command-line tool][condiff] +- [Configuration update plan][plan] +- [ADR 075: RPC Event Subscription Interface][adr075] + +[config-pkg]: https://godoc.org/github.com/tendermint/tendermint/config +[confix]: https://github.com/tendermint/tendermint/blob/master/scripts/confix +[condiff]: https://github.com/tendermint/tendermint/blob/master/scripts/confix/condiff +[plan]: https://github.com/tendermint/tendermint/blob/master/scripts/confix/plan.go +[testdata]: https://github.com/tendermint/tendermint/blob/master/scripts/confix/testdata +[adr075]: https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-075-rpc-subscription.md + +## Appendix: Research Notes + +Discovering when various configuration settings were added, updated, and +removed turns out to be surprisingly tedious. To solve this puzzle, we had to +answer the following questions: + +1. What changes were made between v0.x and v0.y? This is further complicated by + cases where we have backported config changes into the middle of an earlier + release cycle (e.g., `psql-conn` from v0.35.x into v0.34.13). + +2. When during the development cycle were those changes made? This allows us to + recognize features that were backported into a previous release. + +3. What were the default values of the changed settings, and did they change at + all during or across the release boundary? + +Each step of the [configuration update plan][plan] is commented with a link to +one or more PRs where that change was made. The sections below discuss how we +found these references. + +### Tracking Changes Across Releases + +To figure out what changed between two releases, we built a tool called +[`condiff`][condiff], which performs a "keyspace" diff of two TOML documents. +This diff respects the structure of the TOML file, but ignores comments, blank +lines, and configuration values, so that we can see what was added and removed. + +To use it, run: + +```shell +go run ./scripts/confix/condiff old.toml new.toml +``` + +This tool works on any TOML documents, but for our purposes we needed +Tendermint `config.toml` files. The easiest way to get these is to build the +node binary for your version of interest, run `tendermint init` on a clean home +directory, and copy the generated config file out. The [`testdata`][testdata] +directory for the `confix` tool has configs generated from the heads of each +release branch from v0.31 through v0.35. + +If you want to reproduce this yourself, it looks something like this: + +```shell +# Example for Tendermint v0.32. +git checkout --track origin/v0.32.x +go get golang.org/x/sys/unix +go mod tidy +make build +rm -fr -- tmhome +./build/tendermint --home=tmhome init +cp tmhome/config/config.toml config-v32.toml +``` + +Be advised that the further back you go, the more idiosyncrasies you will +encounter. For example, Tendermint v0.31 and earlier predate Go modules (v0.31 +used dep), and lack backport branches. And you may need to do some editing of +Makefile rules once you get back into the 20s. + +Note that when diffing config files across the v0.34/v0.35 gap, the swap from +`snake_case` to `kebab-case` makes it look like everything changed. The +`condiff` tool has a `-desnake` flag that normalizes all the keys to kebab case +in both inputs before comparison. + +### Locating Additions and Deletions + +To figure out when a configuration setting was added or removed, your tool of +choice is `git bisect`. The only tricky part is finding the endpoints for the +search. If the transition happened within a release, you can use that +release's backport branch as the endpoint (if it has one, e.g., `v0.35.x`). + +However, the start point can be more problematic. The backport branches are not +ancestors of `master` or of each other, which means you need to find some point +in history _prior_ to the change but still attached to the mainline. For recent +releases there is a dev root (e.g., `v0.35.0-dev`, `v0.34.0-dev1`, etc.). These +are not named consistently, but you can usually grep the output of `git tag` to +find them. + +In the worst case you could try starting from the root commit of the repo, but +that turns out not to work in all cases. We've done some branching shenanigans +over the years that mean the root is not a direct ancestor of all our release +branches. When you find this you will probably swear a lot. I did. + +Once you have a start and end point (say, `v0.35.0-dev` and `master`), you can +bisect in the usual way. I use `git grep` on the `config` directory to check +whether the case I am looking for is present. For example, to find when the +`[fastsync]` section was removed: + +```shell +# Setup: +git checkout master +git bisect start +git bisect bad # it's not present on tip of master. +git bisect good v0.34.0-dev1 # it was present at the start of v0.34. +``` + +```shell +# Now repeat this until it gives you a specific commit: +if git grep -q '\[fastsync\]' config ; then git bisect good ; else git bisect bad ; fi +``` + +The above example finds where a config was removed: To find where a setting was +added, do the same thing except reverse the sense of the test (`if ! git grep -q +...`). diff --git a/docs/rfc/rfc-020-onboarding-projects.rst b/docs/rfc/rfc-020-onboarding-projects.rst new file mode 100644 index 000000000..dc18de65d --- /dev/null +++ b/docs/rfc/rfc-020-onboarding-projects.rst @@ -0,0 +1,240 @@ +======================================= +RFC 020: Tendermint Onboarding Projects +======================================= + +.. contents:: + :backlinks: none + +Changelog +--------- + +- 2022-03-30: Initial draft. (@tychoish) +- 2022-04-25: Imported document to tendermint repository. (@tychoish) + +Overview +-------- + +This document describes a collection of projects that might be good for new +engineers joining the Tendermint Core team. These projects mostly describe +features that we'd be very excited to see land in the code base, but that are +intentionally outside of the critical path of a release on the roadmap, and +have the following properties that we think make good on-boarding projects: + +- require relatively little context for the project or its history beyond a + more isolated area of the code. + +- provide exposure to different areas of the codebase, so new team members + will have reason to explore the code base, build relationships with people + on the team, and gain experience with more than one area of the system. + +- be of moderate size, striking a healthy balance between trivial or + mechanical changes (which provide little insight) and large intractable + changes that require deeper insight than is available during onboarding to + address well. A good size project should have natural touchpoints or + check-ins. + +Projects +-------- + +Before diving into one of these projects, have a conversation about the +project or aspects of Tendermint that you're excited to work on with your +onboarding buddy. This will help make sure that these issues are still +relevant, help you get any context, underatnding known pitfalls, and to +confirm a high level approach or design (if relevant.) On-boarding buddies +should be prepared to do some design work before someone joins the team. + +The descriptions that follow provide some basic background and attempt to +describe the user stories and the potential impact of these project. + +E2E Test Systems +~~~~~~~~~~~~~~~~ + +Tendermint's E2E framework makes it possible to run small test networks with +different Tendermint configurations, and make sure that the system works. The +tests run Tendermint in a separate binary, and the system provides some very +high level protection against making changes that could break Tendermint in +otherwise difficult to detect ways. + +Working on the E2E system is a good place to get introduced to the Tendermint +codebase, particularly for developers who are newer to Go, as the E2E +system (generator, runner, etc.) is distinct from the rest of Tendermint and +comparatively quite small, so it may be easier to begin making changes in this +area. At the same time, because the E2E system exercises *all* of Tendermint, +work in this area is a good way to get introduced to various components of the +system. + +Configurable E2E Workloads +++++++++++++++++++++++++++ + +All E2E tests use the same workload (e.g. generated transactions, submitted to +different nodes in the network,) which has been tuned empirically to provide a +gentle but consistent parallel load that all E2E tests can pass. Ideally, the +workload generator could be configurable to have different shapes of work +(bursty, different transaction sizes, weighted to different nodes, etc.) and +even perhaps further parameterized within a basic shape, which would make it +possible to use our existing test infrastructure to answer different questions +about the performance or capability of the system. + +The work would involve adding a new parameter to the E2E test manifest, and +creating an option (e.g. "legacy") for the current load generation model, +extract configurations options for the current load generation, and then +prototype implementations of alternate load generation, and also run some +preliminary using the tools. + +Byzantine E2E Workloads ++++++++++++++++++++++++ + +There are two main kinds of integration tests in Tendermint: the E2E test +framework, and then a collection of integration tests that masquerade as +unit-tests. While some of this expansion of test scope is (potentially) +inevitable, the masquerading unit tests (e.g ``consensus.byzantine_test.go``) +end up being difficult to understand, difficult to maintain, and unreliable. + +One solution to this, would be to modify the E2E ABCI application to allow it +to inject byzantine behavior, and then have this be a configurable aspect of +a test network to be able to provoke Byzantine behavior in a "real" system and +then observe that evidence is constructed. This would make it possible to +remove the legacy tests entirely once the new tests have proven themselves. + +Abstract Orchestration Framework +++++++++++++++++++++++++++++++++ + +The orchestration of e2e test processes is presently done using docker +compose, which works well, but has proven a bit limiting as all processes need +to run on a single machine, and the log aggregation functions are confusing at +best. + +This project would replace the current orchestration with something more +generic, potentially maintaining the current system, but also allowing the e2e +tests to manage processes using k8s. There are a few "local" k8s frameworks +(e.g. kind and k3s,) which might be able to be useful for our current testing +model, but hopefully, we could use this new implementation with other k8s +systems for more flexible distribute test orchestration. + +Improve Operationalize Experience of ``run-multiple.sh`` +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +The e2e test runner currently runs a single test, and in most cases we manage +the test cases using a shell script that ensure cleanup of entire test +suites. This is a bit difficult to maintain and makes reproduction of test +cases more awkward than it should be. The e2e ``runner`` itself should provide +equivalent functionality to ``run-multiple.sh``: ensure cleanup of test cases, +collect and process output, and be able to manage entire suites of cases. + +It might also be useful to implement an e2e test orchestrator that runs all +tendermint instances in a single process, using "real" networks for faster +feedback and iteration during development. + +In addition to being a bit easier to maintain, having a more capable runner +implementation would make it easier to collect data from test runs, improve +debugability and reporting. + +Fan-Out For CI E2E Tests +++++++++++++++++++++++++ + +While there are some parallelism in the execution of e2e tests, each e2e test +job must build a tendermint e2e image, which takes about 5 minutes of CPU time +per-task, which given the size of each of the runs. + +We'd like to be able to reduce the amount of overhead per-e2e tests while +keeping the cycle time for working with the tests very low, while also +maintaining a reasonable level of test coverage. This is an impossible +tradeoff, in some ways, and the percentage of overhead at the moment is large +enough that we can make some material progress with a moderate amount of time. + +Most of this work has to do with modifying github actions configuration and +e2e artifact (docker) building to reduce redundant work. Eventually, when we +can drop the requirement for CGo storage engines, it will be possible to move +(cross) compile tendermint locally, and then inject the binary into the docker +container, which would reduce a lot of the build-time complexity, although we +can move more in this direction or have runtime flags to disable CGo +dependencies for local development. + +Remove Panics +~~~~~~~~~~~~~ + +There are lots of places in the code base which can panic, and would not be +particularly well handled. While in some cases, panics are the right answer, +in many cases the panics were just added to simplify downstream error +checking, and could easily be converted to errors. + +The `Don't Panic RFC +`_ +covers some of the background and approach. + +While the changes are in this project are relatively rote, this will provide +exposure to lots of different areas of the codebase as well as insight into +how different areas of the codebase interact with eachother, as well as +experience with the test suites and infrastructure. + +Implement more Expressive ABCI Applications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Tendermint maintains two very simple ABCI applications (a KV application used +for basic testing, and slightly more advanced test application used in the +end-to-end tests). Writing an application would provide a new engineer with +useful experiences using Tendermint that mirrors the expierence of downstream +users. + +This is more of an exploratory project, but could include providing common +interfaces on top of Tendermint consensus for other well known protocols or +tools (e.g. ``etcd``) or a DNS server or some other tool. + +Self-Regulating Reactors +~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently reactors (the internal processes that are responsible for the higher +level behavior of Tendermint) can be started and stopped, but have no +provision for being paused. These additional semantics may allow Tendermint to +pause reactors (and avoid processing their messhages, etc.) and allow better +coordination in the future. + +While this is a big project, it's possible to break this apart into many +smaller projects: make p2p channels pauseable, add pause/UN-pause hooks to the +service implementation and machinery, and finally to modify the reactor +implementations to take advantage of these additional semantics + +This project would give an engineer some exposure to the p2p layer of the +code, as well as to various aspects of the reactor implementations. + +Metrics +~~~~~~~ + +Tendermint has a metrics system that is relatively underutilized, and figuring +out ways to capture and organize the metrics to provide value to users might +provide an interesting set of projects for new engineers on Tendermint. + +Convert Logs to Metrics ++++++++++++++++++++++++ + +Because the tendermint logs tend to be quite verbose and not particularly +actionable, most users largely ignore the logging or run at very low +verbosity. While the log statements in the code do describe useful events, +taken as a whole the system is not particularly tractable, and particularly at +the Debug level, not useful. One solution to this problem is to identify log +messages that might be (e.g. increment a counter for certian kinds of errors) + +One approach might be to look at various logging statements, particularly +debug statements or errors that are logged but not returned, and see if +they're convertable to counters or other metrics. + +Expose Metrics to Tests ++++++++++++++++++++++++ + +The existing Tendermint test suites replace the metrics infrastructure with +no-op implementations, which means that tests can neither verify that metrics +are ever recorded, nor can tests use metrics to observe events in the +system. Writing an implementation, for testing, that makes it possible to +record metrics and provides an API for introspecting this data, as well as +potentially writing tests that take advantage of this type, could be useful. + +Logging Metrics ++++++++++++++++ + +In some systems, the logging system itself can provide some interesting +insights for operators: having metrics that track the number of messages at +different levels as well as the total number of messages, can act as a canary +for the system as a whole. + +This should be achievable by adding an interceptor layer within the logging +package itself that can add metrics to the existing system. diff --git a/docs/rfc/rfc-021-socket-protocol.md b/docs/rfc/rfc-021-socket-protocol.md new file mode 100644 index 000000000..74034d20a --- /dev/null +++ b/docs/rfc/rfc-021-socket-protocol.md @@ -0,0 +1,266 @@ +# RFC 021: The Future of the Socket Protocol + +## Changelog + +- 19-May-2022: Initial draft (@creachadair) +- 19-Jul-2022: Converted from ADR to RFC (@creachadair) + +## Abstract + +This RFC captures some technical discussion about the ABCI socket protocol that +was originally documented to solicit an architectural decision. This topic was +not high-enough priority as of this writing to justify making a final decision. + +For that reason, the text of this RFC has the general structure of an ADR, but +should be viewed primarily as a record of the issue for future reference. + +## Background + +The [Application Blockchain Interface (ABCI)][abci] is a client-server protocol +used by the Tendermint consensus engine to communicate with the application on +whose behalf it performs state replication. There are currently three transport +options available for ABCI applications: + +1. **In-process**: Applications written in Go can be linked directly into the + same binary as the consensus node. Such applications use a "local" ABCI + connection, which exposes application methods to the node as direct function + calls. + +2. **Socket protocol**: Out-of-process applications may export the ABCI service + via a custom socket protocol that sends requests and responses over a + Unix-domain or TCP socket connection as length-prefixed protocol buffers. + In Tendermint, this is handled by the [socket client][socket-client]. + +3. **gRPC**: Out-of-process applications may export the ABCI service via gRPC. + In Tendermint, this is handled by the [gRPC client][grpc-client]. + +Both the out-of-process options (2) and (3) have a long history in Tendermint. +The beginnings of the gRPC client were added in [May 2016][abci-start] when +ABCI was still hosted in a separate repository, and the socket client (formerly +called the "remote client") was part of ABCI from its inception in November +2015. + +At that time when ABCI was first being developed, the gRPC project was very new +(it launched Q4 2015) and it was not an obvious choice for use in Tendermint. +It took a while before the language coverage and quality of gRPC reached a +point where it could be a viable solution for out-of-process applications. For +that reason, it made sense for the initial design of ABCI to focus on a custom +protocol for out-of-process applications. + +## Problem Statement + +For practical reasons, ABCI needs an interprocess communication option to +support applications not written in Go. The two practical options are RPC and +FFI, and for operational reasons an RPC mechanism makes more sense. + +The socket protocol has not changed all that substantially since its original +design, and has the advantage of being simple to implement in almost any +reasonable language. However, its simplicity includes some limitations that +have had a negative impact on the stability and performance of out-of-process +applications using it. In particular: + +- The protocol lacks request identifiers, so the client and server must return + responses in strict FIFO order. Even if the client issues requests that have + no dependency on each other, the protocol has no way except order of issue to + map responses to requests. + + This reduces (in some cases substantially) the concurrency an application can + exploit, since the parallelism of requests in flight is gated by the slowest + active request at any moment. There have been complaints from some network + operators on that basis. + +- The protocol lacks method identifiers, so the only way for the client and + server to understand which operation is requested is to dispatch on the type + of the request and response payloads. For responses, this means that [any + error condition is terminal not only to the request, but to the entire ABCI + client](https://github.com/tendermint/tendermint/blob/master/abci/client/socket_client.go#L149). + + The historical intent of terminating for any error seems to have been that + all ABCI errors are unrecoverable and hence protocol fatal + (see [Note 1](#note1)). In practice, however, this greatly complicates + debugging a faulty node, since the only way to respond to errors is to panic + the node which loses valuable context that could have been logged. + +- There are subtle concurrency management dependencies between the client and + the server that are not clearly documented anywhere, and it is very easy for + small changes in both the client and the server to lead to tricky deadlocks, + panics, race conditions, and slowdowns. As a recent example of this, see + https://github.com/tendermint/tendermint/pull/8581. + +These limitations are fixable, but one important question is whether it is +worthwhile to fix them. We can add request and method identifiers, for +example, but doing so would be a breaking change to the protocol requiring +every application using it to update. If applications have to migrate anyway, +the stability and language coverage of gRPC have improved a lot, and today it +is probably simpler to set up and maintain an application using gRPC transport +than to reimplement the Tendermint socket protocol. + +Moreover, gRPC addresses all the above issues out-of-the-box, and requires +(much) less custom code for both the server (i.e., the application) and the +client. The project is well-funded and widely-used, which makes it a safe bet +for a dependency. + +## Decision + +There is a set of related alternatives to consider: + +- Question 1: Designate a single IPC standard for out-of-process applications? + + Claim: We should converge on one (and only one) IPC option for out-of-process + applications. We should choose an option that, after a suitable period of + deprecation for alternatives, will address most or all the highest-impact + uses of Tendermint. Maintaining multiple options increases the surface area + for bugs and vulnerabilities, and we should not have multiple options for + basic interfaces without a clear and well-documented reason. + +- Question 2a: Choose gRPC and deprecate/remove the socket protocol? + + Claim: Maintaining and improving a custom RPC protocol is a substantial + project and not directly relevant to the requirements of consensus. We would + be better served by depending on a well-maintained open-source library like + gRPC. + +- Question 2b: Improve the socket protocol and deprecate/remove gRPC? + + Claim: If we find meaningful advantages to maintaining our own custom RPC + protocol in Tendermint, we should treat it as a first-class project within + the core and invest in making it good enough that we do not require other + options. + +**One important consideration** when discussing these questions is that _any +outcome which includes keeping the socket protocol will have eventual migration +impacts for out-of-process applications_ regardless. To fix the limitations of +the socket protocol as it is currently designed will require making _breaking +changes_ to the protocol. So, while we may put off a migration cost for +out-of-process applications by retaining the socket protocol in the short term, +we will eventually have to pay those costs to fix the problems in its current +design. + +## Detailed Design + +1. If we choose to standardize on gRPC, the main work in Tendermint core will + be removing and cleaning up the code for the socket client and server. + + Besides the code cleanup, we will also need to clearly document a + deprecation schedule, and invest time in making the migration easier for + applications currently using the socket protocol. + + > **Point for discussion:** Migrating from the socket protocol to gRPC + > should mostly be a plumbing change, as long as we do it during a release + > in which we are not making other breaking changes to ABCI. However, the + > effort may be more or less depending on how gRPC integration works in the + > application's implementation language, and would have to be sure networks + > have plenty of time not only to make the change but to verify that it + > preserves the function of the network. + > + > What questions should we be asking node operators and application + > developers to understand the migration costs better? + +2. If we choose to keep only the socket protocol, we will need to follow up + with a more detailed design for extending and upgrading the protocol to fix + the existing performance and operational issues with the protocol. + + Moreover, since the gRPC interface has been around for a long time we will + also need a deprecation plan for it. + +3. If we choose to keep both options, we will still need to do all the work of + (2), but the gRPC implementation should not require any immediate changes. + + +## Alternatives Considered + +- **FFI**. Another approach we could take is to use a C-based FFI interface so + that applications written in other languages are linked directly with the + consensus node, an option currently only available for Go applications. + + An FFI interface is possible for a lot of languages, but FFI support varies + widely in coverage and quality across languages and the points of friction + can be tricky to work around. Moreover, it's much harder to add FFI support + to a language where it's missing after-the-fact for an application developer. + + Although a basic FFI interface is not too difficult on the Go side, the C + shims for an FFI can get complicated if there's a lot of variability in the + runtime environment on the other end. + + If we want to have one answer for non-Go applications, we are better off + picking an IPC-based solution (whether that's gRPC or an extension of our + custom socket protocol or something else). + +## Consequences + +- **Standardize on gRPC** + + - ✅ Addresses existing performance and operational issues. + - ✅ Replaces custom code with a well-maintained widely-used library. + - ✅ Aligns with Cosmos SDK, which already uses gRPC extensively. + - ✅ Aligns with priv validator interface, for which the socket protocol is already deprecated for gRPC. + - ❓ Applications will be hard to implement in a language without gRPC support. + - ⛔ All users of the socket protocol have to migrate to gRPC, and we believe most current out-of-process applications use the socket protocol. + +- **Standardize on socket protocol** + + - ✅ Less immediate impact for existing users (but see below). + - ✅ Simplifies ABCI API surface by removing gRPC. + - ❓ Users of the socket protocol will have a (smaller) migration. + - ❓ Potentially easier to implement for languages that do not have support. + - ⛔ Need to do all the work to fix the socket protocol (which will require existing users to update anyway later). + - ⛔ Ongoing maintenance burden for per-language server implementations. + +- **Keep both options** + + - ✅ Less immediate impact for existing users (but see below). + - ❓ Users of the socket protocol will have a (smaller) migration. + - ⛔ Still need to do all the work to fix the socket protocol (which will require existing users to update anyway later). + - ⛔ Requires ongoing maintenance and support of both gRPC and socket protocol integrations. + + +## References + +- [Application Blockchain Interface (ABCI)][abci] +- [Tendermint ABCI socket client][socket-client] +- [Tendermint ABCI gRPC client][grpc-client] +- [Initial commit of gRPC client][abci-start] + +[abci]: https://github.com/tendermint/spec/tree/master/spec/abci +[socket-client]: https://github.com/tendermint/tendermint/blob/master/abci/client/socket_client.go +[socket-server]: https://github.com/tendermint/tendermint/blob/master/abci/server/socket_server.go +[grpc-client]: https://github.com/tendermint/tendermint/blob/master/abci/client/grpc_client.go +[abci-start]: https://github.com/tendermint/abci/commit/1ab3c747182aaa38418258679c667090c2bb1e0d + +## Notes + +- **Note 1**: The choice to make all ABCI errors protocol-fatal + was intended to avoid the risk that recovering an application error could + cause application state to diverge. Divergence can break consensus, so it's + essential to avoid it. + + This is a sound principle, but conflates protocol errors with "mechanical" + errors such as timeouts, resoures exhaustion, failed connections, and so on. + Because the protocol has no way to distinguish these conditions, the only way + for an application to report an error is to panic or crash. + + Whether a node is running in the same process as the application or as a + separate process, application errors should not be suppressed or hidden. + However, it's important to ensure that errors are handled at a consistent and + well-defined point in the protocol: Having the application panic or crash + rather than reporting an error means the node sees different results + depending on whether the application runs in-process or out-of-process, even + if the application logic is otherwise identical. + +## Appendix: Known Implementations of ABCI Socket Protocol + +This is a list of known implementations of the Tendermint custom socket +protocol. Note that in most cases I have not checked how complete or correct +these implementations are; these are based on search results and a cursory +visual inspection. + +- Tendermint Core (Go): [client][socket-client], [server][socket-server] +- Informal Systems [tendermint-rs](https://github.com/informalsystems/tendermint-rs) (Rust): [client](https://github.com/informalsystems/tendermint-rs/blob/master/abci/src/client.rs), [server](https://github.com/informalsystems/tendermint-rs/blob/master/abci/src/server.rs) +- Tendermint [js-abci](https://github.com/tendermint/js-abci) (JS): [server](https://github.com/tendermint/js-abci/blob/master/src/server.js) +- [Hotmoka](https://github.com/Hotmoka/hotmoka) ABCI (Java): [server](https://github.com/Hotmoka/hotmoka/blob/master/io-hotmoka-tendermint-abci/src/main/java/io/hotmoka/tendermint_abci/Server.java) +- [Tower ABCI](https://github.com/penumbra-zone/tower-abci) (Rust): [server](https://github.com/penumbra-zone/tower-abci/blob/main/src/server.rs) +- [abci-host](https://github.com/datopia/abci-host) (Clojure): [server](https://github.com/datopia/abci-host/blob/master/src/abci/host.clj) +- [abci_server](https://github.com/KrzysiekJ/abci_server) (Erlang): [server](https://github.com/KrzysiekJ/abci_server/blob/master/src/abci_server.erl) +- [py-abci](https://github.com/davebryson/py-abci) (Python): [server](https://github.com/davebryson/py-abci/blob/master/src/abci/server.py) +- [scala-tendermint-server](https://github.com/intechsa/scala-tendermint-server) (Scala): [server](https://github.com/InTechSA/scala-tendermint-server/blob/master/src/main/scala/lu/intech/tendermint/Server.scala) +- [kepler](https://github.com/f-o-a-m/kepler) (Rust): [server](https://github.com/f-o-a-m/kepler/blob/master/hs-abci-server/src/Network/ABCI/Server.hs) diff --git a/docs/rfc/rfc-023-semi-permanent-testnet.md b/docs/rfc/rfc-023-semi-permanent-testnet.md new file mode 100644 index 000000000..ddb31a908 --- /dev/null +++ b/docs/rfc/rfc-023-semi-permanent-testnet.md @@ -0,0 +1,263 @@ +# RFC 023: Semi-permanent Testnet + +## Changelog + +- 2022-07-28: Initial draft (@mark-rushakoff) +- 2022-07-29: Renumber to 023, minor clarifications (@mark-rushakoff) + +## Abstract + +This RFC discusses a long-lived testnet, owned and operated by the Tendermint engineers. +By owning and operating a production-like testnet, +the team who develops Tendermint becomes more capable of discovering bugs that +only arise in production-like environments. +They also build expertise in operating Tendermint; +this will help guide the development of Tendermint towards operator-friendly design. + +The RFC details a rough roadmap towards a semi-permanent testnet, some of the considered tradeoffs, +and the expected outcomes from following this roadmap. + +## Background + +The author's understanding -- which is limited as a new contributor to the Tendermint project -- +is that Tendermint development has been largely treated as a library for other projects to consume. +Of course effort has been spent on unit tests, end-to-end tests, and integration tests. +But whether developing a library or an application, +there is no substitute for putting the software under a production-like load. + +First, there are classes of bugs that are unrealistic to discover in environments +that do not resemble production. +But perhaps more importantly, there are "operational features" that are best designed +by the authors of a given piece of software. +For instance, does the software have sufficient observability built-in? +Are the reported metrics useful? +Are the log messages clear and sufficiently detailed, without being too noisy? + +Furthermore, if the library authors are not only building -- +but also maintaining and operating -- an application built on top of their library, +the authors will have a greatly increased confidence that their library's API +is appropriate for other application authors. + +Once the decision has been made to run and operate a service, +one of the next strategic questions is that of deploying said service. +The author strongly holds the opinion that, when possible, +a continuous delivery model offers the most compelling set of advantages: +- The code on a particular branch (likely `main` or `master`) is exactly what is, + or what will very soon be, running in production +- There are no manual steps involved in deploying -- other than merging your pull request, + which you had to do anyway +- A bug discovered in production can be rapidly confirmed as fixed in production + +In summary, if the tendermint authors build, maintain, and continuously deliver an application +intended to serve as a long-lived testnet, they will be able to state with confidence: +- We operate the software in a production-like environment and we have observed it to be + stable and performant to our requirements +- We have discovered issues in production before any external parties have consumed our software, + and we have addressed said issues +- We have successfully used the observability tooling built into our software + (perhaps in conjunction with other off-the-shelf tooling) + to diagnose and debug issues in production + +## Discussion + +The Discussion Section proposes a variety of aspects of maintaining a testnet for Tendermint. + +### Number of testnets + +There should probably be one testnet per maintained branch of Tendermint, +i.e. one for the `main` branch +and one per `v0.N.x` branch that the authors maintain. + +There may also exist testnets for long-lived feature branches. + +We may eventually discover that there is good reason to run more than one testnet for a branch, +perhaps due to a significant configuration variation. + +### Testnet lifecycle + +The document has used the terms "long-lived" and "semi-permanent" somewhat interchangeably. +The intent of the testnet being discussed in this RFC is to exist indefinitely; +but there is a practical understanding that there will be testnet instances +which will be retired due to a variety of reasons. +For instance, once a release branch is no longer supported, +its corresponding testnet should be torn down. + +In general, new commits to branches with corresponding testnets +should result in an in-place upgrade of all nodes in the testnet +without any data loss and without requiring new configuration. +The mechanism for achieving this is outside the scope of this RFC. + +However, it is also expected that there will be +breaking changes during the development of the `main` branch. +For instance, suppose there is an unreleased feature involving storage on disk, +and the developers need to change the storage format. +It should be at the developers' discretion whether it is feasible and worthwhile +to introduce an intermediate commit that translates the old format to the new format, +or if it would be preferable to just destroy the testnet and start from scratch +without any data in the old format. + +Similarly, if a developer inadvertently pushed a breaking change to an unreleased feature, +they are free to make a judgement call between reverting the change, +adding a commit to allow a forward migration, +or simply forcing the testnet to recreate. + +### Testnet maintenance investment + +While there is certainly engineering effort required to build the tooling and infrastructure +to get the testnets up and running, +the intent is that a running testnet requires no manual upkeep under normal conditions. + +It is expected that a subset of the Tendermint engineers are familiar with and engaged in +writing the software to maintain and build the testnet infrastructure, +but the rest of the team should not need any involvement in authoring that code. + +The testnets should be configured to send notifications for events requiring triage, +such as a chain halt or a node OOMing. +The time investment necessary to address the underlying issues for those kind of events +is unpredictable. + +Aside from triaging exceptional events, an engineer may choose to spend some time +collecting metrics or profiles from testnet nodes to check performance details +before and after a particular change; +or they may inspect logs associated with an expected behavior change. +But during day-to-day work, engineers are not expected to spend any considerable time +directly interacting with the testnets. + +If we discover that there are any routine actions engineers must take against the testnet +that take any substantial focused time, +those actions should be automated to a one-line command as much as is reasonable. + +### Testnet MVP + +The minimum viable testnet meets this set of features: + +- The testnet self-updates following a new commit pushed to Tendermint's `main` branch on GitHub + (there are some omitted steps here, such as CI building appropriate binaries and + somehow notifying the testnet that a new build is available) +- The testnet runs the Tendermint KV store for MVP +- The testnet operators are notified if: + - Any node's process exits for any reason other than a restart for a new binary + - Any node stops updating blocks, and by extension if a chain halt occurs + - No other observability will be considered for MVP +- The testnet has a minimum of 1 full node and 3 validators +- The testnet has a reasonably low, constant throughput of transactions -- say 30 tx/min -- + and the testnet operators are notified if that throughput drops below 75% of target + sustained over 5 minutes +- The testnet only needs to run in a single datacenter/cloud-region for MVP, + i.e. running in multiple datacenters is out of scope for MVP +- The testnet is running directly on VMs or compute instances; + while Kubernetes or other orchestration frameworks may offer many significant advantages, + the Tendermint engineers should not be required to learn those tools in order to + perform basic debugging + +### Testnet medium-term goals + +The medium-term goals are intended to be achievable within the 6-12 month time range +following the launch of MVP. +These goals could realistically be roadmapped following the launch of the MVP testnet. + +- The `main` testnet has more than 20 nodes (completely arbitrary -- 5x more than 1+3 at MVP) +- In addition to the `main` testnet, + there is at least one testnet associated with one release branch +- The testnet no longer is simply running the Tendermint KV store; + now it is built on a more complex, custom application + that deliberately exercises a greater portion of the Tendermint stack +- Each testnet is spread across at least two cloud providers, + in order to communicate over a network more closely resembling use of Tendermint in "real" chains +- The node updates have some "jitter", + with some nodes updating immediately when a new build is available, + and others delaying up to perhaps 30-60 minutes +- The team has published some form of dashboards that have served well for debugging, + which external parties can copy/modify to their needs + - The dashboards must include metrics published by Tendermint nodes; + there should be both OS- or runtime-level metrics such as memory in use, + and application-level metrics related to the underlying blockchain + - "Published" in this context is more in the spirit of "shared with the community", + not "produced a supported open source tool" -- + this could be published to GitHub with a warning that no support is offered, + or it could simply be a blog post detailing what has worked for the Tendermint developers + - The dashboards will likely be implemented on free and open source tooling, + but that is not a hard requirement if paid software is more appropriate +- The team has produced a reference model of a log aggregation stack that external parties can use + - Similar to the "published" dashboards, this only needs to be "shared" rather than "supported" +- Chaos engineering has begun being integrated into the testnets + (this could be periodic CPU limiting or deliberate network interference, etc. + but it probably would not be filesystem corruption) +- Each testnet has at least one node running a build with the Go race detector enabled +- The testnet contains some kind of generalized notification system built in: + - Tendermint code grows "watchdog" systems built in to validate things like + subsystems have not deadlocked; e.g. if the watchdog can't acquire and immediately release + a particular mutex once in every 5-minute period, it is near certain that the target + subsystem has deadlocked, and an alert must be sent to the engineering team. + (Outside of the testnet, the watchdogs could be disabled, or they could panic on failure.) + - The notification system does some deduplication to minimize spam on system failure + +### Testnet long-term vision + +The long-term vision includes goals that are not necessary for short- or medium-term success, +but which would support building an increasingly stable and performant product. +These goals would generally be beyond the one-year plan, +and therefore they would not be part of initial planning. + +- There is a centralized dashboard to get a quick overview of all testnets, + or at least one centralized dashboard per testnet, + showing TBD basic information +- Testnets include cloud spot instances which periodically and abruptly join and leave the network +- The testnets are a heterogeneous mixture of straight VMs and Docker containers, + thereby more closely representing production blockchains +- Testnets have some manner of continuous profiling, + so that we can produce an apples-to-apples comparison of CPU/memory cost of particular operations + +### Testnet non-goals + +There are some things we are explicitly not trying to achieve with long-lived testnets: + +- The Tendermint engineers will NOT be responsible for the testnets' availability + outside of working hours; there will not be any kind of on-call schedule +- As a result of the 8x5 support noted in the previous point, + there will be NO guarantee of uptime or availability for any testnet +- The testnets will NOT be used to gate pull requests; + that responsibility belongs to unit tests, end-to-end tests, and integration tests +- Similarly, the testnet will NOT be used to automate any changes back into Tendermint source code; + we will not automatically create a revert commit due to a failed rollout, for instance +- The testnets are NOT intended to have participation from machines outside of the + Tendermint engineering team's control, as the Tendermint engineers are expected + to have full access to any instance where they may need to debug an issue +- While there will certainly be individuals within the Tendermint engineering team + who will continue to build out their individual "devops" skills to produce + the infrastructure for the testnet, it is NOT a goal that every Tendermint engineer + is even _familiar_ with the tech stack involved, whether it is Ansible, Terraform, + Kubernetes, etc. + As a rule of thumb, all engineers should be able to get shell access on any given instance + and should have access to the instance's logs. + Little if any further operational skills will be expected. +- The testnets are not intended to be _created_ for one-off experiments. + While there is nothing wrong with an engineer directly interacting with a testnet + to try something out, + a testnet comes with a considerable amount of "baggage", so end-to-end or integration tests + are closer to the intent for "trying something to see what happens". + Direct interaction should be limited to standard blockchain operations, + _not_ modifying configuration of nodes. +- Likewise, the purpose of the testnet is not to run specific "tests" per se, + but rather to demonstrate that Tendermint blockchains as a whole are stable + under a production load. + Of course we will inject faults periodically, but the intent is to observe and prove that + the testnet is resilient to those faults. + It would be the responsibility of a lower-level test to demonstrate e.g. + that the network continues when a single validator disappears without warning. +- The testnet descriptions in this document are scoped only to building directly on Tendermint; + integrating with the Cosmos SDK, or any other third-party library, is out of scope + +### Team outcomes as a result of maintaining and operating a testnet + +Finally, this section reiterates what team growth we expect by running semi-permanent testnets. + +- Confidence that Tendermint is stable under a particular production-like load +- Familiarity with typical production behavior of Tendermint, e.g. what the logs look like, + what the memory footprint looks like, and what kind of throughput is reasonable + for a network of a particular size +- Comfort and familiarity in manually inspecting a misbehaving or failing node +- Confidence that Tendermint ships sufficient tooling for external users + to operate their nodes +- Confidence that Tendermint exposes useful metrics, and comfort interpreting those metrics +- Produce useful reference documentation that gives operators confidence to run Tendermint nodes diff --git a/docs/rfc/rfc-template.md b/docs/rfc/rfc-template.md new file mode 100644 index 000000000..b3f404775 --- /dev/null +++ b/docs/rfc/rfc-template.md @@ -0,0 +1,35 @@ +# RFC {RFC-NUMBER}: {TITLE} + +## Changelog + +- {date}: {changelog} + +## Abstract + +> A brief high-level synopsis of the topic of discussion for this RFC, ideally +> just a few sentences. This should help the reader quickly decide whether the +> rest of the discussion is relevant to their interest. + +## Background + +> Any context or orientation needed for a reader to understand and participate +> in the substance of the Discussion. If necessary, this section may include +> links to other documentation or sources rather than restating existing +> material, but should provide enough detail that the reader can tell what they +> need to read to be up-to-date. + +### References + +> Links to external materials needed to follow the discussion may be added here. +> +> In addition, if the discussion in a request for comments leads to any design +> decisions, it may be helpful to add links to the ADR documents here after the +> discussion has settled. + +## Discussion + +> This section contains the core of the discussion. +> +> There is no fixed format for this section, but ideally changes to this +> section should be updated before merging to reflect any discussion that took +> place on the PR that made those changes. diff --git a/docs/tendermint-core/configuration.md b/docs/tendermint-core/configuration.md index 17aabcaef..120716bfd 100644 --- a/docs/tendermint-core/configuration.md +++ b/docs/tendermint-core/configuration.md @@ -324,9 +324,11 @@ temp_dir = "" [fastsync] # Fast Sync version to use: +# +# In v0.37, the v1 and v2 fast sync protocols were deprecated. +# Please use v0 instead. +# # 1) "v0" (default) - the legacy fast sync implementation -# 2) "v1" - refactor of v0 version for better testability -# 2) "v2" - complete redesign of v0, optimized for testability & readability version = "v0" ####################################################### diff --git a/docs/tendermint-core/metrics.md b/docs/tendermint-core/metrics.md index d6c2bd82d..b0f7033ac 100644 --- a/docs/tendermint-core/metrics.md +++ b/docs/tendermint-core/metrics.md @@ -18,38 +18,42 @@ Listen address can be changed in the config file (see The following metrics are available: -| **Name** | **Type** | **Tags** | **Description** | -| -------------------------------------- | --------- | ------------- | ---------------------------------------------------------------------- | -| consensus_height | Gauge | | Height of the chain | -| consensus_validators | Gauge | | Number of validators | -| consensus_validators_power | Gauge | | Total voting power of all validators | -| consensus_validator_power | Gauge | | Voting power of the node if in the validator set | -| consensus_validator_last_signed_height | Gauge | | Last height the node signed a block, if the node is a validator | -| consensus_validator_missed_blocks | Gauge | | Total amount of blocks missed for the node, if the node is a validator | -| consensus_missing_validators | Gauge | | Number of validators who did not sign | -| consensus_missing_validators_power | Gauge | | Total voting power of the missing validators | -| consensus_byzantine_validators | Gauge | | Number of validators who tried to double sign | -| consensus_byzantine_validators_power | Gauge | | Total voting power of the byzantine validators | -| consensus_block_interval_seconds | Histogram | | Time between this and last block (Block.Header.Time) in seconds | -| consensus_rounds | Gauge | | Number of rounds | -| consensus_num_txs | Gauge | | Number of transactions | -| consensus_total_txs | Gauge | | Total number of transactions committed | -| consensus_block_parts | counter | peer_id | number of blockparts transmitted by peer | -| consensus_latest_block_height | gauge | | /status sync_info number | -| consensus_fast_syncing | gauge | | either 0 (not fast syncing) or 1 (syncing) | -| consensus_state_syncing | gauge | | either 0 (not state syncing) or 1 (syncing) | -| consensus_block_size_bytes | Gauge | | Block size in bytes | -| p2p_peers | Gauge | | Number of peers node's connected to | -| p2p_peer_receive_bytes_total | counter | peer_id, chID | number of bytes per channel received from a given peer | -| p2p_peer_send_bytes_total | counter | peer_id, chID | number of bytes per channel sent to a given peer | -| p2p_peer_pending_send_bytes | gauge | peer_id | number of pending bytes to be sent to a given peer | -| p2p_num_txs | gauge | peer_id | number of transactions submitted by each peer_id | -| p2p_pending_send_bytes | gauge | peer_id | amount of data pending to be sent to peer | -| mempool_size | Gauge | | Number of uncommitted transactions | -| mempool_tx_size_bytes | histogram | | transaction sizes in bytes | -| mempool_failed_txs | counter | | number of failed transactions | -| mempool_recheck_times | counter | | number of transactions rechecked in the mempool | -| state_block_processing_time | histogram | | time between BeginBlock and EndBlock in ms | +| **Name** | **Type** | **Tags** | **Description** | +|----------------------------------------|-----------|-----------------|--------------------------------------------------------------------------------------------------------------------------------------------| +| abci_connection_method_timing_seconds | Histogram | method, type | Timings for each of the ABCI methods | +| consensus_height | Gauge | | Height of the chain | +| consensus_validators | Gauge | | Number of validators | +| consensus_validators_power | Gauge | | Total voting power of all validators | +| consensus_validator_power | Gauge | | Voting power of the node if in the validator set | +| consensus_validator_last_signed_height | Gauge | | Last height the node signed a block, if the node is a validator | +| consensus_validator_missed_blocks | Gauge | | Total amount of blocks missed for the node, if the node is a validator | +| consensus_missing_validators | Gauge | | Number of validators who did not sign | +| consensus_missing_validators_power | Gauge | | Total voting power of the missing validators | +| consensus_byzantine_validators | Gauge | | Number of validators who tried to double sign | +| consensus_byzantine_validators_power | Gauge | | Total voting power of the byzantine validators | +| consensus_block_interval_seconds | Histogram | | Time between this and last block (Block.Header.Time) in seconds | +| consensus_rounds | Gauge | | Number of rounds | +| consensus_num_txs | Gauge | | Number of transactions | +| consensus_total_txs | Gauge | | Total number of transactions committed | +| consensus_block_parts | counter | peer_id | number of blockparts transmitted by peer | +| consensus_latest_block_height | gauge | | /status sync_info number | +| consensus_fast_syncing | gauge | | either 0 (not fast syncing) or 1 (syncing) | +| consensus_state_syncing | gauge | | either 0 (not state syncing) or 1 (syncing) | +| consensus_block_size_bytes | Gauge | | Block size in bytes | +| consensus_step_duration | Histogram | step | Histogram of durations for each step in the consensus protocol | +| consensus_round_duration | Histogram | | Histogram of durations for all the rounds that have occurred since the process started | +| consensus_block_gossip_parts_received | Counter | matches_current | Number of block parts received by the node | +| p2p_peers | Gauge | | Number of peers node's connected to | +| p2p_peer_receive_bytes_total | counter | peer_id, chID | number of bytes per channel received from a given peer | +| p2p_peer_send_bytes_total | counter | peer_id, chID | number of bytes per channel sent to a given peer | +| p2p_peer_pending_send_bytes | gauge | peer_id | number of pending bytes to be sent to a given peer | +| p2p_num_txs | gauge | peer_id | number of transactions submitted by each peer_id | +| p2p_pending_send_bytes | gauge | peer_id | amount of data pending to be sent to peer | +| mempool_size | Gauge | | Number of uncommitted transactions | +| mempool_tx_size_bytes | histogram | | transaction sizes in bytes | +| mempool_failed_txs | counter | | number of failed transactions | +| mempool_recheck_times | counter | | number of transactions rechecked in the mempool | +| state_block_processing_time | histogram | | time between BeginBlock and EndBlock in ms | ## Useful queries diff --git a/docs/tendermint-core/running-in-production.md b/docs/tendermint-core/running-in-production.md index 41f40641e..7e7dae549 100644 --- a/docs/tendermint-core/running-in-production.md +++ b/docs/tendermint-core/running-in-production.md @@ -17,7 +17,7 @@ Tendermint keeps multiple distinct databases in the `$TMROOT/data`: - `blockstore.db`: Keeps the entire blockchain - stores blocks, block commits, and block meta data, each indexed by height. Used to sync new peers. -- `evidence.db`: Stores all verified evidence of misbehaviour. +- `evidence.db`: Stores all verified evidence of misbehavior. - `state.db`: Stores the current blockchain state (ie. height, validators, consensus params). Only grows if consensus params or validators change. Also used to temporarily store intermediate results during block processing. diff --git a/evidence/mocks/block_store.go b/evidence/mocks/block_store.go index 5a16afc2a..e61c4e0ae 100644 --- a/evidence/mocks/block_store.go +++ b/evidence/mocks/block_store.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/evidence/services.go b/evidence/services.go index 274433cbe..5c4d2e953 100644 --- a/evidence/services.go +++ b/evidence/services.go @@ -4,7 +4,7 @@ import ( "github.com/tendermint/tendermint/types" ) -//go:generate mockery --case underscore --name BlockStore +//go:generate ../scripts/mockery_generate.sh BlockStore type BlockStore interface { LoadBlockMeta(height int64) *types.BlockMeta diff --git a/go.mod b/go.mod index 15449b403..fea840fa9 100644 --- a/go.mod +++ b/go.mod @@ -1,11 +1,10 @@ module github.com/tendermint/tendermint -go 1.16 +go 1.18 require ( github.com/BurntSushi/toml v1.2.0 github.com/ChainSafe/go-schnorrkel v0.0.0-20200405005733-88cbf1b4c40d - github.com/Workiva/go-datastructures v1.0.53 github.com/adlio/schema v1.3.3 github.com/btcsuite/btcd v0.22.1 github.com/btcsuite/btcutil v1.0.3-0.20201208143702-a53e38424cce @@ -21,14 +20,13 @@ require ( github.com/golang/protobuf v1.5.2 github.com/google/orderedcode v0.0.1 github.com/gorilla/websocket v1.5.0 - github.com/gotestyourself/gotestyourself v2.2.0+incompatible // indirect github.com/gtank/merlin v0.1.1 github.com/lib/pq v1.10.6 github.com/libp2p/go-buffer-pool v0.1.0 github.com/minio/highwayhash v1.0.2 github.com/ory/dockertest v3.3.5+incompatible github.com/pkg/errors v0.9.1 - github.com/prometheus/client_golang v1.12.2 + github.com/prometheus/client_golang v1.13.0 github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 github.com/rs/cors v1.8.2 github.com/sasha-s/go-deadlock v0.2.1-0.20190427202633-1595213edefa @@ -38,16 +36,238 @@ require ( github.com/stretchr/testify v1.8.0 github.com/tendermint/tm-db v0.6.6 golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e - golang.org/x/net v0.0.0-20220617184016-355a448f1bc9 + golang.org/x/net v0.0.0-20220726230323-06994584191e google.golang.org/grpc v1.48.0 - gotest.tools v2.2.0+incompatible // indirect ) require ( - github.com/bufbuild/buf v1.4.0 + github.com/bufbuild/buf v1.7.0 github.com/creachadair/taskgroup v0.3.2 - github.com/golangci/golangci-lint v1.47.2 - github.com/prometheus/common v0.34.0 // indirect + github.com/golangci/golangci-lint v1.48.0 + github.com/pointlander/peg v1.0.1 + github.com/prometheus/common v0.37.0 github.com/syndtr/goleveldb v1.0.1-0.20200815110645-5c35d600f0ca +) + +require ( + github.com/prometheus/client_model v0.2.0 github.com/vektra/mockery/v2 v2.14.0 ) + +require ( + 4d63.com/gochecknoglobals v0.1.0 // indirect + github.com/Antonboom/errname v0.1.7 // indirect + github.com/Antonboom/nilnil v0.1.1 // indirect + github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect + github.com/DataDog/zstd v1.4.1 // indirect + github.com/Djarvur/go-err113 v0.0.0-20210108212216-aea10b59be24 // indirect + github.com/GaijinEntertainment/go-exhaustruct/v2 v2.2.2 // indirect + github.com/Masterminds/semver v1.5.0 // indirect + github.com/Microsoft/go-winio v0.5.2 // indirect + github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect + github.com/OpenPeeDeeP/depguard v1.1.0 // indirect + github.com/alexkohler/prealloc v1.0.0 // indirect + github.com/alingse/asasalint v0.0.11 // indirect + github.com/ashanbrown/forbidigo v1.3.0 // indirect + github.com/ashanbrown/makezero v1.1.1 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/bkielbasa/cyclop v1.2.0 // indirect + github.com/blizzy78/varnamelen v0.8.0 // indirect + github.com/bombsimon/wsl/v3 v3.3.0 // indirect + github.com/breml/bidichk v0.2.3 // indirect + github.com/breml/errchkjson v0.3.0 // indirect + github.com/bufbuild/connect-go v0.2.0 // indirect + github.com/butuzov/ireturn v0.1.1 // indirect + github.com/cespare/xxhash v1.1.0 // indirect + github.com/cespare/xxhash/v2 v2.1.2 // indirect + github.com/charithe/durationcheck v0.0.9 // indirect + github.com/chavacava/garif v0.0.0-20220316182200-5cad0b5181d4 // indirect + github.com/containerd/containerd v1.6.6 // indirect + github.com/containerd/continuity v0.3.0 // indirect + github.com/containerd/typeurl v1.0.2 // indirect + github.com/cosmos/go-bip39 v0.0.0-20180819234021-555e2067c45d // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect + github.com/daixiang0/gci v0.6.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/denis-tingaikin/go-header v0.4.3 // indirect + github.com/dgraph-io/badger/v2 v2.2007.2 // indirect + github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de // indirect + github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 // indirect + github.com/docker/distribution v2.8.1+incompatible // indirect + github.com/docker/docker v20.10.17+incompatible // indirect + github.com/docker/go-connections v0.4.0 // indirect + github.com/docker/go-units v0.4.0 // indirect + github.com/dustin/go-humanize v1.0.0 // indirect + github.com/esimonov/ifshort v1.0.4 // indirect + github.com/ettle/strcase v0.1.1 // indirect + github.com/fatih/color v1.13.0 // indirect + github.com/fatih/structtag v1.2.0 // indirect + github.com/firefart/nonamedreturns v1.0.4 // indirect + github.com/fsnotify/fsnotify v1.5.4 // indirect + github.com/fzipp/gocyclo v0.6.0 // indirect + github.com/go-chi/chi/v5 v5.0.7 // indirect + github.com/go-critic/go-critic v0.6.3 // indirect + github.com/go-logr/logr v1.2.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-toolsmith/astcast v1.0.0 // indirect + github.com/go-toolsmith/astcopy v1.0.0 // indirect + github.com/go-toolsmith/astequal v1.0.1 // indirect + github.com/go-toolsmith/astfmt v1.0.0 // indirect + github.com/go-toolsmith/astp v1.0.0 // indirect + github.com/go-toolsmith/strparse v1.0.0 // indirect + github.com/go-toolsmith/typep v1.0.2 // indirect + github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b // indirect + github.com/gobwas/glob v0.2.3 // indirect + github.com/gofrs/flock v0.8.1 // indirect + github.com/gofrs/uuid v4.2.0+incompatible // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/snappy v0.0.3 // indirect + github.com/golangci/check v0.0.0-20180506172741-cfe4005ccda2 // indirect + github.com/golangci/dupl v0.0.0-20180902072040-3e9179ac440a // indirect + github.com/golangci/go-misc v0.0.0-20220329215616-d24fe342adfe // indirect + github.com/golangci/gofmt v0.0.0-20190930125516-244bba706f1a // indirect + github.com/golangci/lint-1 v0.0.0-20191013205115-297bf364a8e0 // indirect + github.com/golangci/maligned v0.0.0-20180506175553-b1d89398deca // indirect + github.com/golangci/misspell v0.3.5 // indirect + github.com/golangci/revgrep v0.0.0-20220804021717-745bb2f7c2e6 // indirect + github.com/golangci/unconvert v0.0.0-20180507085042-28b1c447d1f4 // indirect + github.com/google/btree v1.0.0 // indirect + github.com/google/go-cmp v0.5.8 // indirect + github.com/gordonklaus/ineffassign v0.0.0-20210914165742-4cc7213b9bc8 // indirect + github.com/gostaticanalysis/analysisutil v0.7.1 // indirect + github.com/gostaticanalysis/comment v1.4.2 // indirect + github.com/gostaticanalysis/forcetypeassert v0.1.0 // indirect + github.com/gostaticanalysis/nilerr v0.1.1 // indirect + github.com/gotestyourself/gotestyourself v2.2.0+incompatible // indirect + github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/gtank/ristretto255 v0.1.2 // indirect + github.com/hashicorp/errwrap v1.1.0 // indirect + github.com/hashicorp/go-multierror v1.1.1 // indirect + github.com/hashicorp/go-version v1.6.0 // indirect + github.com/hashicorp/hcl v1.0.0 // indirect + github.com/hexops/gotextdiff v1.0.3 // indirect + github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/jdxcode/netrc v0.0.0-20210204082910-926c7f70242a // indirect + github.com/jgautheron/goconst v1.5.1 // indirect + github.com/jhump/protocompile v0.0.0-20220216033700-d705409f108f // indirect + github.com/jhump/protoreflect v1.12.1-0.20220721211354-060cc04fc18b // indirect + github.com/jingyugao/rowserrcheck v1.1.1 // indirect + github.com/jirfag/go-printf-func-name v0.0.0-20200119135958-7558a9eaa5af // indirect + github.com/jmhodges/levigo v1.0.0 // indirect + github.com/julz/importas v0.1.0 // indirect + github.com/kisielk/errcheck v1.6.2 // indirect + github.com/kisielk/gotool v1.0.0 // indirect + github.com/klauspost/compress v1.15.9 // indirect + github.com/klauspost/pgzip v1.2.5 // indirect + github.com/kulti/thelper v0.6.3 // indirect + github.com/kunwardeep/paralleltest v1.0.6 // indirect + github.com/kyoh86/exportloopref v0.1.8 // indirect + github.com/ldez/gomoddirectives v0.2.3 // indirect + github.com/ldez/tagliatelle v0.3.1 // indirect + github.com/leonklingele/grouper v1.1.0 // indirect + github.com/lufeee/execinquery v1.2.1 // indirect + github.com/magiconair/properties v1.8.6 // indirect + github.com/maratori/testpackage v1.1.0 // indirect + github.com/matoous/godox v0.0.0-20210227103229-6504466cf951 // indirect + github.com/mattn/go-colorable v0.1.12 // indirect + github.com/mattn/go-isatty v0.0.14 // indirect + github.com/mattn/go-runewidth v0.0.9 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect + github.com/mbilski/exhaustivestruct v1.2.0 // indirect + github.com/mgechev/revive v1.2.1 // indirect + github.com/mimoo/StrobeGo v0.0.0-20181016162300-f8f6d4d2b643 // indirect + github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/moby/buildkit v0.10.3 // indirect + github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6 // indirect + github.com/moricho/tparallel v0.2.1 // indirect + github.com/morikuni/aec v1.0.0 // indirect + github.com/nakabonne/nestif v0.3.1 // indirect + github.com/nbutton23/zxcvbn-go v0.0.0-20210217022336-fa2cb2858354 // indirect + github.com/nishanths/exhaustive v0.8.1 // indirect + github.com/nishanths/predeclared v0.2.2 // indirect + github.com/olekukonko/tablewriter v0.0.5 // indirect + github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect + github.com/opencontainers/runc v1.1.3 // indirect + github.com/pelletier/go-toml v1.9.5 // indirect + github.com/pelletier/go-toml/v2 v2.0.2 // indirect + github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 // indirect + github.com/phayes/checkstyle v0.0.0-20170904204023-bfd46e6a821d // indirect + github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect + github.com/pkg/profile v1.6.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pointlander/compress v1.1.1-0.20190518213731-ff44bd196cc3 // indirect + github.com/pointlander/jetset v1.0.1-0.20190518214125-eee7eff80bd4 // indirect + github.com/polyfloyd/go-errorlint v1.0.0 // indirect + github.com/prometheus/procfs v0.8.0 // indirect + github.com/quasilyte/go-ruleguard v0.3.16-0.20220213074421-6aa060fab41a // indirect + github.com/quasilyte/gogrep v0.0.0-20220120141003-628d8b3623b5 // indirect + github.com/quasilyte/regex/syntax v0.0.0-20200407221936-30656e2c4a95 // indirect + github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567 // indirect + github.com/rs/zerolog v1.27.0 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/ryancurrah/gomodguard v1.2.4 // indirect + github.com/ryanrolds/sqlclosecheck v0.3.0 // indirect + github.com/sanposhiho/wastedassign/v2 v2.0.6 // indirect + github.com/sashamelentyev/usestdlibvars v1.8.0 // indirect + github.com/securego/gosec/v2 v2.12.0 // indirect + github.com/shazow/go-diff v0.0.0-20160112020656-b6b7b6733b8c // indirect + github.com/sirupsen/logrus v1.9.0 // indirect + github.com/sivchari/containedctx v1.0.2 // indirect + github.com/sivchari/nosnakecase v1.7.0 // indirect + github.com/sivchari/tenv v1.7.0 // indirect + github.com/sonatard/noctx v0.0.1 // indirect + github.com/sourcegraph/go-diff v0.6.1 // indirect + github.com/spf13/afero v1.8.2 // indirect + github.com/spf13/cast v1.5.0 // indirect + github.com/spf13/jwalterweatherman v1.1.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/ssgreg/nlreturn/v2 v2.2.1 // indirect + github.com/stbenjam/no-sprintf-host-port v0.1.1 // indirect + github.com/stretchr/objx v0.4.0 // indirect + github.com/subosito/gotenv v1.4.0 // indirect + github.com/sylvia7788/contextcheck v1.0.4 // indirect + github.com/tdakkota/asciicheck v0.1.1 // indirect + github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c // indirect + github.com/tetafro/godot v1.4.11 // indirect + github.com/timakin/bodyclose v0.0.0-20210704033933-f49887972144 // indirect + github.com/tomarrell/wrapcheck/v2 v2.6.2 // indirect + github.com/tommy-muehle/go-mnd/v2 v2.5.0 // indirect + github.com/ultraware/funlen v0.0.3 // indirect + github.com/ultraware/whitespace v0.0.5 // indirect + github.com/uudashr/gocognit v1.0.6 // indirect + github.com/yagipy/maintidx v1.0.0 // indirect + github.com/yeya24/promlinter v0.2.0 // indirect + gitlab.com/bosi/decorder v0.2.3 // indirect + go.etcd.io/bbolt v1.3.6 // indirect + go.opencensus.io v0.23.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.33.0 // indirect + go.opentelemetry.io/otel v1.8.0 // indirect + go.opentelemetry.io/otel/trace v1.8.0 // indirect + go.uber.org/atomic v1.9.0 // indirect + go.uber.org/multierr v1.8.0 // indirect + go.uber.org/zap v1.21.0 // indirect + golang.org/x/exp/typeparams v0.0.0-20220613132600-b0d781184e0d // indirect + golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect + golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 // indirect + golang.org/x/sys v0.0.0-20220727055044-e65921a090b8 // indirect + golang.org/x/term v0.0.0-20220722155259-a9ba230a4035 // indirect + golang.org/x/text v0.3.7 // indirect + golang.org/x/tools v0.1.12 // indirect + google.golang.org/genproto v0.0.0-20220725144611-272f38e5d71b // indirect + google.golang.org/protobuf v1.28.1 // indirect + gopkg.in/ini.v1 v1.66.6 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + gotest.tools v2.2.0+incompatible // indirect + honnef.co/go/tools v0.3.3 // indirect + mvdan.cc/gofumpt v0.3.1 // indirect + mvdan.cc/interfacer v0.0.0-20180901003855-c20040233aed // indirect + mvdan.cc/lint v0.0.0-20170908181259-adc824a0674b // indirect + mvdan.cc/unparam v0.0.0-20220706161116-678bad134442 // indirect +) + +retract ( + [v0.35.0,v0.35.9] // See https://github.com/tendermint/tendermint/discussions/9155 +) diff --git a/go.sum b/go.sum index 8f029d0fc..f6a3856d7 100644 --- a/go.sum +++ b/go.sum @@ -21,33 +21,16 @@ cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHOb cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= cloud.google.com/go v0.75.0/go.mod h1:VGuuCn7PG0dwsd5XPVm2Mm3wlh3EL55/79EKB6hlPTY= -cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= -cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= -cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= -cloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAVY= -cloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM= -cloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY= -cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ= -cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= -cloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4= -cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc= -cloud.google.com/go v0.98.0/go.mod h1:ua6Ush4NALrHk5QXDWnjvZHN93OuF0HfuEPq9I1X0cM= -cloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA= -cloud.google.com/go v0.100.2/go.mod h1:4Xra9TjzAeYHrl5+oeLlzbM2k3mjVhZh4UqTZ//w99A= +cloud.google.com/go v0.100.2 h1:t9Iw5QH5v4XtlEQaCtUY7x6sCABps8sW0acw7e2WQ6Y= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute v0.1.0/go.mod h1:GAesmwr110a34z04OlxYkATPBEfVhkymfTBXtfbBFow= -cloud.google.com/go/compute v1.3.0/go.mod h1:cCZiE1NHEtai4wiufUhW8I8S1JKkAnhnQJWM7YD99wM= -cloud.google.com/go/compute v1.5.0/go.mod h1:9SMHyhJlzhlkJqrPAc839t2BZFTSk6Jdj6mkzQJeu0M= -cloud.google.com/go/compute v1.6.0/go.mod h1:T29tfhtVbq1wvAPo0E3+7vhgmkOYeXjhFvz/FMzPu0s= -cloud.google.com/go/compute v1.6.1/go.mod h1:g85FgpzFvNULZ+S8AYq87axRKuf2Kh7deLqV/jJ3thU= +cloud.google.com/go/compute v1.6.1 h1:2sMmt8prCn7DPaG4Pmh0N3Inmc8cT8ae5k1M6VJ9Wqc= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/firestore v1.6.1/go.mod h1:asNXNOzBdyVQmEU+ggO8UPodTkEVFW5Qx+rwHnAz+EY= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= @@ -66,14 +49,9 @@ github.com/Antonboom/errname v0.1.7 h1:mBBDKvEYwPl4WFFNwec1CZO096G6vzK9vvDQzAwka github.com/Antonboom/errname v0.1.7/go.mod h1:g0ONh16msHIPgJSGsecu1G/dcF2hlYR/0SddnIAGavU= github.com/Antonboom/nilnil v0.1.1 h1:PHhrh5ANKFWRBh7TdYmyyq2gyT2lotnvFvvFbylF81Q= github.com/Antonboom/nilnil v0.1.1/go.mod h1:L1jBqoWM7AOeTD+tSquifKSesRHs4ZdaxvZR+xdJEaI= -github.com/Azure/azure-sdk-for-go/sdk/azcore v0.19.0/go.mod h1:h6H6c8enJmmocHUbLiiGY6sx7f9i+X3m1CHdd5c6Rdw= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v0.11.0/go.mod h1:HcM1YX14R7CJcghJGOYCgdezslRSVzqwLf/q+4Y2r/0= -github.com/Azure/azure-sdk-for-go/sdk/internal v0.7.0/go.mod h1:yqy467j36fJxcRV2TzfVZ1pCb5vxm4BtZPUdYWe/Xo8= -github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.1.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.2.0 h1:Rt8g24XnyGTyglgET/PRUNlrUeu9F5L+7FilkXfZgs0= github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= @@ -81,17 +59,12 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym github.com/ChainSafe/go-schnorrkel v0.0.0-20200405005733-88cbf1b4c40d h1:nalkkPQcITbvhmL4+C4cKA87NW0tfm3Kl9VXRoPywFg= github.com/ChainSafe/go-schnorrkel v0.0.0-20200405005733-88cbf1b4c40d/go.mod h1:URdX5+vg25ts3aCh8H5IFZybJYKWhJHYMTnf+ULtoC4= github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60= -github.com/DATA-DOG/go-sqlmock v1.5.0/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= -github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/zstd v1.4.1 h1:3oxKN3wbHibqx897utPC2LTQU4J+IHWWJO+glkAkpFM= github.com/DataDog/zstd v1.4.1/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= github.com/Djarvur/go-err113 v0.0.0-20210108212216-aea10b59be24 h1:sHglBQTwgx+rWPdisA5ynNEsoARbiCBOyGcJM4/OzsM= github.com/Djarvur/go-err113 v0.0.0-20210108212216-aea10b59be24/go.mod h1:4UJr5HIiMZrwgkSPdsjy2uOQExX/WEILpIrO9UPGuXs= -github.com/GaijinEntertainment/go-exhaustruct/v2 v2.2.0 h1:V9xVvhKbLt7unNEGAruK1xXglyc668Pq3Xx0MNTNqpo= -github.com/GaijinEntertainment/go-exhaustruct/v2 v2.2.0/go.mod h1:n/vLeA7V+QY84iYAGwMkkUUp9ooeuftMEvaDrSVch+Q= -github.com/HdrHistogram/hdrhistogram-go v1.1.0/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= -github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= -github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= +github.com/GaijinEntertainment/go-exhaustruct/v2 v2.2.2 h1:DGdS4FlsdM6OkluXOhgkvwx05ZjD3Idm9WqtYnOmSuY= +github.com/GaijinEntertainment/go-exhaustruct/v2 v2.2.2/go.mod h1:xj0D2jwLdp6tOKLheyZCsfL0nz8DaicmJxSwj3VcHtY= github.com/Masterminds/goutils v1.1.0/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= @@ -106,17 +79,10 @@ github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/OpenPeeDeeP/depguard v1.1.0 h1:pjK9nLPS1FwQYGGpPxoMYpe7qACHOhAWQMQzV71i49o= github.com/OpenPeeDeeP/depguard v1.1.0/go.mod h1:JtAMzWkmFEzDPyAd+W0NHl1lvpQKTvT9jnRVsohBKpc= -github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= -github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrdtl/UvroE= -github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= -github.com/Workiva/go-datastructures v1.0.53 h1:J6Y/52yX10Xc5JjXmGtWoSSxs3mZnGSaq37xZZh7Yig= -github.com/Workiva/go-datastructures v1.0.53/go.mod h1:1yZL+zfsztete+ePzZz/Zb1/t5BnDuE2Ya2MMGhzP6A= github.com/adlio/schema v1.3.3 h1:oBJn8I02PyTB466pZO1UZEn1TV5XLlifBSyMrmHl/1I= github.com/adlio/schema v1.3.3/go.mod h1:1EsRssiv9/Ce2CMzq5DoL7RiMshhuigQxrR4DMV9fHg= github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= -github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= -github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -124,20 +90,12 @@ github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRF github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/alexkohler/prealloc v1.0.0 h1:Hbq0/3fJPQhNkN0dR95AVrr6R7tou91y0uHG5pOcUuw= github.com/alexkohler/prealloc v1.0.0/go.mod h1:VetnK3dIgFBBKmg0YnD9F9x6Icjd+9cvfHR56wJVlKE= -github.com/alingse/asasalint v0.0.10 h1:qqGPDTV0ff0tWHN/nnIlSdjlU/EwRPaUY4SfpE1rnms= -github.com/alingse/asasalint v0.0.10/go.mod h1:nCaoMhw7a9kSJObvQyVzNTPBDbNpdocqrSP7t/cW5+I= -github.com/andybalholm/brotli v1.0.2/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= -github.com/andybalholm/brotli v1.0.3/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/alingse/asasalint v0.0.11 h1:SFwnQXJ49Kx/1GghOFz1XGqHYKp21Kq1nHad/0WQRnw= +github.com/alingse/asasalint v0.0.11/go.mod h1:nCaoMhw7a9kSJObvQyVzNTPBDbNpdocqrSP7t/cW5+I= github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/aokoli/goutils v1.0.1/go.mod h1:SijmP0QR8LtwsmDs8Yii5Z/S4trXFGFC2oO5g9DP+DQ= -github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= -github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= -github.com/armon/go-metrics v0.3.9/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= -github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= -github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/ashanbrown/forbidigo v1.3.0 h1:VkYIwb/xxdireGAdJNZoo24O4lmnEWkactplBlWTShc= github.com/ashanbrown/forbidigo v1.3.0/go.mod h1:vVW7PEdqEFqapJe95xHkTfB1+XvZXBFg8t0sG2FIxmI= github.com/ashanbrown/makezero v1.1.1 h1:iCQ87C0V0vSyO+M9E/FZYbu65auqH0lnsOkf5FcB28s= @@ -145,13 +103,8 @@ github.com/ashanbrown/makezero v1.1.1/go.mod h1:i1bJLCRSCHOcOa9Y6MyF2FTfMZMFdHvx github.com/aws/aws-sdk-go v1.23.20/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.25.37/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.36.30/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= -github.com/aws/aws-sdk-go v1.40.45/go.mod h1:585smgzpB/KqRA+K3y/NL/oYRqQvpNJYvLm+LY1U59Q= -github.com/aws/aws-sdk-go-v2 v1.9.1/go.mod h1:cK/D0BBs0b/oWPIcX/Z/obahJK1TT7IPVjy53i/mX/4= -github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.8.1/go.mod h1:CM+19rL1+4dFWnOQKwDc7H1KwXTz+h61oUSHyhV0b3o= -github.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= -github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -171,30 +124,25 @@ github.com/btcsuite/btcd v0.20.1-beta/go.mod h1:wVuoA8VJLEcwgqHBwHmzLRazpKxTv13P github.com/btcsuite/btcd v0.22.1 h1:CnwP9LM/M9xuRrGSCGeMVs9iv09uMqwsVX7EeIpgV2c= github.com/btcsuite/btcd v0.22.1/go.mod h1:wqgTSL29+50LRkmOVknEdmt8ZojIzhuWvgu/iptuN7Y= github.com/btcsuite/btcd/chaincfg/chainhash v1.0.1 h1:q0rUy8C/TYNBQS1+CGKw68tLOFYSNEs0TFnxxnS9+4U= -github.com/btcsuite/btcd/chaincfg/chainhash v1.0.1/go.mod h1:7SFka0XMvUgj3hfZtydOrQY2mwhPclbT2snogU7SQQc= github.com/btcsuite/btclog v0.0.0-20170628155309-84c8d2346e9f/go.mod h1:TdznJufoqS23FtqVCzL0ZqgP5MqXbb4fg/WgDys70nA= github.com/btcsuite/btcutil v0.0.0-20190425235716-9e5f4b9a998d/go.mod h1:+5NJ2+qvTyV9exUAL/rxXi3DcLg2Ts+ymUAY5y4NvMg= github.com/btcsuite/btcutil v1.0.3-0.20201208143702-a53e38424cce h1:YtWJF7RHm2pYCvA5t0RPmAaLUhREsKuKd+SLhxFbFeQ= github.com/btcsuite/btcutil v1.0.3-0.20201208143702-a53e38424cce/go.mod h1:0DVlHczLPewLcPGEIeUEzfOJhqGPQ0mJJRDBtD307+o= github.com/btcsuite/go-socks v0.0.0-20170105172521-4720035b7bfd/go.mod h1:HHNXQzUsZCxOoE+CPiyCTO6x34Zs86zZUiwtpXoGdtg= github.com/btcsuite/goleveldb v0.0.0-20160330041536-7834afc9e8cd/go.mod h1:F+uVaaLLH7j4eDXPRvw78tMflu7Ie2bzYOH4Y8rRKBY= -github.com/btcsuite/goleveldb v1.0.0/go.mod h1:QiK9vBlgftBg6rWQIj6wFzbPfRjiykIEhBH4obrXJ/I= github.com/btcsuite/snappy-go v0.0.0-20151229074030-0bdef8d06723/go.mod h1:8woku9dyThutzjeg+3xrA5iCpBRH8XEEg3lh6TiUghc= -github.com/btcsuite/snappy-go v1.0.0/go.mod h1:8woku9dyThutzjeg+3xrA5iCpBRH8XEEg3lh6TiUghc= github.com/btcsuite/websocket v0.0.0-20150119174127-31079b680792/go.mod h1:ghJtEyQwv5/p4Mg4C0fgbePVuGr935/5ddU9Z3TmDRY= github.com/btcsuite/winsvc v1.0.0/go.mod h1:jsenWakMcC0zFBFurPLEAyrnc/teJEM1O46fmI40EZs= -github.com/bufbuild/buf v1.4.0 h1:GqE3a8CMmcFvWPzuY3Mahf9Kf3S9XgZ/ORpfYFzO+90= -github.com/bufbuild/buf v1.4.0/go.mod h1:mwHG7klTHnX+rM/ym8LXGl7vYpVmnwT96xWoRB4H5QI= +github.com/bufbuild/buf v1.7.0 h1:uWRjhIXcrWkzIkA5TqXGyJbF51VW54QJsQZ3nwaes5Q= +github.com/bufbuild/buf v1.7.0/go.mod h1:Go40fMAF46PnPLC7jJgTQhAI95pmC0+VtxFKVC0qLq0= +github.com/bufbuild/connect-go v0.2.0 h1:WuMI/jLiJIhysHWvLWlxRozV67mGjCOUuDSl/lkDVic= +github.com/bufbuild/connect-go v0.2.0/go.mod h1:4efZ2eXFENwd4p7tuLaL9m0qtTsCOzuBvrohvRGevDM= github.com/butuzov/ireturn v0.1.1 h1:QvrO2QF2+/Cx1WA/vETCIYBKtRjc30vesdoPUNo1EbY= github.com/butuzov/ireturn v0.1.1/go.mod h1:Wh6Zl3IMtTpaIKbmwzqi6olnM9ptYQxxVacMsOEFPoc= -github.com/casbin/casbin/v2 v2.37.0/go.mod h1:vByNa/Fchek0KZUgG5wEsl7iFsiviAYKRtgrQfcJqHg= github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= -github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4= -github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -209,24 +157,23 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= -github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= -github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= -github.com/clbanning/mxj v1.8.4/go.mod h1:BVjHeAH+rl9rs6f+QIpeRl0tfu10SXn1pUSa5PVGJng= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= -github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211130200136-a8f946100490/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= +github.com/containerd/containerd v1.6.6 h1:xJNPhbrmz8xAMDNoVjHy9YHtWwEQNS+CDkcIRh7t8Y0= +github.com/containerd/containerd v1.6.6/go.mod h1:ZoP1geJldzCVY3Tonoz7b1IXk8rIX0Nltt5QE4OMNk0= github.com/containerd/continuity v0.3.0 h1:nisirsYROK15TAMVukJOUyGJjz4BNQJBVsNvAXZJ/eg= github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1Ag8espWhkykbPM= +github.com/containerd/typeurl v1.0.2 h1:Chlt8zIieDbzQFzXzAeBEF92KhExuE4p9p92/QmY7aY= +github.com/containerd/typeurl v1.0.2/go.mod h1:9trJWW2sRlGub4wZJRTW83VtbOLS6hwcDZXTn6oPz9s= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= @@ -239,31 +186,28 @@ github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfc github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/cosmos/go-bip39 v0.0.0-20180819234021-555e2067c45d h1:49RLWk1j44Xu4fjHb6JFYmeUnDORVwHNkDxaQ0ctCVU= github.com/cosmos/go-bip39 v0.0.0-20180819234021-555e2067c45d/go.mod h1:tSxLoYXyBmiFeKpvmq4dzayMdCjCnu8uqmCysIGBT2Y= -github.com/cpuguy83/go-md2man v1.0.10 h1:BSKMNlYxDvnunlTymqtgONjNnaRV1sTpcovwwjF22jk= github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creachadair/taskgroup v0.3.2 h1:zlfutDS+5XG40AOxcHDSThxKzns8Tnr9jnr6VqkYlkM= github.com/creachadair/taskgroup v0.3.2/go.mod h1:wieWwecHVzsidg2CsUnFinW1faVN4+kq+TDlRJQ0Wbk= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/daixiang0/gci v0.4.3 h1:wf7x0xRjQqTlA2dzHTI0A/xPyp7VcBatBG9nwGatwbQ= -github.com/daixiang0/gci v0.4.3/go.mod h1:EpVfrztufwVgQRXjnX4zuNinEpLj5OmMjtu/+MB0V0c= +github.com/daixiang0/gci v0.6.2 h1:TXCP5RqjE/UupXO+p33MEhqdv7QxjKGw5MVkt9ATiMs= +github.com/daixiang0/gci v0.6.2/go.mod h1:EpVfrztufwVgQRXjnX4zuNinEpLj5OmMjtu/+MB0V0c= github.com/davecgh/go-spew v0.0.0-20161028175848-04cdfd42973b/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v0.0.0-20171005155431-ecdeabc65495/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/decred/dcrd/lru v1.0.0/go.mod h1:mxKOwFd7lFjN2GZYsiz/ecgqR6kkYAl+0pz0tEMk218= github.com/denis-tingaikin/go-header v0.4.3 h1:tEaZKAlqql6SKCY++utLmkPLd6K8IBM20Ha7UVm+mtU= github.com/denis-tingaikin/go-header v0.4.3/go.mod h1:0wOCWuN71D5qIgE2nz9KrKmuYBAC2Mra5RassOIQ2/c= github.com/denisenkom/go-mssqldb v0.12.0 h1:VtrkII767ttSPNRfFekePK3sctr+joXgO58stqQbtUA= -github.com/denisenkom/go-mssqldb v0.12.0/go.mod h1:iiK0YP1ZeepvmBQk/QpLEhhTNJgfzrpArPY/aFvc9yU= github.com/dgraph-io/badger/v2 v2.2007.2 h1:EjjK0KqwaFMlPin1ajhP943VPENHJdEz1KLIegjaI3k= github.com/dgraph-io/badger/v2 v2.2007.2/go.mod h1:26P/7fbL4kUZVEVKLAKXkBXKOydDmM2p1e+NhhnBCAE= github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de h1:t0UHb5vdojIDUqktM6+xJAfScFBsVpXZmqC9dsgJmeA= @@ -271,11 +215,9 @@ github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de/go.mod h1:KP github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= -github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= -github.com/docker/cli v20.10.14+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M= -github.com/docker/cli v20.10.17+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= -github.com/docker/docker v20.10.7+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/distribution v2.8.1+incompatible h1:Q50tZOPR6T/hjNsyc9g8/syEs6bk8XXApsHjKukMl68= +github.com/docker/distribution v2.8.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker v20.10.17+incompatible h1:JYCuMrWaVNophQTOrMMoSwudOVEfcegoZZrleKc1xwE= github.com/docker/docker v20.10.17+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= @@ -285,23 +227,16 @@ github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDD github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= -github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= -github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= -github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= -github.com/envoyproxy/go-control-plane v0.10.1/go.mod h1:AY7fTTXNdv/aJ2O5jwpxAPOWUZ7hQAEvzN5Pf27BkQQ= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.0.14/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/envoyproxy/protoc-gen-validate v0.6.2/go.mod h1:2t7qjJNvHPx8IjnBOzl9E9/baC+qXE/TeeyBRzgJDws= github.com/esimonov/ifshort v1.0.4 h1:6SID4yGWfRae/M7hkVDVVyppy8q/v9OuxNdmjLQStBA= github.com/esimonov/ifshort v1.0.4/go.mod h1:Pe8zjlRrJ80+q2CxHLfEOfTwxCZ4O+MuhcHcfgNWTk0= github.com/ettle/strcase v0.1.1 h1:htFueZyVeE1XNnMEfbqp5r67qAN/4r6ya1ysq8Q+Zcw= @@ -313,33 +248,27 @@ github.com/facebookgo/stack v0.0.0-20160209184415-751773369052/go.mod h1:UbMTZqL github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 h1:E2s37DuLxFhQDg5gKsWoLBOB0n+ZW8s599zru8FJ2/Y= github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870/go.mod h1:5tD+neXqOorC30/tWg0LCSkrqj/AR6gu8yY8/fpw1q0= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= -github.com/fatih/color v1.12.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/fatih/structtag v1.2.0 h1:/OdNE99OxoI/PqaW/SuSK9uxxT3f/tcSZgon/ssNSx4= github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94= github.com/firefart/nonamedreturns v1.0.4 h1:abzI1p7mAEPYuR4A+VLKn4eNDOycjYo2phmY9sfv40Y= github.com/firefart/nonamedreturns v1.0.4/go.mod h1:TDhe/tjI1BXo48CmYbUduTV7BdIga8MAO/xbKdcVsGI= -github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= -github.com/franela/goblin v0.0.0-20210519012713-85d372ac71e2/go.mod h1:VzmDKDJVZI3aJmnRI9VjAn9nJ8qPPsN1fqzr9dqInIo= -github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= -github.com/frankban/quicktest v1.14.2/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= -github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI= github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= github.com/fullstorydev/grpcurl v1.6.0/go.mod h1:ZQ+ayqbKMJNhzLmbpCiurTVlaK2M/3nqZCxaQ2Ze/sM= github.com/fzipp/gocyclo v0.6.0 h1:lsblElZG7d3ALtGMx9fmxeTKZaLLpU8mET09yN4BBLo= github.com/fzipp/gocyclo v0.6.0/go.mod h1:rXPyn8fnlpa0R2csP/31uerbiVBugk5whMdlyaLkLoA= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-chi/chi/v5 v5.0.7 h1:rDTPXLDHGATaeHvVlLcR4Qe0zftYethFucbjVQ1PxU8= +github.com/go-chi/chi/v5 v5.0.7/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= github.com/go-critic/go-critic v0.6.3 h1:abibh5XYBTASawfTQ0rA7dVtQT+6KzpGqb/J+DxRDaw= github.com/go-critic/go-critic v0.6.3/go.mod h1:c6b3ZP1MQ7o6lPR7Rv3lEf7pYQUmAcx8ABHgdZCQt/k= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= @@ -358,12 +287,15 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= -github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= +github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-redis/redis v6.15.8+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= -github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-toolsmith/astcast v1.0.0 h1:JojxlmI6STnFVG9yOImLeGREv8W2ocNUM+iOhR6jE7g= @@ -385,7 +317,6 @@ github.com/go-toolsmith/typep v1.0.2 h1:8xdsa1+FSIH/RhEkgnD1j2CJOy5mNllW1Q9tRiYw github.com/go-toolsmith/typep v1.0.2/go.mod h1:JSQCQMUPdRlMZFswiq3TGpNp1GMktqkR2Ns5AIQkATU= github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b h1:khEcpUM4yFcxg4/FHQWkvVRmgijNXRfzkIDHh23ggEo= github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b/go.mod h1:aUCEOzzezBEjDBbFBoSiya/gduyIiWYRP6CnSFIV8AM= -github.com/go-zookeeper/zk v1.0.2/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -400,12 +331,8 @@ github.com/gogo/protobuf v1.3.0/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXP github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe h1:lXe2qZdvpiX5WZkZR4hgp4KJVfY3nMkvmwbVkpv1rVY= -github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= github.com/golang-sql/sqlexp v0.0.0-20170517235910-f1bb20e5a188 h1:+eHOFJl1BaXrQxKX+T06f78590z4qA2ZzBTqahsKSE4= -github.com/golang-sql/sqlexp v0.0.0-20170517235910-f1bb20e5a188/go.mod h1:vXjM/+wXQnTPR4KqTKDgJukSZ6amVRtWMPEjE6sQoK8= -github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -420,8 +347,6 @@ github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= -github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= -github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v1.1.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -438,10 +363,8 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= @@ -453,16 +376,16 @@ github.com/golangci/go-misc v0.0.0-20220329215616-d24fe342adfe h1:6RGUuS7EGotKx6 github.com/golangci/go-misc v0.0.0-20220329215616-d24fe342adfe/go.mod h1:gjqyPShc/m8pEMpk0a3SeagVb0kaqvhscv+i9jI5ZhQ= github.com/golangci/gofmt v0.0.0-20190930125516-244bba706f1a h1:iR3fYXUjHCR97qWS8ch1y9zPNsgXThGwjKPrYfqMPks= github.com/golangci/gofmt v0.0.0-20190930125516-244bba706f1a/go.mod h1:9qCChq59u/eW8im404Q2WWTrnBUQKjpNYKMbU4M7EFU= -github.com/golangci/golangci-lint v1.47.2 h1:qvMDVv49Hrx3PSEXZ0bD/yhwSbhsOihQjFYCKieegIw= -github.com/golangci/golangci-lint v1.47.2/go.mod h1:lpS2pjBZtRyXewUcOY7yUL3K4KfpoWz072yRN8AuhHg= +github.com/golangci/golangci-lint v1.48.0 h1:hRiBNk9iRqdAKMa06ntfEiLyza1/3IE9rHLNJaek4a8= +github.com/golangci/golangci-lint v1.48.0/go.mod h1:5N+oxduCho+7yuccW69upg/O7cxjfR/d+IQeiNxGmKM= github.com/golangci/lint-1 v0.0.0-20191013205115-297bf364a8e0 h1:MfyDlzVjl1hoaPzPD4Gpb/QgoRfSBR0jdhwGyAWwMSA= github.com/golangci/lint-1 v0.0.0-20191013205115-297bf364a8e0/go.mod h1:66R6K6P6VWk9I95jvqGxkqJxVWGFy9XlDwLwVz1RCFg= github.com/golangci/maligned v0.0.0-20180506175553-b1d89398deca h1:kNY3/svz5T29MYHubXix4aDDuE3RWHkPvopM/EDv/MA= github.com/golangci/maligned v0.0.0-20180506175553-b1d89398deca/go.mod h1:tvlJhZqDe4LMs4ZHD0oMUlt9G2LWuDGoisJTBzLMV9o= github.com/golangci/misspell v0.3.5 h1:pLzmVdl3VxTOncgzHcvLOKirdvcx/TydsClUQXTehjo= github.com/golangci/misspell v0.3.5/go.mod h1:dEbvlSfYbMQDtrpRMQU675gSDLDNa8sCPPChZ7PhiVA= -github.com/golangci/revgrep v0.0.0-20210930125155-c22e5001d4f2 h1:SgM7GDZTxtTTQPU84heOxy34iG5Du7F2jcoZnvp+fXI= -github.com/golangci/revgrep v0.0.0-20210930125155-c22e5001d4f2/go.mod h1:LK+zW4MpyytAWQRz0M4xnzEk50lSvqDQKfx304apFkY= +github.com/golangci/revgrep v0.0.0-20220804021717-745bb2f7c2e6 h1:DIPQnGy2Gv2FSA4B/hh8Q7xx3B7AIDk3DAMeHclH1vQ= +github.com/golangci/revgrep v0.0.0-20220804021717-745bb2f7c2e6/go.mod h1:0AKcRCkMoKvUvlf89F6O7H2LYdhr1zBh736mBItOdRs= github.com/golangci/unconvert v0.0.0-20180507085042-28b1c447d1f4 h1:zwtduBRr5SSWhqsYNgcuWO2kFlpdOZbP0+yRjmvPGys= github.com/golangci/unconvert v0.0.0-20180507085042-28b1c447d1f4/go.mod h1:Izgrg8RkN3rCIMLGE9CyYmU9pY2Jer6DgANEnZ/L/cQ= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= @@ -482,14 +405,12 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= -github.com/google/martian/v3 v3.2.1/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk= github.com/google/orderedcode v0.0.1 h1:UzfcAexk9Vhv8+9pNOgRu41f16lHq725vPwnSeiG/Us= github.com/google/orderedcode v0.0.1/go.mod h1:iVyU4/qPKHY5h/wSd6rZZCDcLJNxiWO6dvsYES2Sb20= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= @@ -503,15 +424,9 @@ github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= -github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/trillian v1.3.11/go.mod h1:0tPraVHrSDkA3BO6vKX67zgLXs6SsOAbHEivX+9mPgw= github.com/google/uuid v0.0.0-20161128191214-064e2069ce9c/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -520,20 +435,12 @@ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= -github.com/googleapis/gax-go/v2 v2.1.1/go.mod h1:hddJymUZASv3XPyGkUpKj8pPO47Rmb0eJc8R6ouapiM= -github.com/googleapis/gax-go/v2 v2.2.0/go.mod h1:as02EH8zWkzwUoLbBaFeQ+arQaj/OthfcblKl4IGNaM= -github.com/googleapis/gax-go/v2 v2.3.0/go.mod h1:b8LNqSzNabLiUpXKkY7HAR5jr6bIT99EXz9pXxye9YM= -github.com/googleapis/gax-go/v2 v2.4.0/go.mod h1:XOTVJ59hdnfJLIP/dh8n5CGryZR2LxK9wbMD5+iXC6c= github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g= github.com/gookit/color v1.5.1/go.mod h1:wZFzea4X8qN6vHOSP2apMb4/+w/orMznEzYsIHPaqKM= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gordonklaus/ineffassign v0.0.0-20200309095847-7953dde2c7bf/go.mod h1:cuNKsD1zp2v6XfE/orVX2QE1LC+i254ceGcVeDT3pTU= github.com/gordonklaus/ineffassign v0.0.0-20210914165742-4cc7213b9bc8 h1:PVRE9d4AQKmbelZ7emNig1+NT27DUmKZn5qXxfio54U= github.com/gordonklaus/ineffassign v0.0.0-20210914165742-4cc7213b9bc8/go.mod h1:Qcp2HIAYhR7mNUVSIxZww3Guk4it82ghYcEXIAk+QT0= github.com/gorhill/cronexpr v0.0.0-20180427100037-88b0669f7d75/go.mod h1:g2644b03hfBX9Ov0ZBDgXXens4rxSxmqFBbhvKv2yVA= -github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= -github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= @@ -542,7 +449,6 @@ github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/ad github.com/gostaticanalysis/analysisutil v0.0.0-20190318220348-4088753ea4d3/go.mod h1:eEOZF4jCKGi+aprrirO9e7WKB3beBRtWgqGunKl6pKE= github.com/gostaticanalysis/analysisutil v0.0.3/go.mod h1:eEOZF4jCKGi+aprrirO9e7WKB3beBRtWgqGunKl6pKE= github.com/gostaticanalysis/analysisutil v0.1.0/go.mod h1:dMhHRU9KTiDcuLGdy87/2gTR8WruwYZrKdRq9m1O6uw= -github.com/gostaticanalysis/analysisutil v0.4.1/go.mod h1:18U/DLpRgIUd459wGxVHE0fRgmo1UgHDcbw7F5idXu0= github.com/gostaticanalysis/analysisutil v0.7.1 h1:ZMCjoue3DtDWQ5WyU16YbjbQEQ3VuzwxALrpYd+HeKk= github.com/gostaticanalysis/analysisutil v0.7.1/go.mod h1:v21E3hY37WKMGSnbsw2S/ojApNWb6C1//mXO48CXbVc= github.com/gostaticanalysis/comment v1.3.0/go.mod h1:xMicKDx7XRXYdVwY9f9wQpDJVnqWxw9wCauCMKp+IBI= @@ -555,12 +461,13 @@ github.com/gostaticanalysis/nilerr v0.1.1 h1:ThE+hJP0fEp4zWLkWHWcRyI2Od0p7DlgYG3 github.com/gostaticanalysis/nilerr v0.1.1/go.mod h1:wZYb6YI5YAxxq0i1+VJbY0s2YONW0HU0GPE3+5PWN4A= github.com/gostaticanalysis/testutil v0.3.1-0.20210208050101-bfb5c8eec0e4/go.mod h1:D+FIZ+7OahH3ePw/izIEeH5I06eKs1IKI4Xr64/Am3M= github.com/gostaticanalysis/testutil v0.4.0 h1:nhdCmubdmDF6VEatUNjgUZBJKWRqugoISdUv3PPQgHY= -github.com/gostaticanalysis/testutil v0.4.0/go.mod h1:bLIoPefWXrRi/ssLFWX1dx7Repi5x3CuviD3dgAZaBU= github.com/gotestyourself/gotestyourself v2.2.0+incompatible h1:AQwinXlbQR2HvPjQZOmDhRqsv5mZf+Jb1RnSLxcqZcI= github.com/gotestyourself/gotestyourself v2.2.0+incompatible/go.mod h1:zZKM6oeNM8k+FRljX1mnzVYeS8wiGgQyvST1/GafPbY= github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.2.2/go.mod h1:EaizFBKfUKtMIF5iaDEhniwNedqGo9FuLFzppDr3uwI= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.12.1/go.mod h1:8XEsbTttt/W+VvjtQhLACqCisSPWTxCZ7sBRjU6iH9c= @@ -570,32 +477,11 @@ github.com/gtank/merlin v0.1.1 h1:eQ90iG7K9pOhtereWsmyRJ6RAwcP4tHTDBHXNg+u5is= github.com/gtank/merlin v0.1.1/go.mod h1:T86dnYJhcGOh5BjZFCJWTDeTK7XW8uE+E21Cy/bIQ+s= github.com/gtank/ristretto255 v0.1.2 h1:JEqUCPA1NvLq5DwYtuzigd7ss8fwbYay9fi4/5uMzcc= github.com/gtank/ristretto255 v0.1.2/go.mod h1:Ph5OpO6c7xKUGROZfWVLiJf9icMDwUeIvY4OmlYW69o= -github.com/hashicorp/consul/api v1.10.1/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M= -github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M= -github.com/hashicorp/consul/api v1.12.0/go.mod h1:6pVBMo0ebnYdt2S3H87XhekM/HHrUoTD2XXb/VrZVy0= -github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= -github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= -github.com/hashicorp/go-hclog v0.12.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= -github.com/hashicorp/go-hclog v0.16.2/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= -github.com/hashicorp/go-hclog v1.0.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= -github.com/hashicorp/go-hclog v1.2.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= -github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= -github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= -github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= +github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= +github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= -github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= -github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= -github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= -github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= -github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-version v1.2.1/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek= github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= @@ -604,35 +490,21 @@ github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= -github.com/hashicorp/mdns v1.0.1/go.mod h1:4gW7WsVCke5TE7EPeYliwHlRUyBtfCwuFwuMg2DmyNY= -github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= -github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk= -github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= -github.com/hashicorp/serf v0.9.7/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huandu/xstrings v1.0.0/go.mod h1:4qWG/gcEcfX4z/mBDHJ++3ReCw9ibxbsNJbcucJdbSo= github.com/huandu/xstrings v1.2.0/go.mod h1:DvyZB1rfVYsBIigL8HwpZgxHwXozlTgGqn63UyNX5k4= -github.com/hudl/fargo v1.4.0/go.mod h1:9Ai6uvFy5fQNq6VPKtg+Ceq1+eTY4nKUlR2JElEOcDo= -github.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.4/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/imdario/mergo v0.3.8/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= -github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk= -github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/jdxcode/netrc v0.0.0-20210204082910-926c7f70242a h1:d4+I1YEKVmWZrgkt6jpXBnLgV2ZjO0YxEtLDdfIZfH4= github.com/jdxcode/netrc v0.0.0-20210204082910-926c7f70242a/go.mod h1:Zi/ZFkEqFHTm7qkjyNJjaWH4LQA9LQhGJyF0lTYGpxw= github.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= -github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jgautheron/goconst v1.5.1 h1:HxVbL1MhydKs8R8n/HE5NPvzfaYmQJA3o879lE4+WcM= github.com/jgautheron/goconst v1.5.1/go.mod h1:aAosetZ5zaeC/2EfMeRswtxUFBpe2Hr7HzkgX4fanO4= github.com/jhump/gopoet v0.0.0-20190322174617-17282ff210b3/go.mod h1:me9yfT6IJSlOL3FCfrg+L6yzUEZ+5jW6WHt4Sk+UPUI= @@ -642,8 +514,8 @@ github.com/jhump/protocompile v0.0.0-20220216033700-d705409f108f h1:BNuUg9k2EiJm github.com/jhump/protocompile v0.0.0-20220216033700-d705409f108f/go.mod h1:qr2b5kx4HbFS7/g4uYO5qv9ei8303JMsC7ESbYiqr2Q= github.com/jhump/protoreflect v1.6.1/go.mod h1:RZQ/lnuN+zqeRVpQigTwO6o0AJUkxbnSnpuG7toUTG4= github.com/jhump/protoreflect v1.11.0/go.mod h1:U7aMIjN0NWq9swDP7xDdoMfRHb35uiuTd3Z9nFXJf5E= -github.com/jhump/protoreflect v1.12.1-0.20220417024638-438db461d753 h1:uFlcJKZPLQd7rmOY/RrvBuUaYmAFnlFHKLivhO6cOy8= -github.com/jhump/protoreflect v1.12.1-0.20220417024638-438db461d753/go.mod h1:JytZfP5d0r8pVNLZvai7U/MCuTWITgrI4tTg7puQFKI= +github.com/jhump/protoreflect v1.12.1-0.20220721211354-060cc04fc18b h1:izTof8BKh/nE1wrKOrloNA5q4odOarjf+Xpe+4qow98= +github.com/jhump/protoreflect v1.12.1-0.20220721211354-060cc04fc18b/go.mod h1:JytZfP5d0r8pVNLZvai7U/MCuTWITgrI4tTg7puQFKI= github.com/jingyugao/rowserrcheck v1.1.1 h1:zibz55j/MJtLsjP1OF4bSdgXxwL1b+Vn7Tjzq7gFzUs= github.com/jingyugao/rowserrcheck v1.1.1/go.mod h1:4yvlZSDb3IyDTUZJUmpZfm2Hwok+Dtp+nu2qOq+er9c= github.com/jirfag/go-printf-func-name v0.0.0-20200119135958-7558a9eaa5af h1:KA9BjwUk7KlCh6S9EAGWBt1oExIUv9WyNCiRz5amv48= @@ -656,38 +528,31 @@ github.com/jmhodges/levigo v1.0.0/go.mod h1:Q6Qx+uH3RAqyK4rFQroq9RL7mdkABMcfhEI+ github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/jonboulle/clockwork v0.2.0/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= -github.com/josharian/txtarfs v0.0.0-20210218200122-0702f000015a/go.mod h1:izVPOvVRsHiKkeGCT6tYBNWyDVuzj9wAaBb5R9qamfw= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/jrick/logrotate v1.0.0/go.mod h1:LNinyqDIJnpAur+b8yyulnQw/wDuN1+BYKlTRt3OuAQ= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/juju/ratelimit v1.0.1/go.mod h1:qapgC/Gy+xNh9UxzV13HGGl/6UXNN+ct+vwSgWNm/qk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/julz/importas v0.1.0 h1:F78HnrsjY3cR7j0etXy5+TU1Zuy7Xt08X/1aJnH5xXY= github.com/julz/importas v0.1.0/go.mod h1:oSFU2R4XK/P7kNBrnL/FEQlDGN1/6WoxXEjSSXO0DV0= -github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/errcheck v1.6.1 h1:cErYo+J4SmEjdXZrVXGwLJCE2sB06s23LpkcyWNrT+s= -github.com/kisielk/errcheck v1.6.1/go.mod h1:nXw/i/MfnvRHqXa7XXmQMUB0oNFGuBrNI8d8NLy0LPw= +github.com/kisielk/errcheck v1.6.2 h1:uGQ9xI8/pgc9iOoCe7kWQgRE6SBTrCGmTSf0LrEtY7c= +github.com/kisielk/errcheck v1.6.2/go.mod h1:nXw/i/MfnvRHqXa7XXmQMUB0oNFGuBrNI8d8NLy0LPw= github.com/kisielk/gotool v1.0.0 h1:AV2c/EiW3KqPNT9ZKl07ehoAGi4C5/01Cfbblndcapg= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kkdai/bstream v0.0.0-20161212061736-f391b8402d23/go.mod h1:J+Gs4SYgM6CZQHDETBtE9HaSEkGmuNXF86RwHhHUvq4= -github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= -github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A= -github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= +github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -696,10 +561,8 @@ github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxv github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= -github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -718,21 +581,16 @@ github.com/ldez/tagliatelle v0.3.1/go.mod h1:8s6WJQwEYHbKZDsp/LjArytKOG8qaMrKQQ3 github.com/leonklingele/grouper v1.1.0 h1:tC2y/ygPbMFSBOs3DcyaEMKnnwH7eYKzohOtRrf0SAg= github.com/leonklingele/grouper v1.1.0/go.mod h1:uk3I3uDfi9B6PeUjsCKi6ndcf63Uy7snXgR4yDYQVDY= github.com/letsencrypt/pkcs11key/v4 v4.0.0/go.mod h1:EFUvBDay26dErnNb70Nd0/VW3tJiIbETBPTl9ATXQag= -github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.8.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lib/pq v1.9.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= -github.com/lib/pq v1.10.4/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lib/pq v1.10.6 h1:jbk+ZieJ0D7EVGJYpL9QTz7/YW6UHbmdnZWYyK5cdBs= github.com/lib/pq v1.10.6/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg= github.com/lufeee/execinquery v1.2.1 h1:hf0Ems4SHcUGBxpGN7Jz78z1ppVkP/837ZlETPCEtOM= github.com/lufeee/execinquery v1.2.1/go.mod h1:EC7DrEKView09ocscGHC+apXMIaorh4xqSxS/dy8SbM= -github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= -github.com/lyft/protoc-gen-star v0.5.3/go.mod h1:V0xaHgaf5oCCqmcxYcWiDfTiKsZsRc87/1qhoTACD8w= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= -github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= github.com/magiconair/properties v1.8.6 h1:5ibWZ6iY0NctNGWo87LalDlEZ6R41TqbbDamhfG/Qzo= github.com/magiconair/properties v1.8.6/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= github.com/maratori/testpackage v1.1.0 h1:GJY4wlzQhuBusMF1oahQCBtUV/AQ/k69IZ68vxaac2Q= @@ -743,16 +601,13 @@ github.com/matryer/is v1.4.0 h1:sosSmIWwkYITGrxZ25ULNDeKiMNzFSr4V/eqBQP0PeE= github.com/matryer/is v1.4.0/go.mod h1:8I/i5uYgLzgsgEloJE1U6xx5HkBQpAZvepWuujKwMRU= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= -github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40= github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= -github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= -github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= @@ -763,43 +618,32 @@ github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/Qd github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/mattn/go-sqlite3 v1.14.9 h1:10HX2Td0ocZpYEjhilsuo6WWtUqttj2Kb0KtD86/KYA= -github.com/mattn/go-sqlite3 v1.14.9/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= -github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI= +github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/mbilski/exhaustivestruct v1.2.0 h1:wCBmUnSYufAHO6J4AVWY6ff+oxWxsVFrwgOdMUQePUo= github.com/mbilski/exhaustivestruct v1.2.0/go.mod h1:OeTBVxQWoEmB2J2JCHmXWPJ0aksxSUOUy+nvtVEfzXc= github.com/mgechev/dots v0.0.0-20210922191527-e955255bf517/go.mod h1:KQ7+USdGKfpPjXk4Ga+5XxQM4Lm4e3gAogrreFAYpOg= github.com/mgechev/revive v1.2.1 h1:GjFml7ZsoR0IrQ2E2YIvWFNS5GPDV7xNwvA5GM1HZC4= github.com/mgechev/revive v1.2.1/go.mod h1:+Ro3wqY4vakcYNtkBWdZC7dBg1xSB6sp054wWwmeFm0= -github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.35/go.mod h1:KNUDUusw/aVsxyTYZM1oqvCicbwhgbNgztCETuNZ7xM= -github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= -github.com/miekg/dns v1.1.43/go.mod h1:+evo5L0630/F6ca/Z9+GAqzhjGyn8/c+TBaOyfEl0V4= github.com/miekg/pkcs11 v1.0.2/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= github.com/mimoo/StrobeGo v0.0.0-20181016162300-f8f6d4d2b643 h1:hLDRPB66XQT/8+wG9WsDpiCvZf1yKO7sz7scAjSlBa0= github.com/mimoo/StrobeGo v0.0.0-20181016162300-f8f6d4d2b643/go.mod h1:43+3pMjjKimDBf5Kr4ZFNGbLql1zKkbImw+fZbw3geM= -github.com/minio/highwayhash v1.0.1/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g= github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= -github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-ps v1.0.0/go.mod h1:J4lOc8z8yJs6vUwklHw2XEIiT4z4C40KtWVN3nvg8Pg= -github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= -github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/mitchellh/mapstructure v1.4.2/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/mitchellh/reflectwalk v1.0.1/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/moby/buildkit v0.10.3 h1:/dGykD8FW+H4p++q5+KqKEo6gAkYKyBQHdawdjVwVAU= +github.com/moby/buildkit v0.10.3/go.mod h1:jxeOuly98l9gWHai0Ojrbnczrk/rf+o9/JqNhY+UCSo= github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= -github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc= github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6 h1:dcztxKSvZ4Id8iPpHERQBbIJfabdt4wUm5qy3wOL2Zc= github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6/go.mod h1:E2VnQOmVuvZB6UYnnDB0qG5Nq/1tD9acaOpo6xmt0Kw= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -807,10 +651,11 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5/go.mod h1:caMODM3PzxT8aQXRPkAt8xlV/e7d7w8GM5g0fa5F0D8= github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= github.com/moricho/tparallel v0.2.1 h1:95FytivzT6rYzdJLdtfn6m1bfFJylOJK41+lgv/EHf4= github.com/moricho/tparallel v0.2.1/go.mod h1:fXEIZxG2vdfl0ZF8b42f5a78EhjjD5mX8qUplsoSU4k= +github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= +github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/mozilla/scribe v0.0.0-20180711195314-fb71baf557c1/go.mod h1:FIczTrinKo8VaLxe6PWTPEXRXDIHz2QAwiaBaP5/4a8= github.com/mozilla/tls-observatory v0.0.0-20210609171429-7bc42856d2e5/go.mod h1:FUqVoUPHSEdDR0MnFM3Dh8AU0pZHLXUD127SAJGER/s= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= @@ -820,13 +665,6 @@ github.com/mwitkow/go-proto-validators v0.0.0-20180403085117-0950a7990007/go.mod github.com/mwitkow/go-proto-validators v0.2.0/go.mod h1:ZfA1hW+UH/2ZHOWvQ3HnQaU0DtnpXu850MZiy+YUgcc= github.com/nakabonne/nestif v0.3.1 h1:wm28nZjhQY5HyYPx+weN3Q65k6ilSBxDb8v5S81B81U= github.com/nakabonne/nestif v0.3.1/go.mod h1:9EtoZochLn5iUprVDmDjqGKPofoUEBL8U4Ngq6aY7OE= -github.com/nats-io/jwt v1.2.2/go.mod h1:/xX356yQA6LuXI9xWW7mZNpxgF2mBmGecH+Fj34sP5Q= -github.com/nats-io/jwt/v2 v2.0.3/go.mod h1:VRP+deawSXyhNjXmxPCHskrR6Mq50BqpEI5SEcNiGlY= -github.com/nats-io/nats-server/v2 v2.5.0/go.mod h1:Kj86UtrXAL6LwYRA6H4RqzkHhK0Vcv2ZnKD5WbQ1t3g= -github.com/nats-io/nats.go v1.12.1/go.mod h1:BPko4oXsySz4aSWeFgOHLZs3G4Jq4ZAyE6/zMCxRT6w= -github.com/nats-io/nkeys v0.2.0/go.mod h1:XdZpAbhgyyODYqjTawOnIOI7VlbKSarI9Gfy1tqEu/s= -github.com/nats-io/nkeys v0.3.0/go.mod h1:gvUNGjVcM2IPr5rCsRsC6Wb3Hr2CQAm08dsxtV6A5y4= -github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/nbutton23/zxcvbn-go v0.0.0-20210217022336-fa2cb2858354 h1:4kuARK6Y6FxaNu/BnU2OAaLF86eTVhP2hjTB6iMvItA= github.com/nbutton23/zxcvbn-go v0.0.0-20210217022336-fa2cb2858354/go.mod h1:KSVJerMDfblTH7p5MZaTt+8zaT2iEk3AkVb9PQdZuE8= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= @@ -839,7 +677,6 @@ github.com/nishanths/predeclared v0.2.2/go.mod h1:RROzoN6TnGQupbC+lqggsOlcgysk3L github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= -github.com/oklog/ulid/v2 v2.0.2/go.mod h1:mtBL0Qe/0HAx6/a4Z30qxVIAL1eQDweXq5lxOEiwQ68= github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.2/go.mod h1:rSAaSIOAGT9odnlyGlUfAJaoc5w2fSBUmeGDbRWPxyQ= @@ -850,111 +687,90 @@ github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= -github.com/onsi/ginkgo v1.16.2/go.mod h1:CObGmKUOKaSC0RjmoAK7tKyn4Azo5P2IWuoMnvwxz1E= github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc= github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= github.com/onsi/ginkgo/v2 v2.1.3/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= github.com/onsi/ginkgo/v2 v2.1.4 h1:GNapqRSid3zijZ9H77KrgVG4/8KqiyRsxcSxe+7ApXY= github.com/onsi/ginkgo/v2 v2.1.4/go.mod h1:um6tUpWM/cxCK3/FK8BXqEiUMUwRgSM4JXG47RKZmLU= -github.com/onsi/gomega v1.4.1/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= -github.com/onsi/gomega v1.13.0/go.mod h1:lRk9szgn8TxENtWd0Tp4c3wjlRfMTMH27I+3Je41yGY= github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw= github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= -github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= -github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opencontainers/runc v1.1.2/go.mod h1:Tj1hFw6eFWp/o33uxGf5yF2BX5yz2Z6iptFpuvbbKqc= +github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 h1:rc3tiVYb5z54aKaDfakKn0dDjIyPpTtszkjuMzyt7ec= +github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= github.com/opencontainers/runc v1.1.3 h1:vIXrkId+0/J2Ymu2m7VjGvbSlAId9XNRPhn2p4b+d8w= github.com/opencontainers/runc v1.1.3/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg= github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/openzipkin/zipkin-go v0.2.5/go.mod h1:KpXfKdgRDnnhsxw4pNIH9Md5lyFqKUa4YDFlwRYAMyE= github.com/ory/dockertest v3.3.5+incompatible h1:iLLK6SQwIhcbrG783Dghaaa3WPzGc+4Emza6EbVUUGA= github.com/ory/dockertest v3.3.5+incompatible/go.mod h1:1vX4m9wsvi00u5bseYwXaSnhNrne+V0E6LAcBILJdPs= github.com/ory/dockertest/v3 v3.9.1 h1:v4dkG+dlu76goxMiTT2j8zV7s4oPPEppKT8K8p2f1kY= -github.com/ory/dockertest/v3 v3.9.1/go.mod h1:42Ir9hmvaAPm0Mgibk6mBPi7SFvTXxEcnztDYOJ//uM= github.com/otiai10/copy v1.2.0 h1:HvG945u96iNadPoG2/Ja2+AUJeW5YuFQMixq9yirC+k= github.com/otiai10/copy v1.2.0/go.mod h1:rrF5dJ5F0t/EWSYODDu4j9/vEeYHMkc8jt0zJChqQWw= github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE= github.com/otiai10/curr v1.0.0/go.mod h1:LskTG5wDwr8Rs+nNQ+1LlxRjAtTZZjtJW4rMXl6j4vs= github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT91xUo= github.com/otiai10/mint v1.3.1/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc= -github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= -github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= github.com/pelletier/go-toml/v2 v2.0.2 h1:+jQXlF3scKIcSEKkdHzXhCTDLPFi5r1wnK6yPS+49Gw= github.com/pelletier/go-toml/v2 v2.0.2/go.mod h1:MovirKjgVRESsAvNZlAjtFwV867yGuwRkXbG66OzopI= -github.com/performancecopilot/speed/v4 v4.0.0/go.mod h1:qxrSyuDGrTOWfV+uKRFhfxw6h/4HXRGUiZiufxo49BM= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 h1:q2e307iGHPdTGp0hoxKjt1H5pDo6utceo3dQVK3I5XQ= github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5/go.mod h1:jvVRKCrJTQWu0XVbaOlby/2lO20uSCHEMzzplHXte1o= github.com/phayes/checkstyle v0.0.0-20170904204023-bfd46e6a821d h1:CdDQnGF8Nq9ocOS/xlSptM1N3BbrA6/kmaep5ggwaIA= github.com/phayes/checkstyle v0.0.0-20170904204023-bfd46e6a821d/go.mod h1:3OzsM7FXDQlpCiw2j81fOmAwQLnZnLGXVKUzeKQXIAw= -github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= -github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= -github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4/go.mod h1:4OwLy04Bl9Ef3GJJCoec+30X3LQs/0/m4HFRt/2LUSA= github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 h1:KoWmjvw+nsYOo29YJK9vDA65RGE3NrOnUtO7a+RF9HU= github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8/go.mod h1:HKlIX3XHQyzLZPlr7++PzdhaXEj94dEiJgZDTsxEqUI= -github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= github.com/pkg/profile v1.6.0 h1:hUDfIISABYI59DyeB3OTay/HxSRwTQ8rB/H83k6r5dM= github.com/pkg/profile v1.6.0/go.mod h1:qBsxPvzyUincmltOk6iyRVxHYg4adc0OFOv72ZdLa18= -github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg= github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pointlander/compress v1.1.1-0.20190518213731-ff44bd196cc3 h1:hUmXhbljNFtrH5hzV9kiRoddZ5nfPTq3K0Sb2hYYiqE= +github.com/pointlander/compress v1.1.1-0.20190518213731-ff44bd196cc3/go.mod h1:q5NXNGzqj5uPnVuhGkZfmgHqNUhf15VLi6L9kW0VEc0= +github.com/pointlander/jetset v1.0.1-0.20190518214125-eee7eff80bd4 h1:RHHRCZeaNyBXdYPMjZNH8/XHDBH38TZzw8izrW7dmBE= +github.com/pointlander/jetset v1.0.1-0.20190518214125-eee7eff80bd4/go.mod h1:RdR1j20Aj5pB6+fw6Y9Ur7lMHpegTEjY1vc19hEZL40= +github.com/pointlander/peg v1.0.1 h1:mgA/GQE8TeS9MdkU6Xn6iEzBmQUQCNuWD7rHCK6Mjs0= +github.com/pointlander/peg v1.0.1/go.mod h1:5hsGDQR2oZI4QoWz0/Kdg3VSVEC31iJw/b7WjqCBGRI= github.com/polyfloyd/go-errorlint v1.0.0 h1:pDrQG0lrh68e602Wfp68BlUTRFoHn8PZYAjLgt2LFsM= github.com/polyfloyd/go-errorlint v1.0.0/go.mod h1:KZy4xxPJyy88/gldCe5OdW6OQRtNO3EZE7hXzmnebgA= -github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= -github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= -github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= -github.com/prometheus/client_golang v1.12.2 h1:51L9cDoUHVrXx4zWYlcLQIZ+d+VXHgqnYKkIuq4g/34= -github.com/prometheus/client_golang v1.12.2/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= +github.com/prometheus/client_golang v1.13.0 h1:b71QUfeo5M8gq2+evJdTPfZhYMAU0uKPkyPJ7TPsloU= +github.com/prometheus/client_golang v1.13.0/go.mod h1:vTeo+zgvILHsnnj/39Ou/1fPN5nJFOEMgftOUOmlvYQ= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= -github.com/prometheus/common v0.30.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.34.0 h1:RBmGO9d/FVjqHT0yUGQwBJhkwKV+wPCn7KGpvfab0uE= -github.com/prometheus/common v0.34.0/go.mod h1:gB3sOl7P0TvJabZpLY5uQMpUqRCPPCyRLCZYc7JZTNE= +github.com/prometheus/common v0.37.0 h1:ccBbHCgIiT9uSoFY0vX8H3zsNR5eLt17/RQLUvn8pXE= +github.com/prometheus/common v0.37.0/go.mod h1:phzohg0JFMnBEFGxTDbfu3QyL5GI8gTQJFhYO5B3mfA= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0VU= github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/prometheus/procfs v0.8.0 h1:ODq8ZFEaYeCaZOJlZZdJA2AbQR98dSHSM1KW/You5mo= +github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4= github.com/pseudomuto/protoc-gen-doc v1.3.2/go.mod h1:y5+P6n3iGrbKG+9O04V5ld71in3v/bX88wUwgt+U8EA= github.com/pseudomuto/protokit v0.2.0/go.mod h1:2PdH30hxVHsup8KpBTOXTBeMVhJZVio3Q8ViKSAXT0Q= github.com/quasilyte/go-ruleguard v0.3.1-0.20210203134552-1b5a410e1cc8/go.mod h1:KsAh3x0e7Fkpgs+Q9pNLS5XpFSvYCEVl5gP9Pp1xp30= @@ -962,7 +778,6 @@ github.com/quasilyte/go-ruleguard v0.3.16-0.20220213074421-6aa060fab41a h1:sWFav github.com/quasilyte/go-ruleguard v0.3.16-0.20220213074421-6aa060fab41a/go.mod h1:VMX+OnnSw4LicdiEGtRSD/1X8kW7GuEscjYNr4cOIT4= github.com/quasilyte/go-ruleguard/dsl v0.3.0/go.mod h1:KeCP03KrjuSO0H1kTuZQCWlQPulDV6YMIXmpQss17rU= github.com/quasilyte/go-ruleguard/dsl v0.3.16/go.mod h1:KeCP03KrjuSO0H1kTuZQCWlQPulDV6YMIXmpQss17rU= -github.com/quasilyte/go-ruleguard/dsl v0.3.21/go.mod h1:KeCP03KrjuSO0H1kTuZQCWlQPulDV6YMIXmpQss17rU= github.com/quasilyte/go-ruleguard/rules v0.0.0-20201231183845-9e62ed36efe1/go.mod h1:7JTjp89EGyU1d6XfBiXihJNG37wB2VRkd125Q1u7Plc= github.com/quasilyte/go-ruleguard/rules v0.0.0-20211022131956-028d6511ab71/go.mod h1:4cgAphtvu7Ftv7vOT2ZOYhC6CvBxZixcasr8qIOTA50= github.com/quasilyte/gogrep v0.0.0-20220120141003-628d8b3623b5 h1:PDWGei+Rf2bBiuZIbZmM20J2ftEy9IeUCHA8HbQqed8= @@ -971,49 +786,38 @@ github.com/quasilyte/regex/syntax v0.0.0-20200407221936-30656e2c4a95 h1:L8QM9bvf github.com/quasilyte/regex/syntax v0.0.0-20200407221936-30656e2c4a95/go.mod h1:rlzQ04UMyJXu/aOvhd8qT+hvDrFpiwqp8MRXDY9szc0= github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567 h1:M8mH9eK4OUR4lu7Gd+PU1fV2/qnDNfzT635KRSObncs= github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567/go.mod h1:DWNGW8A4Y+GyBgPuaQJuWiy0XYftx4Xm/y5Jqk9I6VQ= -github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 h1:MkV+77GLUNo5oJ0jf870itWm3D0Sjh7+Za9gazKc5LQ= github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= -github.com/remyoudompheng/go-dbus v0.0.0-20121104212943-b7232d34b1d5/go.mod h1:+u151txRmLpwxBmpYn9z3d1sdJdjRPQpsXuYeY9jNls= -github.com/remyoudompheng/go-liblzma v0.0.0-20190506200333-81bf2d431b96/go.mod h1:90HvCY7+oHHUKkbeMCiHt1WuFR2/hPJ9QrljDG+v6ls= -github.com/remyoudompheng/go-misc v0.0.0-20190427085024-2d6ac652a50e/go.mod h1:80FQABjoFzZ2M5uEa6FUaJYEmqU2UOKojlFVak1UAwI= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.8.1 h1:geMPLpDpQOgVyCg5z5GoRwLHepNdb71NXb67XFkP+Eg= -github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/rs/cors v1.8.2 h1:KCooALfAYGs415Cwu5ABvv9n9509fSiG5SQJn/AQo4U= github.com/rs/cors v1.8.2/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.27.0 h1:1T7qCieN22GVc8S4Q2yuexzBb1EqjbgjSH9RohbMjKs= github.com/rs/zerolog v1.27.0/go.mod h1:7frBqO0oezxmnO7GF86FY++uy8I0Tk/If5ni1G9Qc0U= -github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNueLj0oo= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/ryancurrah/gomodguard v1.2.3 h1:ww2fsjqocGCAFamzvv/b8IsRduuHHeK2MHTcTxZTQX8= -github.com/ryancurrah/gomodguard v1.2.3/go.mod h1:rYbA/4Tg5c54mV1sv4sQTP5WOPBcoLtnBZ7/TEhXAbg= +github.com/ryancurrah/gomodguard v1.2.4 h1:CpMSDKan0LtNGGhPrvupAoLeObRFjND8/tU1rEOtBp4= +github.com/ryancurrah/gomodguard v1.2.4/go.mod h1:+Kem4VjWwvFpUJRJSwa16s1tBJe+vbv02+naTow2f6M= github.com/ryanrolds/sqlclosecheck v0.3.0 h1:AZx+Bixh8zdUBxUA1NxbxVAS78vTPq4rCb8OUZI9xFw= github.com/ryanrolds/sqlclosecheck v0.3.0/go.mod h1:1gREqxyTGR3lVtpngyFo3hZAgk0KCtEdgEkHwDbigdA= -github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig= -github.com/sagikazarmark/crypt v0.6.0/go.mod h1:U8+INwJo3nBv1m6A/8OBXAq7Jnpspk5AxSgDyEQcea8= github.com/sanposhiho/wastedassign/v2 v2.0.6 h1:+6/hQIHKNJAUixEj6EmOngGIisyeI+T3335lYTyxRoA= github.com/sanposhiho/wastedassign/v2 v2.0.6/go.mod h1:KyZ0MWTwxxBmfwn33zh3k1dmsbF2ud9pAAGfoLfjhtI= github.com/sasha-s/go-deadlock v0.2.1-0.20190427202633-1595213edefa h1:0U2s5loxrTy6/VgfVoLuVLFJcURKLH49ie0zSch7gh4= github.com/sasha-s/go-deadlock v0.2.1-0.20190427202633-1595213edefa/go.mod h1:F73l+cr82YSh10GxyRI6qZiCgK64VaZjwesgfQ1/iLM= -github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= +github.com/sashamelentyev/usestdlibvars v1.8.0 h1:QnWP9IOEuRyYKH+IG0LlQIjuJlc0rfdo4K3/Zh3WRMw= +github.com/sashamelentyev/usestdlibvars v1.8.0/go.mod h1:BFt7b5mSVHaaa26ZupiNRV2ODViQBxZZVhtAxAJRrjs= github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/securego/gosec/v2 v2.12.0 h1:CQWdW7ATFpvLSohMVsajscfyHJ5rsGmEXmsNcsDNmAg= github.com/securego/gosec/v2 v2.12.0/go.mod h1:iTpT+eKTw59bSgklBHlSnH5O2tNygHMDxfvMubA4i7I= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shazow/go-diff v0.0.0-20160112020656-b6b7b6733b8c h1:W65qqJCIOVP4jpqPQ0YvHYKwcMEMVWIzWC5iNQQfBTU= github.com/shazow/go-diff v0.0.0-20160112020656-b6b7b6733b8c/go.mod h1:/PevMnwAxekIXwN8qQyfc5gl2NlkB3CQlkizAbOkeBs= -github.com/shirou/gopsutil/v3 v3.22.6/go.mod h1:EdIubSnZhbAvBS1yJ7Xi+AShB/hxwLHOMz4MCYz7yMs= github.com/shurcooL/go v0.0.0-20180423040247-9e1955d9fb6e/go.mod h1:TDJrrUr11Vxrven61rcy3hJMUqaf/CLWYhHNPmT14Lk= github.com/shurcooL/go-goon v0.0.0-20170922171312-37c2f522c041/go.mod h1:N5mDOmsrJOB+vfqUK+7DmDyjhSLIIBnXo9lvZJj3MWQ= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= @@ -1021,40 +825,33 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= +github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sivchari/containedctx v1.0.2 h1:0hLQKpgC53OVF1VT7CeoFHk9YKstur1XOgfYIc1yrHI= github.com/sivchari/containedctx v1.0.2/go.mod h1:PwZOeqm4/DLoJOqMSIJs3aKqXRX4YO+uXww087KZ7Bw= -github.com/sivchari/nosnakecase v1.5.0 h1:ZBvAu1H3uteN0KQ0IsLpIFOwYgPEhKLyv2ahrVkub6M= -github.com/sivchari/nosnakecase v1.5.0/go.mod h1:CwDzrzPea40/GB6uynrNLiorAlgFRvRbFSgJx2Gs+QY= +github.com/sivchari/nosnakecase v1.7.0 h1:7QkpWIRMe8x25gckkFd2A5Pi6Ymo0qgr4JrhGt95do8= +github.com/sivchari/nosnakecase v1.7.0/go.mod h1:CwDzrzPea40/GB6uynrNLiorAlgFRvRbFSgJx2Gs+QY= github.com/sivchari/tenv v1.7.0 h1:d4laZMBK6jpe5PWepxlV9S+LC0yXqvYHiq8E6ceoVVE= github.com/sivchari/tenv v1.7.0/go.mod h1:64yStXKSOxDfX47NlhVwND4dHwfZDdbp2Lyl018Icvg= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/snikch/goodman v0.0.0-20171125024755-10e37e294daa h1:YJfZp12Z3AFhSBeXOlv4BO55RMwPn2NoQeDsrdWnBtY= github.com/snikch/goodman v0.0.0-20171125024755-10e37e294daa/go.mod h1:oJyF+mSPHbB5mVY2iO9KV3pTt/QbIkGaO8gQ2WrDbP4= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/sonatard/noctx v0.0.1 h1:VC1Qhl6Oxx9vvWo3UDgrGXYCeKCe3Wbw7qAWL6FrmTY= github.com/sonatard/noctx v0.0.1/go.mod h1:9D2D/EoULe8Yy2joDHJj7bv3sZoq9AaSb8B4lqBjiZI= -github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= github.com/sourcegraph/go-diff v0.6.1 h1:hmA1LzxW0n1c3Q4YbrFgg4P99GSnebYa3x8gr0HZqLQ= github.com/sourcegraph/go-diff v0.6.1/go.mod h1:iBszgVvyxdc8SFZ7gm69go2KDdt3ag071iBaWPF6cjs= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= -github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4= -github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= github.com/spf13/afero v1.8.2 h1:xehSyVa0YnHWsJ49JFljMpg1HX19V6NDZ1fkm1Xznbo= github.com/spf13/afero v1.8.2/go.mod h1:CtAatgMJh6bJEIs48Ay/FOnkljP3WeGUG0MC1RfAqwo= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= -github.com/spf13/cobra v1.3.0/go.mod h1:BrRVncBjOJa/eUcVVm9CE+oC6as8k+VYr4NY7WCi9V4= -github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g= github.com/spf13/cobra v1.5.0 h1:X+jTBEBqF0bHN+9cSMgmfuvv2VHJ9ezmFNf9Y/XstYU= github.com/spf13/cobra v1.5.0/go.mod h1:dWXEIy2H428czQCjInthrTRUg7yKbok+2Qi/yBIJoUM= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= @@ -1065,16 +862,12 @@ github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnIn github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= -github.com/spf13/viper v1.10.0/go.mod h1:SoyBPwAtKDzypXNDFKN5kzH7ppppbGZtls1UpIy5AsM= github.com/spf13/viper v1.12.0 h1:CZ7eSOd3kZoaYDLbXnmzgQI5RlciuXBMA+18HwHRfZQ= github.com/spf13/viper v1.12.0/go.mod h1:b6COn30jlNxbm/V2IqWiNWkJ+vZNiMNksliPCiuKtSI= github.com/ssgreg/nlreturn/v2 v2.2.1 h1:X4XDI7jstt3ySqGU86YGAURbxw3oTDPK9sPEi6YEwQ0= github.com/ssgreg/nlreturn/v2 v2.2.1/go.mod h1:E/iiPB78hV7Szg2YfRgyIrk1AD6JVMTRkkxBiELzh2I= github.com/stbenjam/no-sprintf-host-port v0.1.1 h1:tYugd/yrm1O0dV+ThCbaKZh195Dfm07ysF0U6JQXczc= github.com/stbenjam/no-sprintf-host-port v0.1.1/go.mod h1:TLhvtIvONRzdmkFiio4O8LHsN9N74I+PhRquPsxpL0I= -github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/amqp v1.0.0/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/handy v0.0.0-20200128134331-0f66f006fb2e/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0 h1:M2gUjqZET1qApGOWNSnZ49BAIMX4F/1plDv3+l31EJ4= @@ -1089,11 +882,8 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= -github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= -github.com/subosito/gotenv v1.3.0/go.mod h1:YzJjq/33h7nrwdY+iHMhEOEEbW0ovIz0tB6t6PwAXzs= github.com/subosito/gotenv v1.4.0 h1:yAzM1+SmVcz5R4tXGsNMu1jUl2aOJXoiWUCEwwnGrvs= github.com/subosito/gotenv v1.4.0/go.mod h1:mZd6rFysKEcUhUHXJk0C/08wAgyDBFuwEYL7vWWGaGo= github.com/sylvia7788/contextcheck v1.0.4 h1:MsiVqROAdr0efZc/fOCt0c235qm9XJqHtWwM+2h2B04= @@ -1115,9 +905,6 @@ github.com/tetafro/godot v1.4.11 h1:BVoBIqAf/2QdbFmSwAWnaIqDivZdOV0ZRwEm6jivLKw= github.com/tetafro/godot v1.4.11/go.mod h1:LR3CJpxDVGlYOWn3ZZg1PgNZdTUvzsZWu8xaEohUpn8= github.com/timakin/bodyclose v0.0.0-20210704033933-f49887972144 h1:kl4KhGNsJIbDHS9/4U9yQo1UcPQM0kOMJHn29EoH/Ro= github.com/timakin/bodyclose v0.0.0-20210704033933-f49887972144/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk= -github.com/tinylib/msgp v1.1.5/go.mod h1:eQsjooMTnV42mHu917E26IogZ2930nFyBQdofk10Udg= -github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk= -github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= @@ -1126,8 +913,6 @@ github.com/tomarrell/wrapcheck/v2 v2.6.2/go.mod h1:ao7l5p0aOlUNJKI0qVwB4Yjlqutd0 github.com/tomasen/realip v0.0.0-20180522021738-f0c99a92ddce/go.mod h1:o8v6yHRoik09Xen7gje4m9ERNah1d1PPsVq1VEx9vE4= github.com/tommy-muehle/go-mnd/v2 v2.5.0 h1:iAj0a8e6+dXSL7Liq0aXPox36FiN1dBbjA6lt9fl65s= github.com/tommy-muehle/go-mnd/v2 v2.5.0/go.mod h1:WsUAkMJMYww6l/ufffCD3m+P7LEvr8TnZn9lwVDlgzw= -github.com/ttacon/chalk v0.0.0-20160626202418-22c06c80ed31/go.mod h1:onvgF043R+lC5RZ8IT9rBXDaEDnpnw/Cl+HFiw+v/7Q= -github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/ultraware/funlen v0.0.3 h1:5ylVWm8wsNwH5aWo9438pwvsK0QiqVuUrt9bn7S/iLA= github.com/ultraware/funlen v0.0.3/go.mod h1:Dp4UiAus7Wdb9KUZsYWZEWiRzGuM2kXM1lPbfaF6xhA= @@ -1137,22 +922,14 @@ github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijb github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/uudashr/gocognit v1.0.6 h1:2Cgi6MweCsdB6kpcVQp7EW4U23iBFQWfTXiWlyp842Y= github.com/uudashr/gocognit v1.0.6/go.mod h1:nAIUuVBnYU7pcninia3BHOvQkpQCeO76Uscky5BOwcY= -github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= -github.com/valyala/fasthttp v1.30.0/go.mod h1:2rsYD01CKFrjjsvFxx75KlEUNpWNBY9JWD3K/7o2Cus= -github.com/valyala/quicktemplate v1.7.0/go.mod h1:sqKJnoaOF88V07vkO+9FL8fb9uZg/VPSJnLYn+LmLk8= -github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= github.com/vektra/mockery/v2 v2.14.0 h1:KZ1p5Hrn8tiY+LErRMr14HHle6khxo+JKOXLBW/yfqs= github.com/vektra/mockery/v2 v2.14.0/go.mod h1:bnD1T8tExSgPD1ripLkDbr60JA9VtQeu12P3wgLZd7M= github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8/go.mod h1:dniwbG03GafCjFohMDmz6Zc6oCuiqgH6tGNyXTkHzXE= github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= -github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo= -github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= -github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= -github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= @@ -1168,27 +945,14 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= -gitlab.com/bosi/decorder v0.2.2 h1:LRfb3lP6mZWjUzpMOCLTVjcnl/SqZWBWmKNqQvMocQs= -gitlab.com/bosi/decorder v0.2.2/go.mod h1:9K1RB5+VPNQYtXtTDAzd2OEftsZb1oV0IrJrzChSdGE= +gitlab.com/bosi/decorder v0.2.3 h1:gX4/RgK16ijY8V+BRQHAySfQAb354T7/xQpDB2n10P0= +gitlab.com/bosi/decorder v0.2.3/go.mod h1:9K1RB5+VPNQYtXtTDAzd2OEftsZb1oV0IrJrzChSdGE= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.4/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= go.etcd.io/etcd v0.0.0-20200513171258-e048e166ab9c/go.mod h1:xCI7ZzBfRuGgBXyXO6yfWfDmlWd35khcWpUa4L0xI/k= -go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= -go.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= -go.etcd.io/etcd/api/v3 v3.5.4/go.mod h1:5GB2vv4A4AOn3yk7MftYGHkUfGtDHnEraIjym4dYz5A= -go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= -go.etcd.io/etcd/client/pkg/v3 v3.5.1/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= -go.etcd.io/etcd/client/pkg/v3 v3.5.4/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= -go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ= -go.etcd.io/etcd/client/v2 v2.305.1/go.mod h1:pMEacxZW7o8pg4CrFE7pquyCJJzZvkvdD2RibOCCCGs= -go.etcd.io/etcd/client/v2 v2.305.4/go.mod h1:Ud+VUwIi9/uQHOMA+4ekToJ12lTxlv0zB/+DHwTGEbU= -go.etcd.io/etcd/client/v3 v3.5.0/go.mod h1:AIKXXVX/DQXtfTEqBryiLTUXwON+GuvO6Z7lLS/oTh0= -go.etcd.io/etcd/client/v3 v3.5.4/go.mod h1:ZaRkVgBZC+L+dLCjTcF1hRXpgZXQPOvnA/Ak/gq3kiY= go.mozilla.org/mozlog v0.0.0-20170222151521-4bb13139d403/go.mod h1:jHoPAGnDrCy6kaI2tAze5Prf0Nr0w/oNkROt2lw3n3o= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= @@ -1198,6 +962,12 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.33.0 h1:z6rnla1Asjzn0FrhohzIbDi4bxbtc6EMmQ7f5ZPn+pA= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.33.0/go.mod h1:y/SlJpJQPd2UzfBCj0E9Flk9FDCtTyqUmaCB41qFrWI= +go.opentelemetry.io/otel v1.8.0 h1:zcvBFizPbpa1q7FehvFiHbQwGzmPILebO0tyqIR5Djg= +go.opentelemetry.io/otel v1.8.0/go.mod h1:2pkj+iMj0o03Y+cW6/m8Y4WkRdYN3AvCXCnzRMp9yvM= +go.opentelemetry.io/otel/trace v1.8.0 h1:cSy0DF9eGI5WIfNwZ1q2iUyGj00tGzP24dE1lOlHrfY= +go.opentelemetry.io/otel/trace v1.8.0/go.mod h1:0Bt3PXY8w+3pheS3hQUt+wow8b1ojPaTBoTCh2zIFI4= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -1205,56 +975,37 @@ go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/goleak v1.1.11-0.20210813005559-691160354723/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.4.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= go.uber.org/multierr v1.8.0 h1:dg6GjLku4EH+249NNmoIciG9N/jURbDG+pFlTkhzIC8= go.uber.org/multierr v1.8.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= -go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= -go.uber.org/zap v1.19.1/go.mod h1:j3DNczoxDZroyBnOT1L/Q79cfUMGZxlv/9dzN7SM1rI= go.uber.org/zap v1.21.0 h1:WefMeulhovoZ2sYXz7st6K0sLj7bBhpiFaud4r4zST8= go.uber.org/zap v1.21.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw= golang.org/x/crypto v0.0.0-20170930174604-9419663f5a44/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180501155221-613d6eafa307/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200115085410-6d4e4cb37c7d/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20200323165209-0ec3e9974c59/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= -golang.org/x/crypto v0.0.0-20210314154223-e6e6c4f2bb5b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= -golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= -golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20210915214749-c084706c2272/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e h1:T8NU3HyQ8ClP4SEE+KbFlg6n0NhuTsN4MyznaarGsZM= golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= @@ -1264,12 +1015,9 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20200331195152-e8c3332aa8e5 h1:FR+oGxGfbQu1d+jglI3rCkjAjUnhRSZcUxr+DqlDLNo= golang.org/x/exp v0.0.0-20200331195152-e8c3332aa8e5/go.mod h1:4M0jN8W1tt0AVLNr8HDosyJCDCDuyL9N9+3m7wDWgKw= -golang.org/x/exp/typeparams v0.0.0-20220218215828-6cf2b201936e/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk= golang.org/x/exp/typeparams v0.0.0-20220613132600-b0d781184e0d h1:+W8Qf4iJtMGKkyAygcKohjxTk4JPsL9DpzApJ22m5Ic= golang.org/x/exp/typeparams v0.0.0-20220613132600-b0d781184e0d/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk= -golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -1295,16 +1043,13 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/net v0.0.0-20180719180050-a680a1efc54d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1343,30 +1088,16 @@ golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= -golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20210917221730-978cfadd31cf/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220325170049-de3da57026de/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220412020605-290c469a71a5/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220520000938-2e3eb7b945c2/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220617184016-355a448f1bc9 h1:Yqz/iviulwKwAREEeUd3nbBFn0XuyJqkoft2IlrvOhc= -golang.org/x/net v0.0.0-20220617184016-355a448f1bc9/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.0.0-20220726230323-06994584191e h1:wOQNKh1uuDGRnmgF0jDxh7ctgGy/3P4rYWQRVJD4/Yg= +golang.org/x/net v0.0.0-20220726230323-06994584191e/go.mod h1:AaygXjzTFtRAg2ttMY5RMuhpJ3cNnI0XpyFJD1iQRSM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1376,17 +1107,9 @@ golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20211005180243-6b3c2da341f1/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= -golang.org/x/oauth2 v0.0.0-20220309155454-6242fa91716a/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= -golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= +golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5 h1:OSnWWcOd/CtWQC2cYSBgbTSJv3ciqd8r54ySIW2y3RE= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1399,13 +1122,11 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220513210516-0976fa681c29 h1:w8s32wxx3sY+OjLlv9qltkLU5yvJzxjjgiHWLjdIcw4= -golang.org/x/sync v0.0.0-20220513210516-0976fa681c29/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1424,8 +1145,6 @@ golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1439,7 +1158,6 @@ golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1456,82 +1174,51 @@ golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200814200057-3d37ad5750ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200831180312-196b9ba8737a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210816183151-1e6c022a8912/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210917161153-d61c044b1678/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211105183446-c75c47738b0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211210111614-af8b64212486/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220227234510-4e6760a101f9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220328115105-d36c6a25d886/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220405210540-1e041c57c461/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220406163625-3f8b81556e12/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220502124256-b6088ccd6cba/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220610221304-9f5ed59c137d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220624220833-87e55d714810/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220627191245-f75cf1eec38b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220702020025-31831981b65f h1:xdsejrW/0Wf2diT5CPp3XmKUNbr7Xvw8kYilQ+6qjRY= golang.org/x/sys v0.0.0-20220702020025-31831981b65f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220727055044-e65921a090b8 h1:dyU22nBWzrmTQxtNrr4dzVOvaw35nUYE279vF9UmsI8= +golang.org/x/sys v0.0.0-20220727055044-e65921a090b8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.0.0-20220411215600-e5f449aeb171/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.0.0-20220526004731-065cf7ba2467 h1:CBpWXWQpIRjzmkkA+M7q9Fqnwd2mZr3AFqexg8YTfoM= -golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.0.0-20220722155259-a9ba230a4035 h1:Q5284mrmYTpACcm+eAKjKJH48BBwSyfJqmmGDTtT8Vc= +golang.org/x/term v0.0.0-20220722155259-a9ba230a4035/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= @@ -1540,15 +1227,13 @@ golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac h1:7zkz7BUtwNFFqcowJ+RIgu2MaV/MapERkDIy+mwPyjs= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190228203856-589c23e65e65/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190307163923-6a08e3108db3/go.mod h1:25r3+/G6/xytQM8iWZKq3Hn0kr0rgFKPUNVEL/dr3z4= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190311215038-5c2858a9cfe5/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= @@ -1556,7 +1241,6 @@ golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190321232350-e250d351ecad/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190322203728-c1a832b0ad89/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= @@ -1565,7 +1249,6 @@ golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190910044552-dd2b5c81c578/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190916130336-e45ffcd953cc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -1619,9 +1302,7 @@ golang.org/x/tools v0.0.0-20200831203904-5a2aa26beb65/go.mod h1:Cj7w3i3Rnn0Xh82u golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE= golang.org/x/tools v0.0.0-20201001104356-43ebab892c4c/go.mod h1:z6u4i615ZeAfBE4XtMziQW1fSVJXACjjbWkB/mvPzlU= golang.org/x/tools v0.0.0-20201002184944-ecd9fd270d5d/go.mod h1:z6u4i615ZeAfBE4XtMziQW1fSVJXACjjbWkB/mvPzlU= -golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201023174141-c8cfbd0f21e6/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.0.0-20201028025901-8cd080b735b3/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= @@ -1634,29 +1315,17 @@ golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.1-0.20210205202024-ef80cdb6ec6d/go.mod h1:9bzcO0MWcOuT0tm1iBGzDVPshzfwoVvREIui8C+MHqU= golang.org/x/tools v0.1.1-0.20210302220138-2ac05c832e1a/go.mod h1:9bzcO0MWcOuT0tm1iBGzDVPshzfwoVvREIui8C+MHqU= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.7/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo= -golang.org/x/tools v0.1.8/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.1.9-0.20211228192929-ee1ca4ffc4da/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= -golang.org/x/tools v0.1.11-0.20220513221640-090b14e8501f/go.mod h1:SgwaegtQh8clINPpECJMqnxLv9I09HLqnW3RMqW0CA4= golang.org/x/tools v0.1.11/go.mod h1:SgwaegtQh8clINPpECJMqnxLv9I09HLqnW3RMqW0CA4= -golang.org/x/tools v0.1.12-0.20220628192153-7743d1d949f1 h1:NHLFZ56qCjD+0hYY3kE5Wl40Z7q4Gn9Ln/7YU0lsGko= -golang.org/x/tools v0.1.12-0.20220628192153-7743d1d949f1/go.mod h1:SgwaegtQh8clINPpECJMqnxLv9I09HLqnW3RMqW0CA4= +golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= -gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= -gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= -gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= @@ -1677,27 +1346,6 @@ google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz513 google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg= google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE= google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= -google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= -google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= -google.golang.org/api v0.47.0/go.mod h1:Wbvgpq1HddcWVtzsVLyfLp8lDg6AA241LmgIL59tHXo= -google.golang.org/api v0.48.0/go.mod h1:71Pr1vy+TAZRPkPs/xlCf5SsU8WjuAWv1Pfjbtukyy4= -google.golang.org/api v0.50.0/go.mod h1:4bNT5pAuq5ji4SRZm+5QIkjny9JAyVD/3gaSihNefaw= -google.golang.org/api v0.51.0/go.mod h1:t4HdrdoNgyN5cbEfm7Lum0lcLDLiise1F8qDKX00sOU= -google.golang.org/api v0.54.0/go.mod h1:7C4bFFOvVDGXjfDTAsgGwDgAxRDeQ4X8NvUedIt6z3k= -google.golang.org/api v0.55.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= -google.golang.org/api v0.56.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= -google.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdrMgI= -google.golang.org/api v0.59.0/go.mod h1:sT2boj7M9YJxZzgeZqXogmhfmRWDtPzT31xkieUbuZU= -google.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I= -google.golang.org/api v0.62.0/go.mod h1:dKmwPCydfsad4qCH08MSdgWjfHOyfpd4VtDGgRFdavw= -google.golang.org/api v0.63.0/go.mod h1:gs4ij2ffTRXwuzzgJl/56BdwJaA194ijkfn++9tDuPo= -google.golang.org/api v0.67.0/go.mod h1:ShHKP8E60yPsKNw/w8w+VYaj9H6buA5UqDp8dhbQZ6g= -google.golang.org/api v0.70.0/go.mod h1:Bs4ZM2HGifEvXwd50TtW70ovgJffJYw2oRCOFU/SkfA= -google.golang.org/api v0.71.0/go.mod h1:4PyU6e6JogV1f9eA4voyrTY2batOLdgZ5qZ5HOCc4j8= -google.golang.org/api v0.74.0/go.mod h1:ZpfMZOVRMywNyvJFeqL9HRWBgAuRfSjJFpe9QtRRyDs= -google.golang.org/api v0.75.0/go.mod h1:pU9QmyHLnzlpar1Mjt4IbapUCy8J+6HD6GeELN69ljA= -google.golang.org/api v0.78.0/go.mod h1:1Sg78yoMLOhlQTeF+ARBoytAcH1NNyyl390YMy6rKmw= -google.golang.org/api v0.81.0/go.mod h1:FA6Mb/bZxj706H2j+j2d6mHEEaHBmbbWnkfvmorOCko= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1705,6 +1353,7 @@ google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww google.golang.org/appengine v1.6.2/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20170818010345-ee236bd376b0/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= @@ -1748,52 +1397,9 @@ google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210108203827-ffc7fda8c3d7/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210226172003-ab064af71705/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= -google.golang.org/genproto v0.0.0-20210513213006-bf773b8c8384/go.mod h1:P3QM42oQyzQSnHPnZ/vqoCdDmzH28fzWByN9asMeM8A= -google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= -google.golang.org/genproto v0.0.0-20210604141403-392c879c8b08/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= -google.golang.org/genproto v0.0.0-20210608205507-b6d2f5bf0d7d/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= -google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= -google.golang.org/genproto v0.0.0-20210713002101-d411969a0d9a/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= -google.golang.org/genproto v0.0.0-20210716133855-ce7ef5c701ea/go.mod h1:AxrInvYm1dci+enl5hChSFPOmmUF1+uAa/UsgNRWd7k= -google.golang.org/genproto v0.0.0-20210728212813-7823e685a01f/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= -google.golang.org/genproto v0.0.0-20210805201207-89edb61ffb67/go.mod h1:ob2IJxKrgPT52GcgX759i1sleT07tiKowYBGbczaW48= -google.golang.org/genproto v0.0.0-20210813162853-db860fec028c/go.mod h1:cFeNkxwySK631ADgubI+/XFU/xp8FD5KIVV4rj8UC5w= -google.golang.org/genproto v0.0.0-20210821163610-241b8fcbd6c8/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210903162649-d08c68adba83/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210909211513-a8c4777a87af/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210917145530-b395a37504d4/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210924002016-3dee208752a0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211008145708-270636b82663/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211028162531-8db9c33dc351/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211129164237-f09f9a12af12/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211203200212-54befc351ae9/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211206160659-862468c7d6e0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20211221195035-429b39de9b1c/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20220126215142-9970aeb2e350/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20220207164111-0872dc986b00/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20220218161850-94dd64e39d7c/go.mod h1:kGP+zUP2Ddo0ayMi4YuN7C3WZyJvGLZRh8Z5wnAqvEI= -google.golang.org/genproto v0.0.0-20220222213610-43724f9ea8cf/go.mod h1:kGP+zUP2Ddo0ayMi4YuN7C3WZyJvGLZRh8Z5wnAqvEI= -google.golang.org/genproto v0.0.0-20220304144024-325a89244dc8/go.mod h1:kGP+zUP2Ddo0ayMi4YuN7C3WZyJvGLZRh8Z5wnAqvEI= -google.golang.org/genproto v0.0.0-20220310185008-1973136f34c6/go.mod h1:kGP+zUP2Ddo0ayMi4YuN7C3WZyJvGLZRh8Z5wnAqvEI= -google.golang.org/genproto v0.0.0-20220324131243-acbaeb5b85eb/go.mod h1:hAL49I2IFola2sVEjAn7MEwsja0xp51I0tlGAf9hz4E= -google.golang.org/genproto v0.0.0-20220407144326-9054f6ed7bac/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= -google.golang.org/genproto v0.0.0-20220413183235-5e96e2839df9/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= -google.golang.org/genproto v0.0.0-20220414192740-2d67ff6cf2b4/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= -google.golang.org/genproto v0.0.0-20220421151946-72621c1f0bd3/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= -google.golang.org/genproto v0.0.0-20220429170224-98d788798c3e/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= -google.golang.org/genproto v0.0.0-20220505152158-f39f71e6c8f3/go.mod h1:RAyBrSAP7Fh3Nc84ghnVLDPuV51xc9agzmm4Ph6i0Q4= -google.golang.org/genproto v0.0.0-20220519153652-3a47de7e79bd h1:e0TwkXOdbnH/1x5rc5MZ/VYyiZ4v+RdVfrGMqEwT68I= -google.golang.org/genproto v0.0.0-20220519153652-3a47de7e79bd/go.mod h1:RAyBrSAP7Fh3Nc84ghnVLDPuV51xc9agzmm4Ph6i0Q4= +google.golang.org/genproto v0.0.0-20220725144611-272f38e5d71b h1:SfSkJugek6xm7lWywqth4r2iTrYLpD8lOj1nMIIhMNM= +google.golang.org/genproto v0.0.0-20220725144611-272f38e5d71b/go.mod h1:iHe1svFLAZg9VWz891+QbRMwUv9O/1Ww+/mngYeThbc= google.golang.org/grpc v1.8.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= @@ -1816,22 +1422,10 @@ google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8= google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.37.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= -google.golang.org/grpc v1.37.1/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= -google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= -google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= -google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= -google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= -google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= -google.golang.org/grpc v1.46.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= -google.golang.org/grpc v1.46.2/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= google.golang.org/grpc v1.48.0 h1:rQOsyJ/8+ufEDJd/Gdsz7HG220Mh9HAhFHRGnIjda0w= google.golang.org/grpc v1.48.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= -google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -1846,8 +1440,8 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw= -google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -1860,8 +1454,6 @@ gopkg.in/cheggaaa/pb.v1 v1.0.28/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qS gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= -gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= -gopkg.in/ini.v1 v1.66.4/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ini.v1 v1.66.6 h1:LATuAqN/shcYAOkv3wl2L4rkaKqkcgTBQjOyYDvcPKI= gopkg.in/ini.v1 v1.66.6/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= @@ -1881,13 +1473,12 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= -gotest.tools/v3 v3.2.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A= +gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= @@ -1895,8 +1486,8 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -honnef.co/go/tools v0.3.2 h1:ytYb4rOqyp1TSa2EPvNVwtPQJctSELKaMyLfqNP4+34= -honnef.co/go/tools v0.3.2/go.mod h1:jzwdWgg7Jdq75wlfblQxO4neNaFFSvgc1tD5Wv8U0Yw= +honnef.co/go/tools v0.3.3 h1:oDx7VAwstgpYpb3wv0oxiZlxY+foCpRAwY7Vk6XpAgA= +honnef.co/go/tools v0.3.3/go.mod h1:jzwdWgg7Jdq75wlfblQxO4neNaFFSvgc1tD5Wv8U0Yw= mvdan.cc/gofumpt v0.3.1 h1:avhhrOmv0IuvQVK7fvwV91oFSGAk5/6Po8GXTzICeu8= mvdan.cc/gofumpt v0.3.1/go.mod h1:w3ymliuxvzVx8DAutBnVyDqYb1Niy/yCJt/lk821YCE= mvdan.cc/interfacer v0.0.0-20180901003855-c20040233aed h1:WX1yoOaKQfddO/mLzdV4wptyWgoH/6hwLs7QHTixo0I= @@ -1906,8 +1497,6 @@ mvdan.cc/lint v0.0.0-20170908181259-adc824a0674b/go.mod h1:2odslEg/xrtNQqCYg2/jC mvdan.cc/unparam v0.0.0-20220706161116-678bad134442 h1:seuXWbRB1qPrS3NQnHmFKLJLtskWyueeIzmLXghMGgk= mvdan.cc/unparam v0.0.0-20220706161116-678bad134442/go.mod h1:F/Cxw/6mVrNKqrR2YjFf5CaW0Bw4RL8RfbEf4GRggJk= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= -rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/libs/autofile/autofile_test.go b/libs/autofile/autofile_test.go index 3e3814d5e..ff6056fc5 100644 --- a/libs/autofile/autofile_test.go +++ b/libs/autofile/autofile_test.go @@ -1,7 +1,6 @@ package autofile import ( - "io/ioutil" "os" "path/filepath" "syscall" @@ -24,7 +23,7 @@ func TestSIGHUP(t *testing.T) { }) // First, create a temporary directory and move into it - dir, err := ioutil.TempDir("", "sighup_test") + dir, err := os.MkdirTemp("", "sighup_test") require.NoError(t, err) t.Cleanup(func() { os.RemoveAll(dir) @@ -49,7 +48,7 @@ func TestSIGHUP(t *testing.T) { require.NoError(t, err) // Move into a different temporary directory - otherDir, err := ioutil.TempDir("", "sighup_test_other") + otherDir, err := os.MkdirTemp("", "sighup_test_other") require.NoError(t, err) defer os.RemoveAll(otherDir) err = os.Chdir(otherDir) @@ -79,7 +78,7 @@ func TestSIGHUP(t *testing.T) { } // The current directory should be empty - files, err := ioutil.ReadDir(".") + files, err := os.ReadDir(".") require.NoError(t, err) assert.Empty(t, files) } @@ -87,7 +86,7 @@ func TestSIGHUP(t *testing.T) { // // Manually modify file permissions, close, and reopen using autofile: // // We expect the file permissions to be changed back to the intended perms. // func TestOpenAutoFilePerms(t *testing.T) { -// file, err := ioutil.TempFile("", "permission_test") +// file, err := os.CreateTemp("", "permission_test") // require.NoError(t, err) // err = file.Close() // require.NoError(t, err) @@ -113,7 +112,7 @@ func TestSIGHUP(t *testing.T) { func TestAutoFileSize(t *testing.T) { // First, create an AutoFile writing to a tempfile dir - f, err := ioutil.TempFile("", "sighup_test") + f, err := os.CreateTemp("", "sighup_test") require.NoError(t, err) err = f.Close() require.NoError(t, err) diff --git a/libs/autofile/group_test.go b/libs/autofile/group_test.go index 0981923eb..ffdb70013 100644 --- a/libs/autofile/group_test.go +++ b/libs/autofile/group_test.go @@ -2,7 +2,6 @@ package autofile import ( "io" - "io/ioutil" "os" "path/filepath" "testing" @@ -122,7 +121,7 @@ func TestRotateFile(t *testing.T) { } }() - dir, err := ioutil.TempDir("", "rotate_test") + dir, err := os.MkdirTemp("", "rotate_test") require.NoError(t, err) defer os.RemoveAll(dir) err = os.Chdir(dir) @@ -151,21 +150,21 @@ func TestRotateFile(t *testing.T) { require.NoError(t, err) // Read g.Head.Path+"000" - body1, err := ioutil.ReadFile(g.Head.Path + ".000") + body1, err := os.ReadFile(g.Head.Path + ".000") assert.NoError(t, err, "Failed to read first rolled file") if string(body1) != "Line 1\nLine 2\nLine 3\n" { t.Errorf("got unexpected contents: [%v]", string(body1)) } // Read g.Head.Path - body2, err := ioutil.ReadFile(g.Head.Path) + body2, err := os.ReadFile(g.Head.Path) assert.NoError(t, err, "Failed to read first rolled file") if string(body2) != "Line 4\nLine 5\nLine 6\n" { t.Errorf("got unexpected contents: [%v]", string(body2)) } // Make sure there are no files in the current, temporary directory - files, err := ioutil.ReadDir(".") + files, err := os.ReadDir(".") require.NoError(t, err) assert.Empty(t, files) diff --git a/libs/cli/helper.go b/libs/cli/helper.go index 4b87bd60b..37fe34fc9 100644 --- a/libs/cli/helper.go +++ b/libs/cli/helper.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "io" - "io/ioutil" "os" "path/filepath" @@ -19,7 +18,7 @@ func WriteConfigVals(dir string, vals map[string]string) error { data += fmt.Sprintf("%s = \"%s\"\n", k, v) } cfile := filepath.Join(dir, "config.toml") - return ioutil.WriteFile(cfile, []byte(data), 0600) + return os.WriteFile(cfile, []byte(data), 0600) } // RunWithArgs executes the given command with the specified command line args diff --git a/libs/cli/setup_test.go b/libs/cli/setup_test.go index 0cb322344..fec49e5c1 100644 --- a/libs/cli/setup_test.go +++ b/libs/cli/setup_test.go @@ -2,7 +2,7 @@ package cli import ( "fmt" - "io/ioutil" + "os" "strconv" "strings" "testing" @@ -55,7 +55,7 @@ func TestSetupEnv(t *testing.T) { } func tempDir() string { - cdir, err := ioutil.TempDir("", "test-cli") + cdir, err := os.MkdirTemp("", "test-cli") if err != nil { panic(err) } diff --git a/libs/log/tm_logger_test.go b/libs/log/tm_logger_test.go index cbe29d994..8427febf1 100644 --- a/libs/log/tm_logger_test.go +++ b/libs/log/tm_logger_test.go @@ -2,7 +2,7 @@ package log_test import ( "bytes" - "io/ioutil" + "io" "strings" "testing" @@ -90,11 +90,11 @@ func TestError(t *testing.T) { } func BenchmarkTMLoggerSimple(b *testing.B) { - benchmarkRunner(b, log.NewTMLogger(ioutil.Discard), baseInfoMessage) + benchmarkRunner(b, log.NewTMLogger(io.Discard), baseInfoMessage) } func BenchmarkTMLoggerContextual(b *testing.B) { - benchmarkRunner(b, log.NewTMLogger(ioutil.Discard), withInfoMessage) + benchmarkRunner(b, log.NewTMLogger(io.Discard), withInfoMessage) } func benchmarkRunner(b *testing.B, logger log.Logger, f func(log.Logger)) { diff --git a/libs/log/tmfmt_logger_test.go b/libs/log/tmfmt_logger_test.go index 4b82455f1..12c2fbbc4 100644 --- a/libs/log/tmfmt_logger_test.go +++ b/libs/log/tmfmt_logger_test.go @@ -3,7 +3,7 @@ package log_test import ( "bytes" "errors" - "io/ioutil" + "io" "math" "regexp" "testing" @@ -62,16 +62,16 @@ func TestTMFmtLogger(t *testing.T) { } func BenchmarkTMFmtLoggerSimple(b *testing.B) { - benchmarkRunnerKitlog(b, log.NewTMFmtLogger(ioutil.Discard), baseMessage) + benchmarkRunnerKitlog(b, log.NewTMFmtLogger(io.Discard), baseMessage) } func BenchmarkTMFmtLoggerContextual(b *testing.B) { - benchmarkRunnerKitlog(b, log.NewTMFmtLogger(ioutil.Discard), withMessage) + benchmarkRunnerKitlog(b, log.NewTMFmtLogger(io.Discard), withMessage) } func TestTMFmtLoggerConcurrency(t *testing.T) { t.Parallel() - testConcurrency(t, log.NewTMFmtLogger(ioutil.Discard), 10000) + testConcurrency(t, log.NewTMFmtLogger(io.Discard), 10000) } func benchmarkRunnerKitlog(b *testing.B, logger kitlog.Logger, f func(kitlog.Logger)) { diff --git a/libs/os/os.go b/libs/os/os.go index a24e1ba7f..16aa3e68b 100644 --- a/libs/os/os.go +++ b/libs/os/os.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "os" "os/signal" "syscall" @@ -62,11 +61,11 @@ func FileExists(filePath string) bool { } func ReadFile(filePath string) ([]byte, error) { - return ioutil.ReadFile(filePath) + return os.ReadFile(filePath) } func MustReadFile(filePath string) []byte { - fileBytes, err := ioutil.ReadFile(filePath) + fileBytes, err := os.ReadFile(filePath) if err != nil { Exit(fmt.Sprintf("MustReadFile failed: %v", err)) return nil @@ -75,7 +74,7 @@ func MustReadFile(filePath string) []byte { } func WriteFile(filePath string, contents []byte, mode os.FileMode) error { - return ioutil.WriteFile(filePath, contents, mode) + return os.WriteFile(filePath, contents, mode) } func MustWriteFile(filePath string, contents []byte, mode os.FileMode) { diff --git a/libs/os/os_test.go b/libs/os/os_test.go index 88bf1412c..4de518444 100644 --- a/libs/os/os_test.go +++ b/libs/os/os_test.go @@ -3,7 +3,6 @@ package os import ( "bytes" "fmt" - "io/ioutil" "os" "path/filepath" "testing" @@ -12,7 +11,7 @@ import ( ) func TestCopyFile(t *testing.T) { - tmpfile, err := ioutil.TempFile("", "example") + tmpfile, err := os.CreateTemp("", "example") if err != nil { t.Fatal(err) } @@ -29,7 +28,7 @@ func TestCopyFile(t *testing.T) { if _, err := os.Stat(copyfile); os.IsNotExist(err) { t.Fatal("copy should exist") } - data, err := ioutil.ReadFile(copyfile) + data, err := os.ReadFile(copyfile) if err != nil { t.Fatal(err) } @@ -40,7 +39,7 @@ func TestCopyFile(t *testing.T) { } func TestEnsureDir(t *testing.T) { - tmp, err := ioutil.TempDir("", "ensure-dir") + tmp, err := os.MkdirTemp("", "ensure-dir") require.NoError(t, err) defer os.RemoveAll(tmp) @@ -54,7 +53,7 @@ func TestEnsureDir(t *testing.T) { require.NoError(t, err) // Should fail on file. - err = ioutil.WriteFile(filepath.Join(tmp, "file"), []byte{}, 0644) + err = os.WriteFile(filepath.Join(tmp, "file"), []byte{}, 0644) require.NoError(t, err) err = EnsureDir(filepath.Join(tmp, "file"), 0755) require.Error(t, err) @@ -76,7 +75,7 @@ func TestEnsureDir(t *testing.T) { // the origin is positively a non-directory and that it is ready for copying. // See https://github.com/tendermint/tendermint/issues/6427 func TestTrickedTruncation(t *testing.T) { - tmpDir, err := ioutil.TempDir(os.TempDir(), "pwn_truncate") + tmpDir, err := os.MkdirTemp(os.TempDir(), "pwn_truncate") if err != nil { t.Fatal(err) } @@ -84,12 +83,12 @@ func TestTrickedTruncation(t *testing.T) { originalWALPath := filepath.Join(tmpDir, "wal") originalWALContent := []byte("I AM BECOME DEATH, DESTROYER OF ALL WORLDS!") - if err := ioutil.WriteFile(originalWALPath, originalWALContent, 0755); err != nil { + if err := os.WriteFile(originalWALPath, originalWALContent, 0755); err != nil { t.Fatal(err) } // 1. Sanity check. - readWAL, err := ioutil.ReadFile(originalWALPath) + readWAL, err := os.ReadFile(originalWALPath) if err != nil { t.Fatal(err) } @@ -104,7 +103,7 @@ func TestTrickedTruncation(t *testing.T) { } // 3. Check the WAL's content - reReadWAL, err := ioutil.ReadFile(originalWALPath) + reReadWAL, err := os.ReadFile(originalWALPath) if err != nil { t.Fatal(err) } diff --git a/libs/pubsub/query/.gitignore b/libs/pubsub/query/.gitignore new file mode 100644 index 000000000..8b5301465 --- /dev/null +++ b/libs/pubsub/query/.gitignore @@ -0,0 +1,7 @@ +# This is a temporary directory to hold the peg binary, +# to work around https://github.com/pointlander/peg/issues/129. +# Note that once we have a new version of peg fixing #129, +# we may still want to keep this .gitignore to prevent anyone +# from accidentally running "git add ." and including their built +# peg binary in a commit. +.bin/ diff --git a/libs/pubsub/query/Makefile b/libs/pubsub/query/Makefile index aef42b2df..e08800817 100644 --- a/libs/pubsub/query/Makefile +++ b/libs/pubsub/query/Makefile @@ -1,11 +1,7 @@ -gen_query_parser: - go get -u -v github.com/pointlander/peg - peg -inline -switch query.peg - fuzzy_test: go get -u -v github.com/dvyukov/go-fuzz/go-fuzz go get -u -v github.com/dvyukov/go-fuzz/go-fuzz-build go-fuzz-build github.com/tendermint/tendermint/libs/pubsub/query/fuzz_test go-fuzz -bin=./fuzz_test-fuzz.zip -workdir=./fuzz_test/output -.PHONY: gen_query_parser fuzzy_test +.PHONY: fuzzy_test diff --git a/libs/pubsub/query/peg.go b/libs/pubsub/query/peg.go index 816589f02..d4961ed48 100644 --- a/libs/pubsub/query/peg.go +++ b/libs/pubsub/query/peg.go @@ -1,3 +1,10 @@ package query -//go:generate peg -inline -switch query.peg +// Normally I would use go run, +// but the "Code generated by" comment for peg includes the full arg0, +// which includes an unpredictable temporary directory, +// resulting in a nondeterminstic generated source file. +// Using go build is the workaround as detailed in https://github.com/pointlander/peg/issues/129. + +//go:generate go build -o ./.bin/peg github.com/pointlander/peg +//go:generate ./.bin/peg -inline -switch query.peg diff --git a/libs/pubsub/query/query.go b/libs/pubsub/query/query.go index cf6903ccf..0643bc3cb 100644 --- a/libs/pubsub/query/query.go +++ b/libs/pubsub/query/query.go @@ -39,7 +39,9 @@ type Condition struct { // invalid. func New(s string) (*Query, error) { p := &QueryParser{Buffer: fmt.Sprintf(`"%s"`, s)} - p.Init() + if err := p.Init(); err != nil { + return nil, err + } if err := p.Parse(); err != nil { return nil, err } @@ -101,7 +103,7 @@ func (q *Query) Conditions() ([]Condition, error) { buffer, begin, end := q.parser.Buffer, 0, 0 // tokens must be in the following order: tag ("tx.gas") -> operator ("=") -> operand ("7") - for token := range q.parser.Tokens() { + for _, token := range q.parser.Tokens() { switch token.pegRule { case rulePegText: begin, end = int(token.begin), int(token.end) @@ -213,7 +215,7 @@ func (q *Query) Matches(events map[string][]string) (bool, error) { // tokens must be in the following order: // tag ("tx.gas") -> operator ("=") -> operand ("7") - for token := range q.parser.Tokens() { + for _, token := range q.parser.Tokens() { switch token.pegRule { case rulePegText: begin, end = int(token.begin), int(token.end) diff --git a/libs/pubsub/query/query.peg.go b/libs/pubsub/query/query.peg.go index a8e14c869..e2d160652 100644 --- a/libs/pubsub/query/query.peg.go +++ b/libs/pubsub/query/query.peg.go @@ -1,11 +1,14 @@ -// nolint package query +// Code generated by ./.bin/peg -inline -switch query.peg DO NOT EDIT. + import ( "fmt" - "math" + "io" + "os" "sort" "strconv" + "strings" ) const endSymbol rune = 1114112 @@ -35,10 +38,6 @@ const ( rulel ruleg rulePegText - - rulePre - ruleIn - ruleSuf ) var rul3s = [...]string{ @@ -63,10 +62,15 @@ var rul3s = [...]string{ "l", "g", "PegText", +} - "Pre_", - "_In_", - "_Suf", +type token32 struct { + pegRule + begin, end uint32 +} + +func (t *token32) String() string { + return fmt.Sprintf("\x1B[34m%v\x1B[m %v %v", rul3s[t.pegRule], t.begin, t.end) } type node32 struct { @@ -74,57 +78,43 @@ type node32 struct { up, next *node32 } -func (node *node32) print(depth int, buffer string) { - for node != nil { - for c := 0; c < depth; c++ { - fmt.Printf(" ") +func (node *node32) print(w io.Writer, pretty bool, buffer string) { + var print func(node *node32, depth int) + print = func(node *node32, depth int) { + for node != nil { + for c := 0; c < depth; c++ { + fmt.Fprintf(w, " ") + } + rule := rul3s[node.pegRule] + quote := strconv.Quote(string(([]rune(buffer)[node.begin:node.end]))) + if !pretty { + fmt.Fprintf(w, "%v %v\n", rule, quote) + } else { + fmt.Fprintf(w, "\x1B[36m%v\x1B[m %v\n", rule, quote) + } + if node.up != nil { + print(node.up, depth+1) + } + node = node.next } - fmt.Printf("\x1B[34m%v\x1B[m %v\n", rul3s[node.pegRule], strconv.Quote(string(([]rune(buffer)[node.begin:node.end])))) - if node.up != nil { - node.up.print(depth+1, buffer) - } - node = node.next } + print(node, 0) } -func (node *node32) Print(buffer string) { - node.print(0, buffer) +func (node *node32) Print(w io.Writer, buffer string) { + node.print(w, false, buffer) } -type element struct { - node *node32 - down *element -} - -/* ${@} bit structure for abstract syntax tree */ -type token32 struct { - pegRule - begin, end, next uint32 -} - -func (t *token32) isZero() bool { - return t.pegRule == ruleUnknown && t.begin == 0 && t.end == 0 && t.next == 0 -} - -func (t *token32) isParentOf(u token32) bool { - return t.begin <= u.begin && t.end >= u.end && t.next > u.next -} - -func (t *token32) getToken32() token32 { - return token32{pegRule: t.pegRule, begin: uint32(t.begin), end: uint32(t.end), next: uint32(t.next)} -} - -func (t *token32) String() string { - return fmt.Sprintf("\x1B[34m%v\x1B[m %v %v %v", rul3s[t.pegRule], t.begin, t.end, t.next) +func (node *node32) PrettyPrint(w io.Writer, buffer string) { + node.print(w, true, buffer) } type tokens32 struct { - tree []token32 - ordered [][]token32 + tree []token32 } -func (t *tokens32) trim(length int) { - t.tree = t.tree[0:length] +func (t *tokens32) Trim(length uint32) { + t.tree = t.tree[:length] } func (t *tokens32) Print() { @@ -133,51 +123,14 @@ func (t *tokens32) Print() { } } -func (t *tokens32) Order() [][]token32 { - if t.ordered != nil { - return t.ordered - } - - depths := make([]int32, 1, math.MaxInt16) - for i, token := range t.tree { - if token.pegRule == ruleUnknown { - t.tree = t.tree[:i] - break - } - depth := int(token.next) - if length := len(depths); depth >= length { - depths = depths[:depth+1] - } - depths[depth]++ - } - depths = append(depths, 0) - - ordered, pool := make([][]token32, len(depths)), make([]token32, len(t.tree)+len(depths)) - for i, depth := range depths { - depth++ - ordered[i], pool, depths[i] = pool[:depth], pool[depth:], 0 - } - - for i, token := range t.tree { - depth := token.next - token.next = uint32(i) - ordered[depth][depths[depth]] = token - depths[depth]++ - } - t.ordered = ordered - return ordered -} - -type state32 struct { - token32 - depths []int32 - leaf bool -} - func (t *tokens32) AST() *node32 { + type element struct { + node *node32 + down *element + } tokens := t.Tokens() - stack := &element{node: &node32{token32: <-tokens}} - for token := range tokens { + var stack *element + for _, token := range tokens { if token.begin == token.end { continue } @@ -189,180 +142,55 @@ func (t *tokens32) AST() *node32 { } stack = &element{node: node, down: stack} } - return stack.node -} - -func (t *tokens32) PreOrder() (<-chan state32, [][]token32) { - s, ordered := make(chan state32, 6), t.Order() - go func() { - var states [8]state32 - for i := range states { - states[i].depths = make([]int32, len(ordered)) - } - depths, state, depth := make([]int32, len(ordered)), 0, 1 - write := func(t token32, leaf bool) { - S := states[state] - state, S.pegRule, S.begin, S.end, S.next, S.leaf = (state+1)%8, t.pegRule, t.begin, t.end, uint32(depth), leaf - copy(S.depths, depths) - s <- S - } - - states[state].token32 = ordered[0][0] - depths[0]++ - state++ - a, b := ordered[depth-1][depths[depth-1]-1], ordered[depth][depths[depth]] - depthFirstSearch: - for { - for { - if i := depths[depth]; i > 0 { - if c, j := ordered[depth][i-1], depths[depth-1]; a.isParentOf(c) && - (j < 2 || !ordered[depth-1][j-2].isParentOf(c)) { - if c.end != b.begin { - write(token32{pegRule: ruleIn, begin: c.end, end: b.begin}, true) - } - break - } - } - - if a.begin < b.begin { - write(token32{pegRule: rulePre, begin: a.begin, end: b.begin}, true) - } - break - } - - next := depth + 1 - if c := ordered[next][depths[next]]; c.pegRule != ruleUnknown && b.isParentOf(c) { - write(b, false) - depths[depth]++ - depth, a, b = next, b, c - continue - } - - write(b, true) - depths[depth]++ - c, parent := ordered[depth][depths[depth]], true - for { - if c.pegRule != ruleUnknown && a.isParentOf(c) { - b = c - continue depthFirstSearch - } else if parent && b.end != a.end { - write(token32{pegRule: ruleSuf, begin: b.end, end: a.end}, true) - } - - depth-- - if depth > 0 { - a, b, c = ordered[depth-1][depths[depth-1]-1], a, ordered[depth][depths[depth]] - parent = a.isParentOf(b) - continue - } - - break depthFirstSearch - } - } - - close(s) - }() - return s, ordered -} - -func (t *tokens32) PrintSyntax() { - tokens, ordered := t.PreOrder() - max := -1 - for token := range tokens { - if !token.leaf { - fmt.Printf("%v", token.begin) - for i, leaf, depths := 0, int(token.next), token.depths; i < leaf; i++ { - fmt.Printf(" \x1B[36m%v\x1B[m", rul3s[ordered[i][depths[i]-1].pegRule]) - } - fmt.Printf(" \x1B[36m%v\x1B[m\n", rul3s[token.pegRule]) - } else if token.begin == token.end { - fmt.Printf("%v", token.begin) - for i, leaf, depths := 0, int(token.next), token.depths; i < leaf; i++ { - fmt.Printf(" \x1B[31m%v\x1B[m", rul3s[ordered[i][depths[i]-1].pegRule]) - } - fmt.Printf(" \x1B[31m%v\x1B[m\n", rul3s[token.pegRule]) - } else { - for c, end := token.begin, token.end; c < end; c++ { - if i := int(c); max+1 < i { - for j := max; j < i; j++ { - fmt.Printf("skip %v %v\n", j, token.String()) - } - max = i - } else if i := int(c); i <= max { - for j := i; j <= max; j++ { - fmt.Printf("dupe %v %v\n", j, token.String()) - } - } else { - max = int(c) - } - fmt.Printf("%v", c) - for i, leaf, depths := 0, int(token.next), token.depths; i < leaf; i++ { - fmt.Printf(" \x1B[34m%v\x1B[m", rul3s[ordered[i][depths[i]-1].pegRule]) - } - fmt.Printf(" \x1B[34m%v\x1B[m\n", rul3s[token.pegRule]) - } - fmt.Printf("\n") - } + if stack != nil { + return stack.node } + return nil } func (t *tokens32) PrintSyntaxTree(buffer string) { - tokens, _ := t.PreOrder() - for token := range tokens { - for c := 0; c < int(token.next); c++ { - fmt.Printf(" ") - } - fmt.Printf("\x1B[34m%v\x1B[m %v\n", rul3s[token.pegRule], strconv.Quote(string(([]rune(buffer)[token.begin:token.end])))) + t.AST().Print(os.Stdout, buffer) +} + +func (t *tokens32) WriteSyntaxTree(w io.Writer, buffer string) { + t.AST().Print(w, buffer) +} + +func (t *tokens32) PrettyPrintSyntaxTree(buffer string) { + t.AST().PrettyPrint(os.Stdout, buffer) +} + +func (t *tokens32) Add(rule pegRule, begin, end, index uint32) { + tree, i := t.tree, int(index) + if i >= len(tree) { + t.tree = append(tree, token32{pegRule: rule, begin: begin, end: end}) + return } + tree[i] = token32{pegRule: rule, begin: begin, end: end} } -func (t *tokens32) Add(rule pegRule, begin, end, depth uint32, index int) { - t.tree[index] = token32{pegRule: rule, begin: uint32(begin), end: uint32(end), next: uint32(depth)} -} - -func (t *tokens32) Tokens() <-chan token32 { - s := make(chan token32, 16) - go func() { - for _, v := range t.tree { - s <- v.getToken32() - } - close(s) - }() - return s -} - -func (t *tokens32) Error() []token32 { - ordered := t.Order() - length := len(ordered) - tokens, length := make([]token32, length), length-1 - for i := range tokens { - o := ordered[length-i] - if len(o) > 1 { - tokens[i] = o[len(o)-2].getToken32() - } - } - return tokens -} - -func (t *tokens32) Expand(index int) { - tree := t.tree - if index >= len(tree) { - expanded := make([]token32, 2*len(tree)) - copy(expanded, tree) - t.tree = expanded - } +func (t *tokens32) Tokens() []token32 { + return t.tree } type QueryParser struct { Buffer string buffer []rune rules [21]func() bool - Parse func(rule ...int) error - Reset func() + parse func(rule ...int) error + reset func() Pretty bool tokens32 } +func (p *QueryParser) Parse(rule ...int) error { + return p.parse(rule...) +} + +func (p *QueryParser) Reset() { + p.reset() +} + type textPosition struct { line, symbol int } @@ -400,7 +228,7 @@ type parseError struct { } func (e *parseError) Error() string { - tokens, error := []token32{e.max}, "\n" + tokens, err := []token32{e.max}, "\n" positions, p := make([]int, 2*len(tokens)), 0 for _, token := range tokens { positions[p], p = int(token.begin), p+1 @@ -413,35 +241,74 @@ func (e *parseError) Error() string { } for _, token := range tokens { begin, end := int(token.begin), int(token.end) - error += fmt.Sprintf(format, + err += fmt.Sprintf(format, rul3s[token.pegRule], translations[begin].line, translations[begin].symbol, translations[end].line, translations[end].symbol, strconv.Quote(string(e.p.buffer[begin:end]))) } - return error + return err } func (p *QueryParser) PrintSyntaxTree() { - p.tokens32.PrintSyntaxTree(p.Buffer) -} - -func (p *QueryParser) Highlighter() { - p.PrintSyntax() -} - -func (p *QueryParser) Init() { - p.buffer = []rune(p.Buffer) - if len(p.buffer) == 0 || p.buffer[len(p.buffer)-1] != endSymbol { - p.buffer = append(p.buffer, endSymbol) + if p.Pretty { + p.tokens32.PrettyPrintSyntaxTree(p.Buffer) + } else { + p.tokens32.PrintSyntaxTree(p.Buffer) } +} - tree := tokens32{tree: make([]token32, math.MaxInt16)} - var max token32 - position, depth, tokenIndex, buffer, _rules := uint32(0), uint32(0), 0, p.buffer, p.rules +func (p *QueryParser) WriteSyntaxTree(w io.Writer) { + p.tokens32.WriteSyntaxTree(w, p.Buffer) +} - p.Parse = func(rule ...int) error { +func (p *QueryParser) SprintSyntaxTree() string { + var bldr strings.Builder + p.WriteSyntaxTree(&bldr) + return bldr.String() +} + +func Pretty(pretty bool) func(*QueryParser) error { + return func(p *QueryParser) error { + p.Pretty = pretty + return nil + } +} + +func Size(size int) func(*QueryParser) error { + return func(p *QueryParser) error { + p.tokens32 = tokens32{tree: make([]token32, 0, size)} + return nil + } +} +func (p *QueryParser) Init(options ...func(*QueryParser) error) error { + var ( + max token32 + position, tokenIndex uint32 + buffer []rune + ) + for _, option := range options { + err := option(p) + if err != nil { + return err + } + } + p.reset = func() { + max = token32{} + position, tokenIndex = 0, 0 + + p.buffer = []rune(p.Buffer) + if len(p.buffer) == 0 || p.buffer[len(p.buffer)-1] != endSymbol { + p.buffer = append(p.buffer, endSymbol) + } + buffer = p.buffer + } + p.reset() + + _rules := p.rules + tree := p.tokens32 + p.parse = func(rule ...int) error { r := 1 if len(rule) > 0 { r = rule[0] @@ -449,22 +316,17 @@ func (p *QueryParser) Init() { matches := p.rules[r]() p.tokens32 = tree if matches { - p.trim(tokenIndex) + p.Trim(tokenIndex) return nil } return &parseError{p, max} } - p.Reset = func() { - position, tokenIndex, depth = 0, 0, 0 - } - add := func(rule pegRule, begin uint32) { - tree.Expand(tokenIndex) - tree.Add(rule, begin, position, depth, tokenIndex) + tree.Add(rule, begin, position, tokenIndex) tokenIndex++ if begin != position && position > max.end { - max = token32{rule, begin, position, depth} + max = token32{rule, begin, position} } } @@ -496,10 +358,9 @@ func (p *QueryParser) Init() { nil, /* 0 e <- <('"' condition (' '+ and ' '+ condition)* '"' !.)> */ func() bool { - position0, tokenIndex0, depth0 := position, tokenIndex, depth + position0, tokenIndex0 := position, tokenIndex { position1 := position - depth++ if buffer[position] != rune('"') { goto l0 } @@ -509,34 +370,33 @@ func (p *QueryParser) Init() { } l2: { - position3, tokenIndex3, depth3 := position, tokenIndex, depth + position3, tokenIndex3 := position, tokenIndex if buffer[position] != rune(' ') { goto l3 } position++ l4: { - position5, tokenIndex5, depth5 := position, tokenIndex, depth + position5, tokenIndex5 := position, tokenIndex if buffer[position] != rune(' ') { goto l5 } position++ goto l4 l5: - position, tokenIndex, depth = position5, tokenIndex5, depth5 + position, tokenIndex = position5, tokenIndex5 } { position6 := position - depth++ { - position7, tokenIndex7, depth7 := position, tokenIndex, depth + position7, tokenIndex7 := position, tokenIndex if buffer[position] != rune('a') { goto l8 } position++ goto l7 l8: - position, tokenIndex, depth = position7, tokenIndex7, depth7 + position, tokenIndex = position7, tokenIndex7 if buffer[position] != rune('A') { goto l3 } @@ -544,14 +404,14 @@ func (p *QueryParser) Init() { } l7: { - position9, tokenIndex9, depth9 := position, tokenIndex, depth + position9, tokenIndex9 := position, tokenIndex if buffer[position] != rune('n') { goto l10 } position++ goto l9 l10: - position, tokenIndex, depth = position9, tokenIndex9, depth9 + position, tokenIndex = position9, tokenIndex9 if buffer[position] != rune('N') { goto l3 } @@ -559,21 +419,20 @@ func (p *QueryParser) Init() { } l9: { - position11, tokenIndex11, depth11 := position, tokenIndex, depth + position11, tokenIndex11 := position, tokenIndex if buffer[position] != rune('d') { goto l12 } position++ goto l11 l12: - position, tokenIndex, depth = position11, tokenIndex11, depth11 + position, tokenIndex = position11, tokenIndex11 if buffer[position] != rune('D') { goto l3 } position++ } l11: - depth-- add(ruleand, position6) } if buffer[position] != rune(' ') { @@ -582,57 +441,53 @@ func (p *QueryParser) Init() { position++ l13: { - position14, tokenIndex14, depth14 := position, tokenIndex, depth + position14, tokenIndex14 := position, tokenIndex if buffer[position] != rune(' ') { goto l14 } position++ goto l13 l14: - position, tokenIndex, depth = position14, tokenIndex14, depth14 + position, tokenIndex = position14, tokenIndex14 } if !_rules[rulecondition]() { goto l3 } goto l2 l3: - position, tokenIndex, depth = position3, tokenIndex3, depth3 + position, tokenIndex = position3, tokenIndex3 } if buffer[position] != rune('"') { goto l0 } position++ { - position15, tokenIndex15, depth15 := position, tokenIndex, depth + position15, tokenIndex15 := position, tokenIndex if !matchDot() { goto l15 } goto l0 l15: - position, tokenIndex, depth = position15, tokenIndex15, depth15 + position, tokenIndex = position15, tokenIndex15 } - depth-- add(rulee, position1) } return true l0: - position, tokenIndex, depth = position0, tokenIndex0, depth0 + position, tokenIndex = position0, tokenIndex0 return false }, /* 1 condition <- <(tag ' '* ((le ' '* ((&('D' | 'd') date) | (&('T' | 't') time) | (&('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') number))) / (ge ' '* ((&('D' | 'd') date) | (&('T' | 't') time) | (&('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') number))) / ((&('E' | 'e') exists) | (&('=') (equal ' '* ((&('\'') value) | (&('D' | 'd') date) | (&('T' | 't') time) | (&('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') number)))) | (&('>') (g ' '* ((&('D' | 'd') date) | (&('T' | 't') time) | (&('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') number)))) | (&('<') (l ' '* ((&('D' | 'd') date) | (&('T' | 't') time) | (&('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') number)))) | (&('C' | 'c') (contains ' '* value)))))> */ func() bool { - position16, tokenIndex16, depth16 := position, tokenIndex, depth + position16, tokenIndex16 := position, tokenIndex { position17 := position - depth++ { position18 := position - depth++ { position19 := position - depth++ { - position22, tokenIndex22, depth22 := position, tokenIndex, depth + position22, tokenIndex22 := position, tokenIndex { switch buffer[position] { case '<': @@ -640,88 +495,76 @@ func (p *QueryParser) Init() { goto l22 } position++ - break case '>': if buffer[position] != rune('>') { goto l22 } position++ - break case '=': if buffer[position] != rune('=') { goto l22 } position++ - break case '\'': if buffer[position] != rune('\'') { goto l22 } position++ - break case '"': if buffer[position] != rune('"') { goto l22 } position++ - break case ')': if buffer[position] != rune(')') { goto l22 } position++ - break case '(': if buffer[position] != rune('(') { goto l22 } position++ - break case '\\': if buffer[position] != rune('\\') { goto l22 } position++ - break case '\r': if buffer[position] != rune('\r') { goto l22 } position++ - break case '\n': if buffer[position] != rune('\n') { goto l22 } position++ - break case '\t': if buffer[position] != rune('\t') { goto l22 } position++ - break default: if buffer[position] != rune(' ') { goto l22 } position++ - break } } goto l16 l22: - position, tokenIndex, depth = position22, tokenIndex22, depth22 + position, tokenIndex = position22, tokenIndex22 } if !matchDot() { goto l16 } l20: { - position21, tokenIndex21, depth21 := position, tokenIndex, depth + position21, tokenIndex21 := position, tokenIndex { - position24, tokenIndex24, depth24 := position, tokenIndex, depth + position24, tokenIndex24 := position, tokenIndex { switch buffer[position] { case '<': @@ -729,109 +572,94 @@ func (p *QueryParser) Init() { goto l24 } position++ - break case '>': if buffer[position] != rune('>') { goto l24 } position++ - break case '=': if buffer[position] != rune('=') { goto l24 } position++ - break case '\'': if buffer[position] != rune('\'') { goto l24 } position++ - break case '"': if buffer[position] != rune('"') { goto l24 } position++ - break case ')': if buffer[position] != rune(')') { goto l24 } position++ - break case '(': if buffer[position] != rune('(') { goto l24 } position++ - break case '\\': if buffer[position] != rune('\\') { goto l24 } position++ - break case '\r': if buffer[position] != rune('\r') { goto l24 } position++ - break case '\n': if buffer[position] != rune('\n') { goto l24 } position++ - break case '\t': if buffer[position] != rune('\t') { goto l24 } position++ - break default: if buffer[position] != rune(' ') { goto l24 } position++ - break } } goto l21 l24: - position, tokenIndex, depth = position24, tokenIndex24, depth24 + position, tokenIndex = position24, tokenIndex24 } if !matchDot() { goto l21 } goto l20 l21: - position, tokenIndex, depth = position21, tokenIndex21, depth21 + position, tokenIndex = position21, tokenIndex21 } - depth-- add(rulePegText, position19) } - depth-- add(ruletag, position18) } l26: { - position27, tokenIndex27, depth27 := position, tokenIndex, depth + position27, tokenIndex27 := position, tokenIndex if buffer[position] != rune(' ') { goto l27 } position++ goto l26 l27: - position, tokenIndex, depth = position27, tokenIndex27, depth27 + position, tokenIndex = position27, tokenIndex27 } { - position28, tokenIndex28, depth28 := position, tokenIndex, depth + position28, tokenIndex28 := position, tokenIndex { position30 := position - depth++ if buffer[position] != rune('<') { goto l29 } @@ -840,19 +668,18 @@ func (p *QueryParser) Init() { goto l29 } position++ - depth-- add(rulele, position30) } l31: { - position32, tokenIndex32, depth32 := position, tokenIndex, depth + position32, tokenIndex32 := position, tokenIndex if buffer[position] != rune(' ') { goto l32 } position++ goto l31 l32: - position, tokenIndex, depth = position32, tokenIndex32, depth32 + position, tokenIndex = position32, tokenIndex32 } { switch buffer[position] { @@ -860,26 +687,22 @@ func (p *QueryParser) Init() { if !_rules[ruledate]() { goto l29 } - break case 'T', 't': if !_rules[ruletime]() { goto l29 } - break default: if !_rules[rulenumber]() { goto l29 } - break } } goto l28 l29: - position, tokenIndex, depth = position28, tokenIndex28, depth28 + position, tokenIndex = position28, tokenIndex28 { position35 := position - depth++ if buffer[position] != rune('>') { goto l34 } @@ -888,19 +711,18 @@ func (p *QueryParser) Init() { goto l34 } position++ - depth-- add(rulege, position35) } l36: { - position37, tokenIndex37, depth37 := position, tokenIndex, depth + position37, tokenIndex37 := position, tokenIndex if buffer[position] != rune(' ') { goto l37 } position++ goto l36 l37: - position, tokenIndex, depth = position37, tokenIndex37, depth37 + position, tokenIndex = position37, tokenIndex37 } { switch buffer[position] { @@ -908,38 +730,34 @@ func (p *QueryParser) Init() { if !_rules[ruledate]() { goto l34 } - break case 'T', 't': if !_rules[ruletime]() { goto l34 } - break default: if !_rules[rulenumber]() { goto l34 } - break } } goto l28 l34: - position, tokenIndex, depth = position28, tokenIndex28, depth28 + position, tokenIndex = position28, tokenIndex28 { switch buffer[position] { case 'E', 'e': { position40 := position - depth++ { - position41, tokenIndex41, depth41 := position, tokenIndex, depth + position41, tokenIndex41 := position, tokenIndex if buffer[position] != rune('e') { goto l42 } position++ goto l41 l42: - position, tokenIndex, depth = position41, tokenIndex41, depth41 + position, tokenIndex = position41, tokenIndex41 if buffer[position] != rune('E') { goto l16 } @@ -947,14 +765,14 @@ func (p *QueryParser) Init() { } l41: { - position43, tokenIndex43, depth43 := position, tokenIndex, depth + position43, tokenIndex43 := position, tokenIndex if buffer[position] != rune('x') { goto l44 } position++ goto l43 l44: - position, tokenIndex, depth = position43, tokenIndex43, depth43 + position, tokenIndex = position43, tokenIndex43 if buffer[position] != rune('X') { goto l16 } @@ -962,14 +780,14 @@ func (p *QueryParser) Init() { } l43: { - position45, tokenIndex45, depth45 := position, tokenIndex, depth + position45, tokenIndex45 := position, tokenIndex if buffer[position] != rune('i') { goto l46 } position++ goto l45 l46: - position, tokenIndex, depth = position45, tokenIndex45, depth45 + position, tokenIndex = position45, tokenIndex45 if buffer[position] != rune('I') { goto l16 } @@ -977,14 +795,14 @@ func (p *QueryParser) Init() { } l45: { - position47, tokenIndex47, depth47 := position, tokenIndex, depth + position47, tokenIndex47 := position, tokenIndex if buffer[position] != rune('s') { goto l48 } position++ goto l47 l48: - position, tokenIndex, depth = position47, tokenIndex47, depth47 + position, tokenIndex = position47, tokenIndex47 if buffer[position] != rune('S') { goto l16 } @@ -992,14 +810,14 @@ func (p *QueryParser) Init() { } l47: { - position49, tokenIndex49, depth49 := position, tokenIndex, depth + position49, tokenIndex49 := position, tokenIndex if buffer[position] != rune('t') { goto l50 } position++ goto l49 l50: - position, tokenIndex, depth = position49, tokenIndex49, depth49 + position, tokenIndex = position49, tokenIndex49 if buffer[position] != rune('T') { goto l16 } @@ -1007,45 +825,41 @@ func (p *QueryParser) Init() { } l49: { - position51, tokenIndex51, depth51 := position, tokenIndex, depth + position51, tokenIndex51 := position, tokenIndex if buffer[position] != rune('s') { goto l52 } position++ goto l51 l52: - position, tokenIndex, depth = position51, tokenIndex51, depth51 + position, tokenIndex = position51, tokenIndex51 if buffer[position] != rune('S') { goto l16 } position++ } l51: - depth-- add(ruleexists, position40) } - break case '=': { position53 := position - depth++ if buffer[position] != rune('=') { goto l16 } position++ - depth-- add(ruleequal, position53) } l54: { - position55, tokenIndex55, depth55 := position, tokenIndex, depth + position55, tokenIndex55 := position, tokenIndex if buffer[position] != rune(' ') { goto l55 } position++ goto l54 l55: - position, tokenIndex, depth = position55, tokenIndex55, depth55 + position, tokenIndex = position55, tokenIndex55 } { switch buffer[position] { @@ -1053,47 +867,40 @@ func (p *QueryParser) Init() { if !_rules[rulevalue]() { goto l16 } - break case 'D', 'd': if !_rules[ruledate]() { goto l16 } - break case 'T', 't': if !_rules[ruletime]() { goto l16 } - break default: if !_rules[rulenumber]() { goto l16 } - break } } - break case '>': { position57 := position - depth++ if buffer[position] != rune('>') { goto l16 } position++ - depth-- add(ruleg, position57) } l58: { - position59, tokenIndex59, depth59 := position, tokenIndex, depth + position59, tokenIndex59 := position, tokenIndex if buffer[position] != rune(' ') { goto l59 } position++ goto l58 l59: - position, tokenIndex, depth = position59, tokenIndex59, depth59 + position, tokenIndex = position59, tokenIndex59 } { switch buffer[position] { @@ -1101,42 +908,36 @@ func (p *QueryParser) Init() { if !_rules[ruledate]() { goto l16 } - break case 'T', 't': if !_rules[ruletime]() { goto l16 } - break default: if !_rules[rulenumber]() { goto l16 } - break } } - break case '<': { position61 := position - depth++ if buffer[position] != rune('<') { goto l16 } position++ - depth-- add(rulel, position61) } l62: { - position63, tokenIndex63, depth63 := position, tokenIndex, depth + position63, tokenIndex63 := position, tokenIndex if buffer[position] != rune(' ') { goto l63 } position++ goto l62 l63: - position, tokenIndex, depth = position63, tokenIndex63, depth63 + position, tokenIndex = position63, tokenIndex63 } { switch buffer[position] { @@ -1144,34 +945,29 @@ func (p *QueryParser) Init() { if !_rules[ruledate]() { goto l16 } - break case 'T', 't': if !_rules[ruletime]() { goto l16 } - break default: if !_rules[rulenumber]() { goto l16 } - break } } - break default: { position65 := position - depth++ { - position66, tokenIndex66, depth66 := position, tokenIndex, depth + position66, tokenIndex66 := position, tokenIndex if buffer[position] != rune('c') { goto l67 } position++ goto l66 l67: - position, tokenIndex, depth = position66, tokenIndex66, depth66 + position, tokenIndex = position66, tokenIndex66 if buffer[position] != rune('C') { goto l16 } @@ -1179,14 +975,14 @@ func (p *QueryParser) Init() { } l66: { - position68, tokenIndex68, depth68 := position, tokenIndex, depth + position68, tokenIndex68 := position, tokenIndex if buffer[position] != rune('o') { goto l69 } position++ goto l68 l69: - position, tokenIndex, depth = position68, tokenIndex68, depth68 + position, tokenIndex = position68, tokenIndex68 if buffer[position] != rune('O') { goto l16 } @@ -1194,14 +990,14 @@ func (p *QueryParser) Init() { } l68: { - position70, tokenIndex70, depth70 := position, tokenIndex, depth + position70, tokenIndex70 := position, tokenIndex if buffer[position] != rune('n') { goto l71 } position++ goto l70 l71: - position, tokenIndex, depth = position70, tokenIndex70, depth70 + position, tokenIndex = position70, tokenIndex70 if buffer[position] != rune('N') { goto l16 } @@ -1209,14 +1005,14 @@ func (p *QueryParser) Init() { } l70: { - position72, tokenIndex72, depth72 := position, tokenIndex, depth + position72, tokenIndex72 := position, tokenIndex if buffer[position] != rune('t') { goto l73 } position++ goto l72 l73: - position, tokenIndex, depth = position72, tokenIndex72, depth72 + position, tokenIndex = position72, tokenIndex72 if buffer[position] != rune('T') { goto l16 } @@ -1224,14 +1020,14 @@ func (p *QueryParser) Init() { } l72: { - position74, tokenIndex74, depth74 := position, tokenIndex, depth + position74, tokenIndex74 := position, tokenIndex if buffer[position] != rune('a') { goto l75 } position++ goto l74 l75: - position, tokenIndex, depth = position74, tokenIndex74, depth74 + position, tokenIndex = position74, tokenIndex74 if buffer[position] != rune('A') { goto l16 } @@ -1239,14 +1035,14 @@ func (p *QueryParser) Init() { } l74: { - position76, tokenIndex76, depth76 := position, tokenIndex, depth + position76, tokenIndex76 := position, tokenIndex if buffer[position] != rune('i') { goto l77 } position++ goto l76 l77: - position, tokenIndex, depth = position76, tokenIndex76, depth76 + position, tokenIndex = position76, tokenIndex76 if buffer[position] != rune('I') { goto l16 } @@ -1254,14 +1050,14 @@ func (p *QueryParser) Init() { } l76: { - position78, tokenIndex78, depth78 := position, tokenIndex, depth + position78, tokenIndex78 := position, tokenIndex if buffer[position] != rune('n') { goto l79 } position++ goto l78 l79: - position, tokenIndex, depth = position78, tokenIndex78, depth78 + position, tokenIndex = position78, tokenIndex78 if buffer[position] != rune('N') { goto l16 } @@ -1269,80 +1065,75 @@ func (p *QueryParser) Init() { } l78: { - position80, tokenIndex80, depth80 := position, tokenIndex, depth + position80, tokenIndex80 := position, tokenIndex if buffer[position] != rune('s') { goto l81 } position++ goto l80 l81: - position, tokenIndex, depth = position80, tokenIndex80, depth80 + position, tokenIndex = position80, tokenIndex80 if buffer[position] != rune('S') { goto l16 } position++ } l80: - depth-- add(rulecontains, position65) } l82: { - position83, tokenIndex83, depth83 := position, tokenIndex, depth + position83, tokenIndex83 := position, tokenIndex if buffer[position] != rune(' ') { goto l83 } position++ goto l82 l83: - position, tokenIndex, depth = position83, tokenIndex83, depth83 + position, tokenIndex = position83, tokenIndex83 } if !_rules[rulevalue]() { goto l16 } - break } } } l28: - depth-- add(rulecondition, position17) } return true l16: - position, tokenIndex, depth = position16, tokenIndex16, depth16 + position, tokenIndex = position16, tokenIndex16 return false }, /* 2 tag <- <<(!((&('<') '<') | (&('>') '>') | (&('=') '=') | (&('\'') '\'') | (&('"') '"') | (&(')') ')') | (&('(') '(') | (&('\\') '\\') | (&('\r') '\r') | (&('\n') '\n') | (&('\t') '\t') | (&(' ') ' ')) .)+>> */ nil, /* 3 value <- <<('\'' (!('"' / '\'') .)* '\'')>> */ func() bool { - position85, tokenIndex85, depth85 := position, tokenIndex, depth + position85, tokenIndex85 := position, tokenIndex { position86 := position - depth++ { position87 := position - depth++ if buffer[position] != rune('\'') { goto l85 } position++ l88: { - position89, tokenIndex89, depth89 := position, tokenIndex, depth + position89, tokenIndex89 := position, tokenIndex { - position90, tokenIndex90, depth90 := position, tokenIndex, depth + position90, tokenIndex90 := position, tokenIndex { - position91, tokenIndex91, depth91 := position, tokenIndex, depth + position91, tokenIndex91 := position, tokenIndex if buffer[position] != rune('"') { goto l92 } position++ goto l91 l92: - position, tokenIndex, depth = position91, tokenIndex91, depth91 + position, tokenIndex = position91, tokenIndex91 if buffer[position] != rune('\'') { goto l90 } @@ -1351,129 +1142,120 @@ func (p *QueryParser) Init() { l91: goto l89 l90: - position, tokenIndex, depth = position90, tokenIndex90, depth90 + position, tokenIndex = position90, tokenIndex90 } if !matchDot() { goto l89 } goto l88 l89: - position, tokenIndex, depth = position89, tokenIndex89, depth89 + position, tokenIndex = position89, tokenIndex89 } if buffer[position] != rune('\'') { goto l85 } position++ - depth-- add(rulePegText, position87) } - depth-- add(rulevalue, position86) } return true l85: - position, tokenIndex, depth = position85, tokenIndex85, depth85 + position, tokenIndex = position85, tokenIndex85 return false }, /* 4 number <- <<('0' / ([1-9] digit* ('.' digit*)?))>> */ func() bool { - position93, tokenIndex93, depth93 := position, tokenIndex, depth + position93, tokenIndex93 := position, tokenIndex { position94 := position - depth++ { position95 := position - depth++ { - position96, tokenIndex96, depth96 := position, tokenIndex, depth + position96, tokenIndex96 := position, tokenIndex if buffer[position] != rune('0') { goto l97 } position++ goto l96 l97: - position, tokenIndex, depth = position96, tokenIndex96, depth96 + position, tokenIndex = position96, tokenIndex96 if c := buffer[position]; c < rune('1') || c > rune('9') { goto l93 } position++ l98: { - position99, tokenIndex99, depth99 := position, tokenIndex, depth + position99, tokenIndex99 := position, tokenIndex if !_rules[ruledigit]() { goto l99 } goto l98 l99: - position, tokenIndex, depth = position99, tokenIndex99, depth99 + position, tokenIndex = position99, tokenIndex99 } { - position100, tokenIndex100, depth100 := position, tokenIndex, depth + position100, tokenIndex100 := position, tokenIndex if buffer[position] != rune('.') { goto l100 } position++ l102: { - position103, tokenIndex103, depth103 := position, tokenIndex, depth + position103, tokenIndex103 := position, tokenIndex if !_rules[ruledigit]() { goto l103 } goto l102 l103: - position, tokenIndex, depth = position103, tokenIndex103, depth103 + position, tokenIndex = position103, tokenIndex103 } goto l101 l100: - position, tokenIndex, depth = position100, tokenIndex100, depth100 + position, tokenIndex = position100, tokenIndex100 } l101: } l96: - depth-- add(rulePegText, position95) } - depth-- add(rulenumber, position94) } return true l93: - position, tokenIndex, depth = position93, tokenIndex93, depth93 + position, tokenIndex = position93, tokenIndex93 return false }, /* 5 digit <- <[0-9]> */ func() bool { - position104, tokenIndex104, depth104 := position, tokenIndex, depth + position104, tokenIndex104 := position, tokenIndex { position105 := position - depth++ if c := buffer[position]; c < rune('0') || c > rune('9') { goto l104 } position++ - depth-- add(ruledigit, position105) } return true l104: - position, tokenIndex, depth = position104, tokenIndex104, depth104 + position, tokenIndex = position104, tokenIndex104 return false }, /* 6 time <- <(('t' / 'T') ('i' / 'I') ('m' / 'M') ('e' / 'E') ' ' <(year '-' month '-' day 'T' digit digit ':' digit digit ':' digit digit ((('-' / '+') digit digit ':' digit digit) / 'Z'))>)> */ func() bool { - position106, tokenIndex106, depth106 := position, tokenIndex, depth + position106, tokenIndex106 := position, tokenIndex { position107 := position - depth++ { - position108, tokenIndex108, depth108 := position, tokenIndex, depth + position108, tokenIndex108 := position, tokenIndex if buffer[position] != rune('t') { goto l109 } position++ goto l108 l109: - position, tokenIndex, depth = position108, tokenIndex108, depth108 + position, tokenIndex = position108, tokenIndex108 if buffer[position] != rune('T') { goto l106 } @@ -1481,14 +1263,14 @@ func (p *QueryParser) Init() { } l108: { - position110, tokenIndex110, depth110 := position, tokenIndex, depth + position110, tokenIndex110 := position, tokenIndex if buffer[position] != rune('i') { goto l111 } position++ goto l110 l111: - position, tokenIndex, depth = position110, tokenIndex110, depth110 + position, tokenIndex = position110, tokenIndex110 if buffer[position] != rune('I') { goto l106 } @@ -1496,14 +1278,14 @@ func (p *QueryParser) Init() { } l110: { - position112, tokenIndex112, depth112 := position, tokenIndex, depth + position112, tokenIndex112 := position, tokenIndex if buffer[position] != rune('m') { goto l113 } position++ goto l112 l113: - position, tokenIndex, depth = position112, tokenIndex112, depth112 + position, tokenIndex = position112, tokenIndex112 if buffer[position] != rune('M') { goto l106 } @@ -1511,14 +1293,14 @@ func (p *QueryParser) Init() { } l112: { - position114, tokenIndex114, depth114 := position, tokenIndex, depth + position114, tokenIndex114 := position, tokenIndex if buffer[position] != rune('e') { goto l115 } position++ goto l114 l115: - position, tokenIndex, depth = position114, tokenIndex114, depth114 + position, tokenIndex = position114, tokenIndex114 if buffer[position] != rune('E') { goto l106 } @@ -1531,7 +1313,6 @@ func (p *QueryParser) Init() { position++ { position116 := position - depth++ if !_rules[ruleyear]() { goto l106 } @@ -1580,16 +1361,16 @@ func (p *QueryParser) Init() { goto l106 } { - position117, tokenIndex117, depth117 := position, tokenIndex, depth + position117, tokenIndex117 := position, tokenIndex { - position119, tokenIndex119, depth119 := position, tokenIndex, depth + position119, tokenIndex119 := position, tokenIndex if buffer[position] != rune('-') { goto l120 } position++ goto l119 l120: - position, tokenIndex, depth = position119, tokenIndex119, depth119 + position, tokenIndex = position119, tokenIndex119 if buffer[position] != rune('+') { goto l118 } @@ -1614,39 +1395,36 @@ func (p *QueryParser) Init() { } goto l117 l118: - position, tokenIndex, depth = position117, tokenIndex117, depth117 + position, tokenIndex = position117, tokenIndex117 if buffer[position] != rune('Z') { goto l106 } position++ } l117: - depth-- add(rulePegText, position116) } - depth-- add(ruletime, position107) } return true l106: - position, tokenIndex, depth = position106, tokenIndex106, depth106 + position, tokenIndex = position106, tokenIndex106 return false }, /* 7 date <- <(('d' / 'D') ('a' / 'A') ('t' / 'T') ('e' / 'E') ' ' <(year '-' month '-' day)>)> */ func() bool { - position121, tokenIndex121, depth121 := position, tokenIndex, depth + position121, tokenIndex121 := position, tokenIndex { position122 := position - depth++ { - position123, tokenIndex123, depth123 := position, tokenIndex, depth + position123, tokenIndex123 := position, tokenIndex if buffer[position] != rune('d') { goto l124 } position++ goto l123 l124: - position, tokenIndex, depth = position123, tokenIndex123, depth123 + position, tokenIndex = position123, tokenIndex123 if buffer[position] != rune('D') { goto l121 } @@ -1654,14 +1432,14 @@ func (p *QueryParser) Init() { } l123: { - position125, tokenIndex125, depth125 := position, tokenIndex, depth + position125, tokenIndex125 := position, tokenIndex if buffer[position] != rune('a') { goto l126 } position++ goto l125 l126: - position, tokenIndex, depth = position125, tokenIndex125, depth125 + position, tokenIndex = position125, tokenIndex125 if buffer[position] != rune('A') { goto l121 } @@ -1669,14 +1447,14 @@ func (p *QueryParser) Init() { } l125: { - position127, tokenIndex127, depth127 := position, tokenIndex, depth + position127, tokenIndex127 := position, tokenIndex if buffer[position] != rune('t') { goto l128 } position++ goto l127 l128: - position, tokenIndex, depth = position127, tokenIndex127, depth127 + position, tokenIndex = position127, tokenIndex127 if buffer[position] != rune('T') { goto l121 } @@ -1684,14 +1462,14 @@ func (p *QueryParser) Init() { } l127: { - position129, tokenIndex129, depth129 := position, tokenIndex, depth + position129, tokenIndex129 := position, tokenIndex if buffer[position] != rune('e') { goto l130 } position++ goto l129 l130: - position, tokenIndex, depth = position129, tokenIndex129, depth129 + position, tokenIndex = position129, tokenIndex129 if buffer[position] != rune('E') { goto l121 } @@ -1704,7 +1482,6 @@ func (p *QueryParser) Init() { position++ { position131 := position - depth++ if !_rules[ruleyear]() { goto l121 } @@ -1722,32 +1499,29 @@ func (p *QueryParser) Init() { if !_rules[ruleday]() { goto l121 } - depth-- add(rulePegText, position131) } - depth-- add(ruledate, position122) } return true l121: - position, tokenIndex, depth = position121, tokenIndex121, depth121 + position, tokenIndex = position121, tokenIndex121 return false }, /* 8 year <- <(('1' / '2') digit digit digit)> */ func() bool { - position132, tokenIndex132, depth132 := position, tokenIndex, depth + position132, tokenIndex132 := position, tokenIndex { position133 := position - depth++ { - position134, tokenIndex134, depth134 := position, tokenIndex, depth + position134, tokenIndex134 := position, tokenIndex if buffer[position] != rune('1') { goto l135 } position++ goto l134 l135: - position, tokenIndex, depth = position134, tokenIndex134, depth134 + position, tokenIndex = position134, tokenIndex134 if buffer[position] != rune('2') { goto l132 } @@ -1763,29 +1537,27 @@ func (p *QueryParser) Init() { if !_rules[ruledigit]() { goto l132 } - depth-- add(ruleyear, position133) } return true l132: - position, tokenIndex, depth = position132, tokenIndex132, depth132 + position, tokenIndex = position132, tokenIndex132 return false }, /* 9 month <- <(('0' / '1') digit)> */ func() bool { - position136, tokenIndex136, depth136 := position, tokenIndex, depth + position136, tokenIndex136 := position, tokenIndex { position137 := position - depth++ { - position138, tokenIndex138, depth138 := position, tokenIndex, depth + position138, tokenIndex138 := position, tokenIndex if buffer[position] != rune('0') { goto l139 } position++ goto l138 l139: - position, tokenIndex, depth = position138, tokenIndex138, depth138 + position, tokenIndex = position138, tokenIndex138 if buffer[position] != rune('1') { goto l136 } @@ -1795,20 +1567,18 @@ func (p *QueryParser) Init() { if !_rules[ruledigit]() { goto l136 } - depth-- add(rulemonth, position137) } return true l136: - position, tokenIndex, depth = position136, tokenIndex136, depth136 + position, tokenIndex = position136, tokenIndex136 return false }, /* 10 day <- <(((&('3') '3') | (&('2') '2') | (&('1') '1') | (&('0') '0')) digit)> */ func() bool { - position140, tokenIndex140, depth140 := position, tokenIndex, depth + position140, tokenIndex140 := position, tokenIndex { position141 := position - depth++ { switch buffer[position] { case '3': @@ -1816,37 +1586,32 @@ func (p *QueryParser) Init() { goto l140 } position++ - break case '2': if buffer[position] != rune('2') { goto l140 } position++ - break case '1': if buffer[position] != rune('1') { goto l140 } position++ - break default: if buffer[position] != rune('0') { goto l140 } position++ - break } } if !_rules[ruledigit]() { goto l140 } - depth-- add(ruleday, position141) } return true l140: - position, tokenIndex, depth = position140, tokenIndex140, depth140 + position, tokenIndex = position140, tokenIndex140 return false }, /* 11 and <- <(('a' / 'A') ('n' / 'N') ('d' / 'D'))> */ @@ -1868,4 +1633,5 @@ func (p *QueryParser) Init() { nil, } p.rules = _rules + return nil } diff --git a/libs/tempfile/tempfile_test.go b/libs/tempfile/tempfile_test.go index 9d07f806b..6ccdb22dd 100644 --- a/libs/tempfile/tempfile_test.go +++ b/libs/tempfile/tempfile_test.go @@ -5,7 +5,6 @@ package tempfile import ( "bytes" "fmt" - "io/ioutil" "os" testing "testing" @@ -21,13 +20,13 @@ func TestWriteFileAtomic(t *testing.T) { perm os.FileMode = 0600 ) - f, err := ioutil.TempFile("/tmp", "write-atomic-test-") + f, err := os.CreateTemp("/tmp", "write-atomic-test-") if err != nil { t.Fatal(err) } defer os.Remove(f.Name()) - if err = ioutil.WriteFile(f.Name(), old, 0600); err != nil { + if err = os.WriteFile(f.Name(), old, 0600); err != nil { t.Fatal(err) } @@ -35,7 +34,7 @@ func TestWriteFileAtomic(t *testing.T) { t.Fatal(err) } - rData, err := ioutil.ReadFile(f.Name()) + rData, err := os.ReadFile(f.Name()) if err != nil { t.Fatal(err) } @@ -80,11 +79,11 @@ func TestWriteFileAtomicDuplicateFile(t *testing.T) { err = WriteFileAtomic(fileToWrite, []byte(expectedString), 0777) require.NoError(t, err) // Check that the first atomic file was untouched - firstAtomicFileBytes, err := ioutil.ReadFile(fname) + firstAtomicFileBytes, err := os.ReadFile(fname) require.NoError(t, err, "Error reading first atomic file") require.Equal(t, []byte(testString), firstAtomicFileBytes, "First atomic file was overwritten") // Check that the resultant file is correct - resultantFileBytes, err := ioutil.ReadFile(fileToWrite) + resultantFileBytes, err := os.ReadFile(fileToWrite) require.NoError(t, err, "Error reading resultant file") require.Equal(t, []byte(expectedString), resultantFileBytes, "Written file had incorrect bytes") @@ -131,14 +130,14 @@ func TestWriteFileAtomicManyDuplicates(t *testing.T) { for i := 0; i < atomicWriteFileMaxNumConflicts+2; i++ { fileRand := randWriteFileSuffix() fname := "/tmp/" + atomicWriteFilePrefix + fileRand - firstAtomicFileBytes, err := ioutil.ReadFile(fname) + firstAtomicFileBytes, err := os.ReadFile(fname) require.Nil(t, err, "Error reading first atomic file") require.Equal(t, []byte(fmt.Sprintf(testString, i)), firstAtomicFileBytes, "atomic write file %d was overwritten", i) } // Check that the resultant file is correct - resultantFileBytes, err := ioutil.ReadFile(fileToWrite) + resultantFileBytes, err := os.ReadFile(fileToWrite) require.Nil(t, err, "Error reading resultant file") require.Equal(t, []byte(expectedString), resultantFileBytes, "Written file had incorrect bytes") } diff --git a/light/example_test.go b/light/example_test.go index b599778b8..f49b34a5d 100644 --- a/light/example_test.go +++ b/light/example_test.go @@ -3,7 +3,6 @@ package light_test import ( "context" "fmt" - "io/ioutil" stdlog "log" "os" "testing" @@ -25,7 +24,7 @@ func ExampleClient_Update() { // give Tendermint time to generate some blocks time.Sleep(5 * time.Second) - dbDir, err := ioutil.TempDir("", "light-client-example") + dbDir, err := os.MkdirTemp("", "light-client-example") if err != nil { stdlog.Fatal(err) } @@ -93,7 +92,7 @@ func ExampleClient_VerifyLightBlockAtHeight() { // give Tendermint time to generate some blocks time.Sleep(5 * time.Second) - dbDir, err := ioutil.TempDir("", "light-client-example") + dbDir, err := os.MkdirTemp("", "light-client-example") if err != nil { stdlog.Fatal(err) } diff --git a/light/rpc/client.go b/light/rpc/client.go index 6c2461b47..6fc1adbca 100644 --- a/light/rpc/client.go +++ b/light/rpc/client.go @@ -27,7 +27,7 @@ var errNegOrZeroHeight = errors.New("negative or zero height") type KeyPathFunc func(path string, key []byte) (merkle.KeyPath, error) // LightClient is an interface that contains functionality needed by Client from the light client. -//go:generate mockery --case underscore --name LightClient +//go:generate ../../scripts/mockery_generate.sh LightClient type LightClient interface { ChainID() string Update(ctx context.Context, now time.Time) (*types.LightBlock, error) diff --git a/light/rpc/mocks/light_client.go b/light/rpc/mocks/light_client.go index 9dfdcd001..fabf73b01 100644 --- a/light/rpc/mocks/light_client.go +++ b/light/rpc/mocks/light_client.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/mempool/mempool.go b/mempool/mempool.go index f776fbc9b..bf6eb2e72 100644 --- a/mempool/mempool.go +++ b/mempool/mempool.go @@ -23,7 +23,7 @@ const ( MaxActiveIDs = math.MaxUint16 ) -//go:generate mockery --case underscore --name Mempool +//go:generate ../scripts/mockery_generate.sh Mempool // Mempool defines the mempool interface. // diff --git a/mempool/metrics.gen.go b/mempool/metrics.gen.go new file mode 100644 index 000000000..100c5e71c --- /dev/null +++ b/mempool/metrics.gen.go @@ -0,0 +1,67 @@ +// Code generated by metricsgen. DO NOT EDIT. + +package mempool + +import ( + "github.com/go-kit/kit/metrics/discard" + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + labels := []string{} + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } + return &Metrics{ + Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "size", + Help: "Number of uncommitted transactions in the mempool.", + }, labels).With(labelsAndValues...), + TxSizeBytes: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "tx_size_bytes", + Help: "Histogram of transaction sizes in bytes.", + + Buckets: stdprometheus.ExponentialBuckets(1, 3, 7), + }, labels).With(labelsAndValues...), + FailedTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "failed_txs", + Help: "Number of failed transactions.", + }, labels).With(labelsAndValues...), + RejectedTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "rejected_txs", + Help: "Number of rejected transactions.", + }, labels).With(labelsAndValues...), + EvictedTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "evicted_txs", + Help: "Number of evicted transactions.", + }, labels).With(labelsAndValues...), + RecheckTimes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "recheck_times", + Help: "Number of times transactions are rechecked in the mempool.", + }, labels).With(labelsAndValues...), + } +} + +func NopMetrics() *Metrics { + return &Metrics{ + Size: discard.NewGauge(), + TxSizeBytes: discard.NewHistogram(), + FailedTxs: discard.NewCounter(), + RejectedTxs: discard.NewCounter(), + EvictedTxs: discard.NewCounter(), + RecheckTimes: discard.NewCounter(), + } +} diff --git a/mempool/metrics.go b/mempool/metrics.go index 5d3022e80..85ca8c0cf 100644 --- a/mempool/metrics.go +++ b/mempool/metrics.go @@ -2,9 +2,6 @@ package mempool import ( "github.com/go-kit/kit/metrics" - "github.com/go-kit/kit/metrics/discard" - "github.com/go-kit/kit/metrics/prometheus" - stdprometheus "github.com/prometheus/client_golang/prometheus" ) const ( @@ -13,14 +10,16 @@ const ( MetricsSubsystem = "mempool" ) +//go:generate go run ../scripts/metricsgen -struct=Metrics + // Metrics contains metrics exposed by this package. // see MetricsProvider for descriptions. type Metrics struct { - // Size of the mempool. + // Number of uncommitted transactions in the mempool. Size metrics.Gauge - // Histogram of transaction sizes, in bytes. - TxSizeBytes metrics.Histogram + // Histogram of transaction sizes in bytes. + TxSizeBytes metrics.Histogram `metrics_buckettype:"exp" metrics_bucketsizes:"1,3,7"` // Number of failed transactions. FailedTxs metrics.Counter @@ -29,80 +28,16 @@ type Metrics struct { // transactions that passed CheckTx but failed to make it into the mempool // due to resource limits, e.g. mempool is full and no lower priority // transactions exist in the mempool. + //metrics:Number of rejected transactions. RejectedTxs metrics.Counter // EvictedTxs defines the number of evicted transactions. These are valid // transactions that passed CheckTx and existed in the mempool but were later // evicted to make room for higher priority valid transactions that passed // CheckTx. + //metrics:Number of evicted transactions. EvictedTxs metrics.Counter // Number of times transactions are rechecked in the mempool. RecheckTimes metrics.Counter } - -// PrometheusMetrics returns Metrics build using Prometheus client library. -// Optionally, labels can be provided along with their values ("foo", -// "fooValue"). -func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { - labels := []string{} - for i := 0; i < len(labelsAndValues); i += 2 { - labels = append(labels, labelsAndValues[i]) - } - return &Metrics{ - Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "size", - Help: "Size of the mempool (number of uncommitted transactions).", - }, labels).With(labelsAndValues...), - - TxSizeBytes: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "tx_size_bytes", - Help: "Transaction sizes in bytes.", - Buckets: stdprometheus.ExponentialBuckets(1, 3, 17), - }, labels).With(labelsAndValues...), - - FailedTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "failed_txs", - Help: "Number of failed transactions.", - }, labels).With(labelsAndValues...), - - RejectedTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "rejected_txs", - Help: "Number of rejected transactions.", - }, labels).With(labelsAndValues...), - - EvictedTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "evicted_txs", - Help: "Number of evicted transactions.", - }, labels).With(labelsAndValues...), - - RecheckTimes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "recheck_times", - Help: "Number of times transactions are rechecked in the mempool.", - }, labels).With(labelsAndValues...), - } -} - -// NopMetrics returns no-op Metrics. -func NopMetrics() *Metrics { - return &Metrics{ - Size: discard.NewGauge(), - TxSizeBytes: discard.NewHistogram(), - FailedTxs: discard.NewCounter(), - RejectedTxs: discard.NewCounter(), - EvictedTxs: discard.NewCounter(), - RecheckTimes: discard.NewCounter(), - } -} diff --git a/mempool/mocks/mempool.go b/mempool/mocks/mempool.go index 94c024ac2..fa16a2633 100644 --- a/mempool/mocks/mempool.go +++ b/mempool/mocks/mempool.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/node/node.go b/node/node.go index 84ed26748..51fe490d8 100644 --- a/node/node.go +++ b/node/node.go @@ -16,9 +16,7 @@ import ( dbm "github.com/tendermint/tm-db" abci "github.com/tendermint/tendermint/abci/types" - bcv0 "github.com/tendermint/tendermint/blockchain/v0" - bcv1 "github.com/tendermint/tendermint/blockchain/v1" - bcv2 "github.com/tendermint/tendermint/blockchain/v2" + bc "github.com/tendermint/tendermint/blockchain" cfg "github.com/tendermint/tendermint/config" cs "github.com/tendermint/tendermint/consensus" "github.com/tendermint/tendermint/crypto" @@ -69,6 +67,8 @@ type DBContext struct { // DBProvider takes a DBContext and returns an instantiated DB. type DBProvider func(*DBContext) (dbm.DB, error) +const readHeaderTimeout = 10 * time.Second + // DefaultDBProvider returns a database using the DBBackend and DBDir // specified in the ctx.Config. func DefaultDBProvider(ctx *DBContext) (dbm.DB, error) { @@ -113,19 +113,20 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) { } // MetricsProvider returns a consensus, p2p and mempool Metrics. -type MetricsProvider func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) +type MetricsProvider func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics, *proxy.Metrics) // DefaultMetricsProvider returns Metrics build using Prometheus client library // if Prometheus is enabled. Otherwise, it returns no-op Metrics. func DefaultMetricsProvider(config *cfg.InstrumentationConfig) MetricsProvider { - return func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) { + return func(chainID string) (*cs.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics, *proxy.Metrics) { if config.Prometheus { return cs.PrometheusMetrics(config.Namespace, "chain_id", chainID), p2p.PrometheusMetrics(config.Namespace, "chain_id", chainID), mempl.PrometheusMetrics(config.Namespace, "chain_id", chainID), - sm.PrometheusMetrics(config.Namespace, "chain_id", chainID) + sm.PrometheusMetrics(config.Namespace, "chain_id", chainID), + proxy.PrometheusMetrics(config.Namespace, "chain_id", chainID) } - return cs.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics(), sm.NopMetrics() + return cs.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics(), sm.NopMetrics(), proxy.NopMetrics() } } @@ -246,8 +247,8 @@ func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.Block return } -func createAndStartProxyAppConns(clientCreator proxy.ClientCreator, logger log.Logger) (proxy.AppConns, error) { - proxyApp := proxy.NewAppConns(clientCreator) +func createAndStartProxyAppConns(clientCreator proxy.ClientCreator, logger log.Logger, metrics *proxy.Metrics) (proxy.AppConns, error) { + proxyApp := proxy.NewAppConns(clientCreator, metrics) proxyApp.SetLogger(logger.With("module", "proxy")) if err := proxyApp.Start(); err != nil { return nil, fmt.Errorf("error starting proxy app connections: %v", err) @@ -447,11 +448,9 @@ func createBlockchainReactor(config *cfg.Config, switch config.FastSync.Version { case "v0": - bcReactor = bcv0.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync) - case "v1": - bcReactor = bcv1.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync) - case "v2": - bcReactor = bcv2.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync) + bcReactor = bc.NewReactor(state.Copy(), blockExec, blockStore, fastSync) + case "v1", "v2": + return nil, fmt.Errorf("fast sync version %s has been deprecated. Please use v0", config.FastSync.Version) default: return nil, fmt.Errorf("unknown fastsync version %s", config.FastSync.Version) } @@ -722,8 +721,10 @@ func NewNode(config *cfg.Config, return nil, err } + csMetrics, p2pMetrics, memplMetrics, smMetrics, abciMetrics := metricsProvider(genDoc.ChainID) + // Create the proxyApp and establish connections to the ABCI app (consensus, mempool, query). - proxyApp, err := createAndStartProxyAppConns(clientCreator, logger) + proxyApp, err := createAndStartProxyAppConns(clientCreator, logger, abciMetrics) if err != nil { return nil, err } @@ -788,8 +789,6 @@ func NewNode(config *cfg.Config, logNodeStartupInfo(state, pubKey, logger, consensusLogger) - csMetrics, p2pMetrics, memplMetrics, smMetrics := metricsProvider(genDoc.ChainID) - // Make MempoolReactor mempool, mempoolReactor := createMempoolAndMempoolReactor(config, proxyApp, state, memplMetrics, logger) @@ -1220,6 +1219,7 @@ func (n *Node) startPrometheusServer(addr string) *http.Server { promhttp.HandlerOpts{MaxRequestsInFlight: n.config.Instrumentation.MaxOpenConnections}, ), ), + ReadHeaderTimeout: readHeaderTimeout, } go func() { if err := srv.ListenAndServe(); err != http.ErrServerClosed { @@ -1325,18 +1325,6 @@ func makeNodeInfo( txIndexerStatus = "off" } - var bcChannel byte - switch config.FastSync.Version { - case "v0": - bcChannel = bcv0.BlockchainChannel - case "v1": - bcChannel = bcv1.BlockchainChannel - case "v2": - bcChannel = bcv2.BlockchainChannel - default: - return p2p.DefaultNodeInfo{}, fmt.Errorf("unknown fastsync version %s", config.FastSync.Version) - } - nodeInfo := p2p.DefaultNodeInfo{ ProtocolVersion: p2p.NewProtocolVersion( version.P2PProtocol, // global @@ -1347,7 +1335,7 @@ func makeNodeInfo( Network: genDoc.ChainID, Version: version.TMCoreSemVer, Channels: []byte{ - bcChannel, + bc.BlockchainChannel, cs.StateChannel, cs.DataChannel, cs.VoteChannel, cs.VoteSetBitsChannel, mempl.MempoolChannel, evidence.EvidenceChannel, diff --git a/node/node_test.go b/node/node_test.go index 5382c94df..56f8ecf68 100644 --- a/node/node_test.go +++ b/node/node_test.go @@ -123,7 +123,7 @@ func TestNodeSetAppVersion(t *testing.T) { require.NoError(t, err) // default config uses the kvstore app - var appVersion uint64 = kvstore.ProtocolVersion + var appVersion = kvstore.ProtocolVersion // check version is set in state state, err := n.stateStore.Load() @@ -226,7 +226,7 @@ func TestCreateProposalBlock(t *testing.T) { config := cfg.ResetTestRoot("node_create_proposal") defer os.RemoveAll(config.RootDir) cc := proxy.NewLocalClientCreator(kvstore.NewApplication()) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.Nil(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -334,7 +334,7 @@ func TestMaxProposalBlockSize(t *testing.T) { config := cfg.ResetTestRoot("node_create_proposal") defer os.RemoveAll(config.RootDir) cc := proxy.NewLocalClientCreator(kvstore.NewApplication()) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.Nil(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests diff --git a/p2p/key.go b/p2p/key.go index f0f3dfd03..f98e88eee 100644 --- a/p2p/key.go +++ b/p2p/key.go @@ -4,7 +4,7 @@ import ( "bytes" "encoding/hex" "fmt" - "io/ioutil" + "os" "github.com/tendermint/tendermint/crypto" "github.com/tendermint/tendermint/crypto/ed25519" @@ -70,7 +70,7 @@ func LoadOrGenNodeKey(filePath string) (*NodeKey, error) { // LoadNodeKey loads NodeKey located in filePath. func LoadNodeKey(filePath string) (*NodeKey, error) { - jsonBytes, err := ioutil.ReadFile(filePath) + jsonBytes, err := os.ReadFile(filePath) if err != nil { return nil, err } @@ -88,7 +88,7 @@ func (nodeKey *NodeKey) SaveAs(filePath string) error { if err != nil { return err } - err = ioutil.WriteFile(filePath, jsonBytes, 0600) + err = os.WriteFile(filePath, jsonBytes, 0600) if err != nil { return err } diff --git a/p2p/metrics.gen.go b/p2p/metrics.gen.go new file mode 100644 index 000000000..b2b0b25c8 --- /dev/null +++ b/p2p/metrics.gen.go @@ -0,0 +1,58 @@ +// Code generated by metricsgen. DO NOT EDIT. + +package p2p + +import ( + "github.com/go-kit/kit/metrics/discard" + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + labels := []string{} + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } + return &Metrics{ + Peers: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "peers", + Help: "Number of peers.", + }, labels).With(labelsAndValues...), + PeerReceiveBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "peer_receive_bytes_total", + Help: "Number of bytes received from a given peer.", + }, labels).With(labelsAndValues...), + PeerSendBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "peer_send_bytes_total", + Help: "Number of bytes sent to a given peer.", + }, labels).With(labelsAndValues...), + PeerPendingSendBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "peer_pending_send_bytes", + Help: "Pending bytes to be sent to a given peer.", + }, labels).With(labelsAndValues...), + NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "num_txs", + Help: "Number of transactions submitted by each peer.", + }, labels).With(labelsAndValues...), + } +} + +func NopMetrics() *Metrics { + return &Metrics{ + Peers: discard.NewGauge(), + PeerReceiveBytesTotal: discard.NewCounter(), + PeerSendBytesTotal: discard.NewCounter(), + PeerPendingSendBytes: discard.NewGauge(), + NumTxs: discard.NewGauge(), + } +} diff --git a/p2p/metrics.go b/p2p/metrics.go index 675dd9c7c..67d6ae668 100644 --- a/p2p/metrics.go +++ b/p2p/metrics.go @@ -2,9 +2,6 @@ package p2p import ( "github.com/go-kit/kit/metrics" - "github.com/go-kit/kit/metrics/discard" - "github.com/go-kit/kit/metrics/prometheus" - stdprometheus "github.com/prometheus/client_golang/prometheus" ) const ( @@ -13,6 +10,8 @@ const ( MetricsSubsystem = "p2p" ) +//go:generate go run ../scripts/metricsgen -struct=Metrics + // Metrics contains metrics exposed by this package. type Metrics struct { // Number of peers. @@ -26,56 +25,3 @@ type Metrics struct { // Number of transactions submitted by each peer. NumTxs metrics.Gauge } - -// PrometheusMetrics returns Metrics build using Prometheus client library. -// Optionally, labels can be provided along with their values ("foo", -// "fooValue"). -func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { - labels := []string{} - for i := 0; i < len(labelsAndValues); i += 2 { - labels = append(labels, labelsAndValues[i]) - } - return &Metrics{ - Peers: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "peers", - Help: "Number of peers.", - }, labels).With(labelsAndValues...), - PeerReceiveBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "peer_receive_bytes_total", - Help: "Number of bytes received from a given peer.", - }, append(labels, "peer_id", "chID")).With(labelsAndValues...), - PeerSendBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "peer_send_bytes_total", - Help: "Number of bytes sent to a given peer.", - }, append(labels, "peer_id", "chID")).With(labelsAndValues...), - PeerPendingSendBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "peer_pending_send_bytes", - Help: "Number of pending bytes to be sent to a given peer.", - }, append(labels, "peer_id")).With(labelsAndValues...), - NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "num_txs", - Help: "Number of transactions submitted by each peer.", - }, append(labels, "peer_id")).With(labelsAndValues...), - } -} - -// NopMetrics returns no-op Metrics. -func NopMetrics() *Metrics { - return &Metrics{ - Peers: discard.NewGauge(), - PeerReceiveBytesTotal: discard.NewCounter(), - PeerSendBytesTotal: discard.NewCounter(), - PeerPendingSendBytes: discard.NewGauge(), - NumTxs: discard.NewGauge(), - } -} diff --git a/p2p/mocks/peer.go b/p2p/mocks/peer.go index 94ec169f9..e195c78bb 100644 --- a/p2p/mocks/peer.go +++ b/p2p/mocks/peer.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/p2p/peer.go b/p2p/peer.go index 21ebac678..751ca3cf2 100644 --- a/p2p/peer.go +++ b/p2p/peer.go @@ -12,7 +12,7 @@ import ( tmconn "github.com/tendermint/tendermint/p2p/conn" ) -//go:generate mockery --case underscore --name Peer +//go:generate ../scripts/mockery_generate.sh Peer const metricsTickerDuration = 10 * time.Second diff --git a/p2p/pex/addrbook_test.go b/p2p/pex/addrbook_test.go index c04ad9c3a..e7888662a 100644 --- a/p2p/pex/addrbook_test.go +++ b/p2p/pex/addrbook_test.go @@ -3,7 +3,6 @@ package pex import ( "encoding/hex" "fmt" - "io/ioutil" "math" "net" "os" @@ -718,7 +717,7 @@ func assertMOldAndNNewAddrsInSelection(t *testing.T, m, n int, addrs []*p2p.NetA } func createTempFileName(prefix string) string { - f, err := ioutil.TempFile("", prefix) + f, err := os.CreateTemp("", prefix) if err != nil { panic(err) } diff --git a/p2p/pex/pex_reactor_test.go b/p2p/pex/pex_reactor_test.go index 4ed1254ef..fba3b1951 100644 --- a/p2p/pex/pex_reactor_test.go +++ b/p2p/pex/pex_reactor_test.go @@ -3,7 +3,6 @@ package pex import ( "encoding/hex" "fmt" - "io/ioutil" "os" "path/filepath" "testing" @@ -73,7 +72,7 @@ func TestPEXReactorRunning(t *testing.T) { switches := make([]*p2p.Switch, N) // directory to store address books - dir, err := ioutil.TempDir("", "pex_reactor") + dir, err := os.MkdirTemp("", "pex_reactor") require.Nil(t, err) defer os.RemoveAll(dir) @@ -208,7 +207,7 @@ func TestPEXReactorAddrsMessageAbuse(t *testing.T) { func TestCheckSeeds(t *testing.T) { // directory to store address books - dir, err := ioutil.TempDir("", "pex_reactor") + dir, err := os.MkdirTemp("", "pex_reactor") require.Nil(t, err) defer os.RemoveAll(dir) @@ -247,7 +246,7 @@ func TestCheckSeeds(t *testing.T) { func TestPEXReactorUsesSeedsIfNeeded(t *testing.T) { // directory to store address books - dir, err := ioutil.TempDir("", "pex_reactor") + dir, err := os.MkdirTemp("", "pex_reactor") require.Nil(t, err) defer os.RemoveAll(dir) @@ -267,7 +266,7 @@ func TestPEXReactorUsesSeedsIfNeeded(t *testing.T) { func TestConnectionSpeedForPeerReceivedFromSeed(t *testing.T) { // directory to store address books - dir, err := ioutil.TempDir("", "pex_reactor") + dir, err := os.MkdirTemp("", "pex_reactor") require.Nil(t, err) defer os.RemoveAll(dir) @@ -296,7 +295,7 @@ func TestConnectionSpeedForPeerReceivedFromSeed(t *testing.T) { func TestPEXReactorSeedMode(t *testing.T) { // directory to store address books - dir, err := ioutil.TempDir("", "pex_reactor") + dir, err := os.MkdirTemp("", "pex_reactor") require.Nil(t, err) defer os.RemoveAll(dir) @@ -335,7 +334,7 @@ func TestPEXReactorSeedMode(t *testing.T) { func TestPEXReactorDoesNotDisconnectFromPersistentPeerInSeedMode(t *testing.T) { // directory to store address books - dir, err := ioutil.TempDir("", "pex_reactor") + dir, err := os.MkdirTemp("", "pex_reactor") require.Nil(t, err) defer os.RemoveAll(dir) @@ -373,7 +372,7 @@ func TestPEXReactorDoesNotDisconnectFromPersistentPeerInSeedMode(t *testing.T) { func TestPEXReactorDialsPeerUpToMaxAttemptsInSeedMode(t *testing.T) { // directory to store address books - dir, err := ioutil.TempDir("", "pex_reactor") + dir, err := os.MkdirTemp("", "pex_reactor") require.Nil(t, err) defer os.RemoveAll(dir) @@ -409,7 +408,7 @@ func TestPEXReactorSeedModeFlushStop(t *testing.T) { switches := make([]*p2p.Switch, N) // directory to store address books - dir, err := ioutil.TempDir("", "pex_reactor") + dir, err := os.MkdirTemp("", "pex_reactor") require.Nil(t, err) defer os.RemoveAll(dir) @@ -646,7 +645,7 @@ func testCreatePeerWithSeed(dir string, id int, seed *p2p.Switch) *p2p.Switch { func createReactor(conf *ReactorConfig) (r *Reactor, book AddrBook) { // directory to store address book - dir, err := ioutil.TempDir("", "pex_reactor") + dir, err := os.MkdirTemp("", "pex_reactor") if err != nil { panic(err) } diff --git a/p2p/switch_test.go b/p2p/switch_test.go index 36420d333..2fa467891 100644 --- a/p2p/switch_test.go +++ b/p2p/switch_test.go @@ -4,7 +4,7 @@ import ( "bytes" "errors" "fmt" - "io/ioutil" + "io" "net" "net/http" "net/http/httptest" @@ -400,7 +400,7 @@ func TestSwitchStopPeerForError(t *testing.T) { resp, err := http.Get(s.URL) require.NoError(t, err) defer resp.Body.Close() - buf, _ := ioutil.ReadAll(resp.Body) + buf, _ := io.ReadAll(resp.Body) return string(buf) } diff --git a/p2p/transport_test.go b/p2p/transport_test.go index 7638de4cb..90b074256 100644 --- a/p2p/transport_test.go +++ b/p2p/transport_test.go @@ -79,8 +79,8 @@ func TestTransportMultiplexConnFilter(t *testing.T) { } _, err = mt.Accept(peerConfig{}) - if err, ok := err.(ErrRejected); ok { - if !err.IsFiltered() { + if e, ok := err.(ErrRejected); ok { + if !e.IsFiltered() { t.Errorf("expected peer to be filtered, got %v", err) } } else { @@ -386,8 +386,8 @@ func TestTransportMultiplexValidateNodeInfo(t *testing.T) { } _, err := mt.Accept(peerConfig{}) - if err, ok := err.(ErrRejected); ok { - if !err.IsNodeInfoInvalid() { + if e, ok := err.(ErrRejected); ok { + if !e.IsNodeInfoInvalid() { t.Errorf("expected NodeInfo to be invalid, got %v", err) } } else { @@ -425,9 +425,9 @@ func TestTransportMultiplexRejectMissmatchID(t *testing.T) { } _, err := mt.Accept(peerConfig{}) - if err, ok := err.(ErrRejected); ok { - if !err.IsAuthFailure() { - t.Errorf("expected auth failure, got %v", err) + if e, ok := err.(ErrRejected); ok { + if !e.IsAuthFailure() { + t.Errorf("expected auth failure, got %v", e) } } else { t.Errorf("expected ErrRejected, got %v", err) @@ -453,9 +453,9 @@ func TestTransportMultiplexDialRejectWrongID(t *testing.T) { _, err := dialer.Dial(*addr, peerConfig{}) if err != nil { t.Logf("connection failed: %v", err) - if err, ok := err.(ErrRejected); ok { - if !err.IsAuthFailure() { - t.Errorf("expected auth failure, got %v", err) + if e, ok := err.(ErrRejected); ok { + if !e.IsAuthFailure() { + t.Errorf("expected auth failure, got %v", e) } } else { t.Errorf("expected ErrRejected, got %v", err) @@ -490,9 +490,9 @@ func TestTransportMultiplexRejectIncompatible(t *testing.T) { }() _, err := mt.Accept(peerConfig{}) - if err, ok := err.(ErrRejected); ok { - if !err.IsIncompatible() { - t.Errorf("expected to reject incompatible, got %v", err) + if e, ok := err.(ErrRejected); ok { + if !e.IsIncompatible() { + t.Errorf("expected to reject incompatible, got %v", e) } } else { t.Errorf("expected ErrRejected, got %v", err) @@ -517,9 +517,9 @@ func TestTransportMultiplexRejectSelf(t *testing.T) { }() if err := <-errc; err != nil { - if err, ok := err.(ErrRejected); ok { - if !err.IsSelf() { - t.Errorf("expected to reject self, got: %v", err) + if e, ok := err.(ErrRejected); ok { + if !e.IsSelf() { + t.Errorf("expected to reject self, got: %v", e) } } else { t.Errorf("expected ErrRejected, got %v", err) diff --git a/p2p/trust/store_test.go b/p2p/trust/store_test.go index df0f14a04..c583d58aa 100644 --- a/p2p/trust/store_test.go +++ b/p2p/trust/store_test.go @@ -5,7 +5,6 @@ package trust import ( "fmt" - "io/ioutil" "os" "testing" @@ -17,7 +16,7 @@ import ( ) func TestTrustMetricStoreSaveLoad(t *testing.T) { - dir, err := ioutil.TempDir("", "trust_test") + dir, err := os.MkdirTemp("", "trust_test") require.NoError(t, err) defer os.Remove(dir) diff --git a/p2p/upnp/upnp.go b/p2p/upnp/upnp.go index c00530aca..e2c8f3fcf 100644 --- a/p2p/upnp/upnp.go +++ b/p2p/upnp/upnp.go @@ -10,7 +10,7 @@ import ( "encoding/xml" "errors" "fmt" - "io/ioutil" + "io" "net" "net/http" "strconv" @@ -312,7 +312,7 @@ func (n *upnpNAT) getExternalIPAddress() (info statusInfo, err error) { return } var envelope Envelope - data, err := ioutil.ReadAll(response.Body) + data, err := io.ReadAll(response.Body) if err != nil { return } @@ -374,7 +374,7 @@ func (n *upnpNAT) AddPortMapping( // TODO: check response to see if the port was forwarded // log.Println(message, response) // JAE: - // body, err := ioutil.ReadAll(response.Body) + // body, err := io.ReadAll(response.Body) // fmt.Println(string(body), err) mappedExternalPort = externalPort _ = response diff --git a/privval/file.go b/privval/file.go index fa33bb0ae..ea727e2ad 100644 --- a/privval/file.go +++ b/privval/file.go @@ -4,7 +4,7 @@ import ( "bytes" "errors" "fmt" - "io/ioutil" + "os" "time" "github.com/gogo/protobuf/proto" @@ -188,7 +188,7 @@ func LoadFilePVEmptyState(keyFilePath, stateFilePath string) *FilePV { // If loadState is true, we load from the stateFilePath. Otherwise, we use an empty LastSignState. func loadFilePV(keyFilePath, stateFilePath string, loadState bool) *FilePV { - keyJSONBytes, err := ioutil.ReadFile(keyFilePath) + keyJSONBytes, err := os.ReadFile(keyFilePath) if err != nil { tmos.Exit(err.Error()) } @@ -206,7 +206,7 @@ func loadFilePV(keyFilePath, stateFilePath string, loadState bool) *FilePV { pvState := FilePVLastSignState{} if loadState { - stateJSONBytes, err := ioutil.ReadFile(stateFilePath) + stateJSONBytes, err := os.ReadFile(stateFilePath) if err != nil { tmos.Exit(err.Error()) } diff --git a/privval/file_test.go b/privval/file_test.go index 69bd76874..c847af2fd 100644 --- a/privval/file_test.go +++ b/privval/file_test.go @@ -3,7 +3,6 @@ package privval import ( "encoding/base64" "fmt" - "io/ioutil" "os" "testing" "time" @@ -23,9 +22,9 @@ import ( func TestGenLoadValidator(t *testing.T) { assert := assert.New(t) - tempKeyFile, err := ioutil.TempFile("", "priv_validator_key_") + tempKeyFile, err := os.CreateTemp("", "priv_validator_key_") require.Nil(t, err) - tempStateFile, err := ioutil.TempFile("", "priv_validator_state_") + tempStateFile, err := os.CreateTemp("", "priv_validator_state_") require.Nil(t, err) privVal := GenFilePV(tempKeyFile.Name(), tempStateFile.Name()) @@ -41,9 +40,9 @@ func TestGenLoadValidator(t *testing.T) { } func TestResetValidator(t *testing.T) { - tempKeyFile, err := ioutil.TempFile("", "priv_validator_key_") + tempKeyFile, err := os.CreateTemp("", "priv_validator_key_") require.Nil(t, err) - tempStateFile, err := ioutil.TempFile("", "priv_validator_state_") + tempStateFile, err := os.CreateTemp("", "priv_validator_state_") require.Nil(t, err) privVal := GenFilePV(tempKeyFile.Name(), tempStateFile.Name()) @@ -72,9 +71,9 @@ func TestResetValidator(t *testing.T) { func TestLoadOrGenValidator(t *testing.T) { assert := assert.New(t) - tempKeyFile, err := ioutil.TempFile("", "priv_validator_key_") + tempKeyFile, err := os.CreateTemp("", "priv_validator_key_") require.Nil(t, err) - tempStateFile, err := ioutil.TempFile("", "priv_validator_state_") + tempStateFile, err := os.CreateTemp("", "priv_validator_state_") require.Nil(t, err) tempKeyFilePath := tempKeyFile.Name() @@ -159,9 +158,9 @@ func TestUnmarshalValidatorKey(t *testing.T) { func TestSignVote(t *testing.T) { assert := assert.New(t) - tempKeyFile, err := ioutil.TempFile("", "priv_validator_key_") + tempKeyFile, err := os.CreateTemp("", "priv_validator_key_") require.Nil(t, err) - tempStateFile, err := ioutil.TempFile("", "priv_validator_state_") + tempStateFile, err := os.CreateTemp("", "priv_validator_state_") require.Nil(t, err) privVal := GenFilePV(tempKeyFile.Name(), tempStateFile.Name()) @@ -212,9 +211,9 @@ func TestSignVote(t *testing.T) { func TestSignProposal(t *testing.T) { assert := assert.New(t) - tempKeyFile, err := ioutil.TempFile("", "priv_validator_key_") + tempKeyFile, err := os.CreateTemp("", "priv_validator_key_") require.Nil(t, err) - tempStateFile, err := ioutil.TempFile("", "priv_validator_state_") + tempStateFile, err := os.CreateTemp("", "priv_validator_state_") require.Nil(t, err) privVal := GenFilePV(tempKeyFile.Name(), tempStateFile.Name()) @@ -260,9 +259,9 @@ func TestSignProposal(t *testing.T) { } func TestDifferByTimestamp(t *testing.T) { - tempKeyFile, err := ioutil.TempFile("", "priv_validator_key_") + tempKeyFile, err := os.CreateTemp("", "priv_validator_key_") require.Nil(t, err) - tempStateFile, err := ioutil.TempFile("", "priv_validator_state_") + tempStateFile, err := os.CreateTemp("", "priv_validator_state_") require.Nil(t, err) privVal := GenFilePV(tempKeyFile.Name(), tempStateFile.Name()) diff --git a/privval/socket_listeners_test.go b/privval/socket_listeners_test.go index 5e95ec10c..08a285bdf 100644 --- a/privval/socket_listeners_test.go +++ b/privval/socket_listeners_test.go @@ -1,7 +1,6 @@ package privval import ( - "io/ioutil" "net" "os" "testing" @@ -29,7 +28,7 @@ type listenerTestCase struct { // testUnixAddr will attempt to obtain a platform-independent temporary file // name for a Unix socket func testUnixAddr() (string, error) { - f, err := ioutil.TempFile("", "tendermint-privval-test-*") + f, err := os.CreateTemp("", "tendermint-privval-test-*") if err != nil { return "", err } diff --git a/proto/README.md b/proto/README.md new file mode 100644 index 000000000..ebecd82d1 --- /dev/null +++ b/proto/README.md @@ -0,0 +1,23 @@ +# Protocol Buffers + +This sections defines the types and messages shared across implementations. The definition of the data structures are located in the [core/data_structures](../spec/core/data_structures.md) for the core data types and ABCI definitions are located in the [ABCI](../spec/abci/README.md) section. + +## Process of Updates + +The `.proto` files within this section are core to the protocol and updates must be treated as such. + +### Steps + +1. Make an issue with the proposed change. + - Within in the issue members from both the Tendermint-go and Tendermint-rs team will leave comments. If there is not consensus on the change an [RFC](../rfc/README.md) may be requested. + 1a. Submission of an RFC as a pull request should be made to facilitate further discussion. + 1b. Merge the RFC. +2. Make the necessary changes to the `.proto` file(s), [core data structures](../spec/core/data_structures.md) and/or [ABCI protocol](../spec/abci/apps.md). +3. Open issues within Tendermint-go and Tendermint-rs repos. This is used to notify the teams that a change occurred in the spec. + 1. Tag the issue with a spec version label. This will notify the team the changed has been made on master but has not entered a release. + +### Versioning + +The spec repo aims to be versioned. Once it has been versioned, updates to the protobuf files will live on master. After a certain amount of time, decided on by Tendermint-go and Tendermint-rs team leads, a release will be made on the spec repo. The spec may contain minor releases as well, depending on the implementation these changes may lead to a breaking change. If so, the implementation team should open an issue within the spec repo requiring a major release of the spec. + +If the steps above were followed each implementation should have issues tagged with a spec change label. Once all issues have been completed the team should signify their readiness for release. diff --git a/proxy/app_conn.go b/proxy/app_conn.go index 9614767cb..b0c98430d 100644 --- a/proxy/app_conn.go +++ b/proxy/app_conn.go @@ -1,11 +1,14 @@ package proxy import ( + "time" + + "github.com/go-kit/kit/metrics" abcicli "github.com/tendermint/tendermint/abci/client" "github.com/tendermint/tendermint/abci/types" ) -//go:generate mockery --case underscore --name AppConnConsensus|AppConnMempool|AppConnQuery|AppConnSnapshot +//go:generate ../scripts/mockery_generate.sh AppConnConsensus|AppConnMempool|AppConnQuery|AppConnSnapshot //---------------------------------------------------------------------------------------- // Enforce which abci msgs can be sent on a connection at the type level @@ -57,13 +60,15 @@ type AppConnSnapshot interface { // Implements AppConnConsensus (subset of abcicli.Client) type appConnConsensus struct { + metrics *Metrics appConn abcicli.Client } var _ AppConnConsensus = (*appConnConsensus)(nil) -func NewAppConnConsensus(appConn abcicli.Client) AppConnConsensus { +func NewAppConnConsensus(appConn abcicli.Client, metrics *Metrics) AppConnConsensus { return &appConnConsensus{ + metrics: metrics, appConn: appConn, } } @@ -77,31 +82,38 @@ func (app *appConnConsensus) Error() error { } func (app *appConnConsensus) InitChainSync(req types.RequestInitChain) (*types.ResponseInitChain, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "init_chain", "type", "sync"))() return app.appConn.InitChainSync(req) } func (app *appConnConsensus) PrepareProposalSync( req types.RequestPrepareProposal) (*types.ResponsePrepareProposal, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "prepare_proposal", "type", "sync"))() return app.appConn.PrepareProposalSync(req) } func (app *appConnConsensus) ProcessProposalSync(req types.RequestProcessProposal) (*types.ResponseProcessProposal, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "process_proposal", "type", "sync"))() return app.appConn.ProcessProposalSync(req) } func (app *appConnConsensus) BeginBlockSync(req types.RequestBeginBlock) (*types.ResponseBeginBlock, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "begin_block", "type", "sync"))() return app.appConn.BeginBlockSync(req) } func (app *appConnConsensus) DeliverTxAsync(req types.RequestDeliverTx) *abcicli.ReqRes { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "deliver_tx", "type", "async"))() return app.appConn.DeliverTxAsync(req) } func (app *appConnConsensus) EndBlockSync(req types.RequestEndBlock) (*types.ResponseEndBlock, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "end_block", "type", "sync"))() return app.appConn.EndBlockSync(req) } func (app *appConnConsensus) CommitSync() (*types.ResponseCommit, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "commit", "type", "sync"))() return app.appConn.CommitSync() } @@ -109,11 +121,13 @@ func (app *appConnConsensus) CommitSync() (*types.ResponseCommit, error) { // Implements AppConnMempool (subset of abcicli.Client) type appConnMempool struct { + metrics *Metrics appConn abcicli.Client } -func NewAppConnMempool(appConn abcicli.Client) AppConnMempool { +func NewAppConnMempool(appConn abcicli.Client, metrics *Metrics) AppConnMempool { return &appConnMempool{ + metrics: metrics, appConn: appConn, } } @@ -127,18 +141,22 @@ func (app *appConnMempool) Error() error { } func (app *appConnMempool) FlushAsync() *abcicli.ReqRes { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "flush", "type", "async"))() return app.appConn.FlushAsync() } func (app *appConnMempool) FlushSync() error { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "flush", "type", "sync"))() return app.appConn.FlushSync() } func (app *appConnMempool) CheckTxAsync(req types.RequestCheckTx) *abcicli.ReqRes { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "check_tx", "type", "async"))() return app.appConn.CheckTxAsync(req) } func (app *appConnMempool) CheckTxSync(req types.RequestCheckTx) (*types.ResponseCheckTx, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "check_tx", "type", "sync"))() return app.appConn.CheckTxSync(req) } @@ -146,11 +164,13 @@ func (app *appConnMempool) CheckTxSync(req types.RequestCheckTx) (*types.Respons // Implements AppConnQuery (subset of abcicli.Client) type appConnQuery struct { + metrics *Metrics appConn abcicli.Client } -func NewAppConnQuery(appConn abcicli.Client) AppConnQuery { +func NewAppConnQuery(appConn abcicli.Client, metrics *Metrics) AppConnQuery { return &appConnQuery{ + metrics: metrics, appConn: appConn, } } @@ -160,14 +180,17 @@ func (app *appConnQuery) Error() error { } func (app *appConnQuery) EchoSync(msg string) (*types.ResponseEcho, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "echo", "type", "sync"))() return app.appConn.EchoSync(msg) } func (app *appConnQuery) InfoSync(req types.RequestInfo) (*types.ResponseInfo, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "info", "type", "sync"))() return app.appConn.InfoSync(req) } func (app *appConnQuery) QuerySync(reqQuery types.RequestQuery) (*types.ResponseQuery, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "query", "type", "sync"))() return app.appConn.QuerySync(reqQuery) } @@ -175,11 +198,13 @@ func (app *appConnQuery) QuerySync(reqQuery types.RequestQuery) (*types.Response // Implements AppConnSnapshot (subset of abcicli.Client) type appConnSnapshot struct { + metrics *Metrics appConn abcicli.Client } -func NewAppConnSnapshot(appConn abcicli.Client) AppConnSnapshot { +func NewAppConnSnapshot(appConn abcicli.Client, metrics *Metrics) AppConnSnapshot { return &appConnSnapshot{ + metrics: metrics, appConn: appConn, } } @@ -189,19 +214,32 @@ func (app *appConnSnapshot) Error() error { } func (app *appConnSnapshot) ListSnapshotsSync(req types.RequestListSnapshots) (*types.ResponseListSnapshots, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "list_snapshots", "type", "sync"))() return app.appConn.ListSnapshotsSync(req) } func (app *appConnSnapshot) OfferSnapshotSync(req types.RequestOfferSnapshot) (*types.ResponseOfferSnapshot, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "offer_snapshot", "type", "sync"))() return app.appConn.OfferSnapshotSync(req) } func (app *appConnSnapshot) LoadSnapshotChunkSync( req types.RequestLoadSnapshotChunk) (*types.ResponseLoadSnapshotChunk, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "load_snapshot_chunk", "type", "sync"))() return app.appConn.LoadSnapshotChunkSync(req) } func (app *appConnSnapshot) ApplySnapshotChunkSync( req types.RequestApplySnapshotChunk) (*types.ResponseApplySnapshotChunk, error) { + defer addTimeSample(app.metrics.MethodTimingSeconds.With("method", "apply_snapshot_chunk", "type", "sync"))() return app.appConn.ApplySnapshotChunkSync(req) } + +// addTimeSample returns a function that, when called, adds an observation to m. +// The observation added to m is the number of seconds ellapsed since addTimeSample +// was initially called. addTimeSample is meant to be called in a defer to calculate +// the amount of time a function takes to complete. +func addTimeSample(m metrics.Histogram) func() { + start := time.Now() + return func() { m.Observe(time.Since(start).Seconds()) } +} diff --git a/proxy/client.go b/proxy/client.go index 08887f4d1..ca47a2d28 100644 --- a/proxy/client.go +++ b/proxy/client.go @@ -4,14 +4,13 @@ import ( "fmt" abcicli "github.com/tendermint/tendermint/abci/client" - "github.com/tendermint/tendermint/abci/example/counter" "github.com/tendermint/tendermint/abci/example/kvstore" "github.com/tendermint/tendermint/abci/types" tmsync "github.com/tendermint/tendermint/libs/sync" e2e "github.com/tendermint/tendermint/test/e2e/app" ) -//go:generate mockery --case underscore --name ClientCreator +//go:generate ../scripts/mockery_generate.sh ClientCreator // ClientCreator creates new ABCI clients. type ClientCreator interface { @@ -70,14 +69,10 @@ func (r *remoteClientCreator) NewABCIClient() (abcicli.Client, error) { } // DefaultClientCreator returns a default ClientCreator, which will create a -// local client if addr is one of: 'counter', 'counter_serial', 'kvstore', +// local client if addr is one of: 'kvstore', // 'persistent_kvstore' or 'noop', otherwise - a remote client. func DefaultClientCreator(addr, transport, dbDir string) ClientCreator { switch addr { - case "counter": - return NewLocalClientCreator(counter.NewApplication(false)) - case "counter_serial": - return NewLocalClientCreator(counter.NewApplication(true)) case "kvstore": return NewLocalClientCreator(kvstore.NewApplication()) case "persistent_kvstore": diff --git a/proxy/metrics.gen.go b/proxy/metrics.gen.go new file mode 100644 index 000000000..f4387ca9c --- /dev/null +++ b/proxy/metrics.gen.go @@ -0,0 +1,32 @@ +// Code generated by metricsgen. DO NOT EDIT. + +package proxy + +import ( + "github.com/go-kit/kit/metrics/discard" + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + labels := []string{} + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } + return &Metrics{ + MethodTimingSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "method_timing_seconds", + Help: "Timing for each ABCI method.", + + Buckets: []float64{.0001, .0004, .002, .009, .02, .1, .65, 2, 6, 25}, + }, append(labels, "method", "type")).With(labelsAndValues...), + } +} + +func NopMetrics() *Metrics { + return &Metrics{ + MethodTimingSeconds: discard.NewHistogram(), + } +} diff --git a/proxy/metrics.go b/proxy/metrics.go new file mode 100644 index 000000000..7858c3bc5 --- /dev/null +++ b/proxy/metrics.go @@ -0,0 +1,19 @@ +package proxy + +import ( + "github.com/go-kit/kit/metrics" +) + +const ( + // MetricsSubsystem is a subsystem shared by all metrics exposed by this + // package. + MetricsSubsystem = "abci_connection" +) + +//go:generate go run ../scripts/metricsgen -struct=Metrics + +// Metrics contains the prometheus metrics exposed by the proxy package. +type Metrics struct { + // Timing for each ABCI method. + MethodTimingSeconds metrics.Histogram `metrics_bucketsizes:".0001,.0004,.002,.009,.02,.1,.65,2,6,25" metrics_labels:"method, type"` +} diff --git a/proxy/mocks/app_conn_consensus.go b/proxy/mocks/app_conn_consensus.go index 7803a3189..eccf74fc3 100644 --- a/proxy/mocks/app_conn_consensus.go +++ b/proxy/mocks/app_conn_consensus.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/proxy/mocks/app_conn_mempool.go b/proxy/mocks/app_conn_mempool.go index e33bc4b3a..05e23dd43 100644 --- a/proxy/mocks/app_conn_mempool.go +++ b/proxy/mocks/app_conn_mempool.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/proxy/mocks/app_conn_query.go b/proxy/mocks/app_conn_query.go index 5cbd2dfd4..544ab765e 100644 --- a/proxy/mocks/app_conn_query.go +++ b/proxy/mocks/app_conn_query.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/proxy/mocks/app_conn_snapshot.go b/proxy/mocks/app_conn_snapshot.go index 6f2c81bb4..e3d5cb6cd 100644 --- a/proxy/mocks/app_conn_snapshot.go +++ b/proxy/mocks/app_conn_snapshot.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/proxy/mocks/client_creator.go b/proxy/mocks/client_creator.go index dd757f97b..eced0aeff 100644 --- a/proxy/mocks/client_creator.go +++ b/proxy/mocks/client_creator.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/proxy/multi_app_conn.go b/proxy/multi_app_conn.go index a7a6f7014..37a5c166e 100644 --- a/proxy/multi_app_conn.go +++ b/proxy/multi_app_conn.go @@ -32,8 +32,8 @@ type AppConns interface { } // NewAppConns calls NewMultiAppConn. -func NewAppConns(clientCreator ClientCreator) AppConns { - return NewMultiAppConn(clientCreator) +func NewAppConns(clientCreator ClientCreator, metrics *Metrics) AppConns { + return NewMultiAppConn(clientCreator, metrics) } // multiAppConn implements AppConns. @@ -44,6 +44,7 @@ func NewAppConns(clientCreator ClientCreator) AppConns { type multiAppConn struct { service.BaseService + metrics *Metrics consensusConn AppConnConsensus mempoolConn AppConnMempool queryConn AppConnQuery @@ -58,8 +59,9 @@ type multiAppConn struct { } // NewMultiAppConn makes all necessary abci connections to the application. -func NewMultiAppConn(clientCreator ClientCreator) AppConns { +func NewMultiAppConn(clientCreator ClientCreator, metrics *Metrics) AppConns { multiAppConn := &multiAppConn{ + metrics: metrics, clientCreator: clientCreator, } multiAppConn.BaseService = *service.NewBaseService(nil, "multiAppConn", multiAppConn) @@ -88,7 +90,7 @@ func (app *multiAppConn) OnStart() error { return err } app.queryConnClient = c - app.queryConn = NewAppConnQuery(c) + app.queryConn = NewAppConnQuery(c, app.metrics) c, err = app.abciClientFor(connSnapshot) if err != nil { @@ -96,7 +98,7 @@ func (app *multiAppConn) OnStart() error { return err } app.snapshotConnClient = c - app.snapshotConn = NewAppConnSnapshot(c) + app.snapshotConn = NewAppConnSnapshot(c, app.metrics) c, err = app.abciClientFor(connMempool) if err != nil { @@ -104,7 +106,7 @@ func (app *multiAppConn) OnStart() error { return err } app.mempoolConnClient = c - app.mempoolConn = NewAppConnMempool(c) + app.mempoolConn = NewAppConnMempool(c, app.metrics) c, err = app.abciClientFor(connConsensus) if err != nil { @@ -112,7 +114,7 @@ func (app *multiAppConn) OnStart() error { return err } app.consensusConnClient = c - app.consensusConn = NewAppConnConsensus(c) + app.consensusConn = NewAppConnConsensus(c, app.metrics) // Kill Tendermint if the ABCI application crashes. go app.killTMOnClientError() diff --git a/proxy/multi_app_conn_test.go b/proxy/multi_app_conn_test.go index 34b0d0830..82ff19281 100644 --- a/proxy/multi_app_conn_test.go +++ b/proxy/multi_app_conn_test.go @@ -28,7 +28,7 @@ func TestAppConns_Start_Stop(t *testing.T) { clientCreatorMock.On("NewABCIClient").Return(clientMock, nil).Times(4) - appConns := NewAppConns(clientCreatorMock) + appConns := NewAppConns(clientCreatorMock, NopMetrics()) err := appConns.Start() require.NoError(t, err) @@ -68,7 +68,7 @@ func TestAppConns_Failure(t *testing.T) { clientCreatorMock.On("NewABCIClient").Return(clientMock, nil) - appConns := NewAppConns(clientCreatorMock) + appConns := NewAppConns(clientCreatorMock, NopMetrics()) err := appConns.Start() require.NoError(t, err) diff --git a/rpc/client/main_test.go b/rpc/client/main_test.go index c97311c81..4c0534868 100644 --- a/rpc/client/main_test.go +++ b/rpc/client/main_test.go @@ -1,7 +1,6 @@ package client_test import ( - "io/ioutil" "os" "testing" @@ -14,7 +13,7 @@ var node *nm.Node func TestMain(m *testing.M) { // start a tendermint node (and kvstore) in the background to test against - dir, err := ioutil.TempDir("/tmp", "rpc-client-test") + dir, err := os.MkdirTemp("/tmp", "rpc-client-test") if err != nil { panic(err) } diff --git a/rpc/jsonrpc/client/http_json_client.go b/rpc/jsonrpc/client/http_json_client.go index fbfe073e0..8fbf9fa4f 100644 --- a/rpc/jsonrpc/client/http_json_client.go +++ b/rpc/jsonrpc/client/http_json_client.go @@ -5,7 +5,7 @@ import ( "context" "encoding/json" "fmt" - "io/ioutil" + "io" "net" "net/http" "net/url" @@ -217,7 +217,7 @@ func (c *Client) Call( defer httpResponse.Body.Close() - responseBytes, err := ioutil.ReadAll(httpResponse.Body) + responseBytes, err := io.ReadAll(httpResponse.Body) if err != nil { return nil, fmt.Errorf("failed to read response body: %w", err) } @@ -265,7 +265,7 @@ func (c *Client) sendBatch(ctx context.Context, requests []*jsonRPCBufferedReque defer httpResponse.Body.Close() - responseBytes, err := ioutil.ReadAll(httpResponse.Body) + responseBytes, err := io.ReadAll(httpResponse.Body) if err != nil { return nil, fmt.Errorf("read response body: %w", err) } diff --git a/rpc/jsonrpc/client/http_json_client_test.go b/rpc/jsonrpc/client/http_json_client_test.go index 4b82ff1eb..03134dff5 100644 --- a/rpc/jsonrpc/client/http_json_client_test.go +++ b/rpc/jsonrpc/client/http_json_client_test.go @@ -1,7 +1,7 @@ package client import ( - "io/ioutil" + "io" "log" "net/http" "net/http/httptest" @@ -21,7 +21,7 @@ func TestHTTPClientMakeHTTPDialer(t *testing.T) { defer tsTLS.Close() // This silences a TLS handshake error, caused by the dialer just immediately // disconnecting, which we can just ignore. - tsTLS.Config.ErrorLog = log.New(ioutil.Discard, "", 0) + tsTLS.Config.ErrorLog = log.New(io.Discard, "", 0) for _, testURL := range []string{ts.URL, tsTLS.URL} { u, err := newParsedURL(testURL) diff --git a/rpc/jsonrpc/client/http_uri_client.go b/rpc/jsonrpc/client/http_uri_client.go index 3f376ddb0..0bece04ec 100644 --- a/rpc/jsonrpc/client/http_uri_client.go +++ b/rpc/jsonrpc/client/http_uri_client.go @@ -3,7 +3,7 @@ package client import ( "context" "fmt" - "io/ioutil" + "io" "net/http" "strings" @@ -76,7 +76,7 @@ func (c *URIClient) Call(ctx context.Context, method string, } defer resp.Body.Close() - responseBytes, err := ioutil.ReadAll(resp.Body) + responseBytes, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("read response body: %w", err) } diff --git a/rpc/jsonrpc/server/http_json_handler.go b/rpc/jsonrpc/server/http_json_handler.go index 28dfcbf8a..13b4eefe5 100644 --- a/rpc/jsonrpc/server/http_json_handler.go +++ b/rpc/jsonrpc/server/http_json_handler.go @@ -4,7 +4,7 @@ import ( "bytes" "encoding/json" "fmt" - "io/ioutil" + "io" "net/http" "reflect" "sort" @@ -19,7 +19,7 @@ import ( // jsonrpc calls grab the given method's function info and runs reflect.Call func makeJSONRPCHandler(funcMap map[string]*RPCFunc, logger log.Logger) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { - b, err := ioutil.ReadAll(r.Body) + b, err := io.ReadAll(r.Body) if err != nil { res := types.RPCInvalidRequestError(nil, fmt.Errorf("error reading request body: %w", err), diff --git a/rpc/jsonrpc/server/http_json_handler_test.go b/rpc/jsonrpc/server/http_json_handler_test.go index a5c14e59a..fbcb470c0 100644 --- a/rpc/jsonrpc/server/http_json_handler_test.go +++ b/rpc/jsonrpc/server/http_json_handler_test.go @@ -3,7 +3,7 @@ package server import ( "bytes" "encoding/json" - "io/ioutil" + "io" "net/http" "net/http/httptest" "strings" @@ -66,7 +66,7 @@ func TestRPCParams(t *testing.T) { defer res.Body.Close() // Always expecting back a JSONRPCResponse assert.NotZero(t, res.StatusCode, "#%d: should always return code", i) - blob, err := ioutil.ReadAll(res.Body) + blob, err := io.ReadAll(res.Body) if err != nil { t.Errorf("#%d: err reading body: %v", i, err) continue @@ -113,7 +113,7 @@ func TestJSONRPCID(t *testing.T) { res := rec.Result() // Always expecting back a JSONRPCResponse assert.NotZero(t, res.StatusCode, "#%d: should always return code", i) - blob, err := ioutil.ReadAll(res.Body) + blob, err := io.ReadAll(res.Body) if err != nil { t.Errorf("#%d: err reading body: %v", i, err) continue @@ -143,7 +143,7 @@ func TestRPCNotification(t *testing.T) { // Always expecting back a JSONRPCResponse require.True(t, statusOK(res.StatusCode), "should always return 2XX") - blob, err := ioutil.ReadAll(res.Body) + blob, err := io.ReadAll(res.Body) res.Body.Close() require.Nil(t, err, "reading from the body should not give back an error") require.Equal(t, len(blob), 0, "a notification SHOULD NOT be responded to by the server") @@ -179,7 +179,7 @@ func TestRPCNotificationInBatch(t *testing.T) { res := rec.Result() // Always expecting back a JSONRPCResponse assert.True(t, statusOK(res.StatusCode), "#%d: should always return 2XX", i) - blob, err := ioutil.ReadAll(res.Body) + blob, err := io.ReadAll(res.Body) if err != nil { t.Errorf("#%d: err reading body: %v", i, err) continue diff --git a/rpc/jsonrpc/server/http_server.go b/rpc/jsonrpc/server/http_server.go index f653e6cc6..6eaa0ab93 100644 --- a/rpc/jsonrpc/server/http_server.go +++ b/rpc/jsonrpc/server/http_server.go @@ -53,10 +53,11 @@ func DefaultConfig() *Config { func Serve(listener net.Listener, handler http.Handler, logger log.Logger, config *Config) error { logger.Info("serve", "msg", log.NewLazySprintf("Starting RPC HTTP server on %s", listener.Addr())) s := &http.Server{ - Handler: RecoverAndLogHandler(maxBytesHandler{h: handler, n: config.MaxBodyBytes}, logger), - ReadTimeout: config.ReadTimeout, - WriteTimeout: config.WriteTimeout, - MaxHeaderBytes: config.MaxHeaderBytes, + Handler: RecoverAndLogHandler(maxBytesHandler{h: handler, n: config.MaxBodyBytes}, logger), + ReadTimeout: config.ReadTimeout, + ReadHeaderTimeout: config.ReadTimeout, + WriteTimeout: config.WriteTimeout, + MaxHeaderBytes: config.MaxHeaderBytes, } err := s.Serve(listener) logger.Info("RPC HTTP server stopped", "err", err) @@ -78,10 +79,11 @@ func ServeTLS( logger.Info("serve tls", "msg", log.NewLazySprintf("Starting RPC HTTPS server on %s (cert: %q, key: %q)", listener.Addr(), certFile, keyFile)) s := &http.Server{ - Handler: RecoverAndLogHandler(maxBytesHandler{h: handler, n: config.MaxBodyBytes}, logger), - ReadTimeout: config.ReadTimeout, - WriteTimeout: config.WriteTimeout, - MaxHeaderBytes: config.MaxHeaderBytes, + Handler: RecoverAndLogHandler(maxBytesHandler{h: handler, n: config.MaxBodyBytes}, logger), + ReadTimeout: config.ReadTimeout, + ReadHeaderTimeout: config.ReadTimeout, + WriteTimeout: config.WriteTimeout, + MaxHeaderBytes: config.MaxHeaderBytes, } err := s.ServeTLS(listener, certFile, keyFile) diff --git a/rpc/jsonrpc/server/http_server_test.go b/rpc/jsonrpc/server/http_server_test.go index c662e070f..6e2024b8d 100644 --- a/rpc/jsonrpc/server/http_server_test.go +++ b/rpc/jsonrpc/server/http_server_test.go @@ -4,7 +4,7 @@ import ( "crypto/tls" "errors" "fmt" - "io/ioutil" + "io" "net" "net/http" "net/http/httptest" @@ -102,7 +102,7 @@ func TestServeTLS(t *testing.T) { defer res.Body.Close() assert.Equal(t, http.StatusOK, res.StatusCode) - body, err := ioutil.ReadAll(res.Body) + body, err := io.ReadAll(res.Body) require.NoError(t, err) assert.Equal(t, []byte("some body"), body) } @@ -115,7 +115,7 @@ func TestWriteRPCResponseHTTP(t *testing.T) { err := WriteRPCResponseHTTP(w, types.NewRPCSuccessResponse(id, &sampleResult{"hello"})) require.NoError(t, err) resp := w.Result() - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) _ = resp.Body.Close() require.NoError(t, err) assert.Equal(t, 200, resp.StatusCode) @@ -135,7 +135,7 @@ func TestWriteRPCResponseHTTP(t *testing.T) { types.NewRPCSuccessResponse(id, &sampleResult{"world"})) require.NoError(t, err) resp = w.Result() - body, err = ioutil.ReadAll(resp.Body) + body, err = io.ReadAll(resp.Body) _ = resp.Body.Close() require.NoError(t, err) @@ -167,7 +167,7 @@ func TestWriteRPCResponseHTTPError(t *testing.T) { types.RPCInternalError(types.JSONRPCIntID(-1), errors.New("foo"))) require.NoError(t, err) resp := w.Result() - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) _ = resp.Body.Close() require.NoError(t, err) assert.Equal(t, http.StatusInternalServerError, resp.StatusCode) diff --git a/scripts/metricsgen/metricsdiff/metricsdiff.go b/scripts/metricsgen/metricsdiff/metricsdiff.go new file mode 100644 index 000000000..5ed72ff97 --- /dev/null +++ b/scripts/metricsgen/metricsdiff/metricsdiff.go @@ -0,0 +1,197 @@ +// metricsdiff is a tool for generating a diff between two different files containing +// prometheus metrics. metricsdiff outputs which metrics have been added, removed, +// or have different sets of labels between the two files. +package main + +import ( + "flag" + "fmt" + "io" + "log" + "os" + "path/filepath" + "sort" + "strings" + + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" +) + +func init() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, `Usage: %[1]s + +Generate the diff between the two files of Prometheus metrics. +The input should have the format output by a Prometheus HTTP endpoint. +The tool indicates which metrics have been added, removed, or use different +label sets from path1 to path2. + +`, filepath.Base(os.Args[0])) + flag.PrintDefaults() + } +} + +// Diff contains the set of metrics that were modified between two files +// containing prometheus metrics output. +type Diff struct { + Adds []string + Removes []string + + Changes []LabelDiff +} + +// LabelDiff describes the label changes between two versions of the same metric. +type LabelDiff struct { + Metric string + Adds []string + Removes []string +} + +type parsedMetric struct { + name string + labels []string +} + +type metricsList []parsedMetric + +func main() { + flag.Parse() + if flag.NArg() != 2 { + log.Fatalf("Usage is '%s ', got %d arguments", + filepath.Base(os.Args[0]), flag.NArg()) + } + fa, err := os.Open(flag.Arg(0)) + if err != nil { + log.Fatalf("Open: %v", err) + } + defer fa.Close() + fb, err := os.Open(flag.Arg(1)) + if err != nil { + log.Fatalf("Open: %v", err) + } + defer fb.Close() + md, err := DiffFromReaders(fa, fb) + if err != nil { + log.Fatalf("Generating diff: %v", err) + } + fmt.Print(md) +} + +// DiffFromReaders parses the metrics present in the readers a and b and +// determines which metrics were added and removed in b. +func DiffFromReaders(a, b io.Reader) (Diff, error) { + var parser expfmt.TextParser + amf, err := parser.TextToMetricFamilies(a) + if err != nil { + return Diff{}, err + } + bmf, err := parser.TextToMetricFamilies(b) + if err != nil { + return Diff{}, err + } + + md := Diff{} + aList := toList(amf) + bList := toList(bmf) + + i, j := 0, 0 + for i < len(aList) || j < len(bList) { + for j < len(bList) && (i >= len(aList) || bList[j].name < aList[i].name) { + md.Adds = append(md.Adds, bList[j].name) + j++ + } + for i < len(aList) && j < len(bList) && aList[i].name == bList[j].name { + adds, removes := listDiff(aList[i].labels, bList[j].labels) + if len(adds) > 0 || len(removes) > 0 { + md.Changes = append(md.Changes, LabelDiff{ + Metric: aList[i].name, + Adds: adds, + Removes: removes, + }) + } + i++ + j++ + } + for i < len(aList) && (j >= len(bList) || aList[i].name < bList[j].name) { + md.Removes = append(md.Removes, aList[i].name) + i++ + } + } + return md, nil +} + +func toList(l map[string]*dto.MetricFamily) metricsList { + r := make([]parsedMetric, len(l)) + var idx int + for name, family := range l { + r[idx] = parsedMetric{ + name: name, + labels: labelsToStringList(family.Metric[0].Label), + } + idx++ + } + sort.Sort(metricsList(r)) + return r +} + +func labelsToStringList(ls []*dto.LabelPair) []string { + r := make([]string, len(ls)) + for i, l := range ls { + r[i] = l.GetName() + } + return sort.StringSlice(r) +} + +func listDiff(a, b []string) ([]string, []string) { + adds, removes := []string{}, []string{} + i, j := 0, 0 + for i < len(a) || j < len(b) { + for j < len(b) && (i >= len(a) || b[j] < a[i]) { + adds = append(adds, b[j]) + j++ + } + for i < len(a) && j < len(b) && a[i] == b[j] { + i++ + j++ + } + for i < len(a) && (j >= len(b) || a[i] < b[j]) { + removes = append(removes, a[i]) + i++ + } + } + return adds, removes +} + +func (m metricsList) Len() int { return len(m) } +func (m metricsList) Less(i, j int) bool { return m[i].name < m[j].name } +func (m metricsList) Swap(i, j int) { m[i], m[j] = m[j], m[i] } + +func (m Diff) String() string { + var s strings.Builder + if len(m.Adds) > 0 || len(m.Removes) > 0 { + fmt.Fprintln(&s, "Metric changes:") + } + if len(m.Adds) > 0 { + for _, add := range m.Adds { + fmt.Fprintf(&s, "+++ %s\n", add) + } + } + if len(m.Removes) > 0 { + for _, rem := range m.Removes { + fmt.Fprintf(&s, "--- %s\n", rem) + } + } + if len(m.Changes) > 0 { + fmt.Fprintln(&s, "Label changes:") + for _, ld := range m.Changes { + fmt.Fprintf(&s, "Metric: %s\n", ld.Metric) + for _, add := range ld.Adds { + fmt.Fprintf(&s, "+++ %s\n", add) + } + for _, rem := range ld.Removes { + fmt.Fprintf(&s, "--- %s\n", rem) + } + } + } + return s.String() +} diff --git a/scripts/metricsgen/metricsdiff/metricsdiff_test.go b/scripts/metricsgen/metricsdiff/metricsdiff_test.go new file mode 100644 index 000000000..ec27ef1e9 --- /dev/null +++ b/scripts/metricsgen/metricsdiff/metricsdiff_test.go @@ -0,0 +1,62 @@ +package main_test + +import ( + "bytes" + "io" + "testing" + + "github.com/stretchr/testify/require" + metricsdiff "github.com/tendermint/tendermint/scripts/metricsgen/metricsdiff" +) + +func TestDiff(t *testing.T) { + for _, tc := range []struct { + name string + aContents string + bContents string + + want string + }{ + { + name: "labels", + aContents: ` + metric_one{label_one="content", label_two="content"} 0 + `, + bContents: ` + metric_one{label_three="content", label_four="content"} 0 + `, + want: `Label changes: +Metric: metric_one ++++ label_three ++++ label_four +--- label_one +--- label_two +`, + }, + { + name: "metrics", + aContents: ` + metric_one{label_one="content"} 0 + `, + bContents: ` + metric_two{label_two="content"} 0 + `, + want: `Metric changes: ++++ metric_two +--- metric_one +`, + }, + } { + t.Run(tc.name, func(t *testing.T) { + bufA := bytes.NewBuffer([]byte{}) + bufB := bytes.NewBuffer([]byte{}) + _, err := io.WriteString(bufA, tc.aContents) + require.NoError(t, err) + _, err = io.WriteString(bufB, tc.bContents) + require.NoError(t, err) + md, err := metricsdiff.DiffFromReaders(bufA, bufB) + require.NoError(t, err) + require.Equal(t, tc.want, md.String()) + }) + } +} diff --git a/scripts/metricsgen/metricsgen.go b/scripts/metricsgen/metricsgen.go new file mode 100644 index 000000000..0f564e66a --- /dev/null +++ b/scripts/metricsgen/metricsgen.go @@ -0,0 +1,347 @@ +// metricsgen is a code generation tool for creating constructors for Tendermint +// metrics types. +package main + +import ( + "bytes" + "flag" + "fmt" + "go/ast" + "go/format" + "go/parser" + "go/token" + "go/types" + "io" + "io/fs" + "log" + "os" + "path" + "path/filepath" + "reflect" + "regexp" + "strconv" + "strings" + "text/template" +) + +func init() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, `Usage: %[1]s -struct + +Generate constructors for the metrics type specified by -struct contained in +the current directory. The tool creates a new file in the current directory +containing the generated code. + +Options: +`, filepath.Base(os.Args[0])) + flag.PrintDefaults() + } +} + +const metricsPackageName = "github.com/go-kit/kit/metrics" + +const ( + metricNameTag = "metrics_name" + labelsTag = "metrics_labels" + bucketTypeTag = "metrics_buckettype" + bucketSizeTag = "metrics_bucketsizes" +) + +var ( + dir = flag.String("dir", ".", "Path to the directory containing the target package") + strct = flag.String("struct", "Metrics", "Struct to parse for metrics") +) + +var bucketType = map[string]string{ + "exprange": "stdprometheus.ExponentialBucketsRange", + "exp": "stdprometheus.ExponentialBuckets", + "lin": "stdprometheus.LinearBuckets", +} + +var tmpl = template.Must(template.New("tmpl").Parse(`// Code generated by metricsgen. DO NOT EDIT. + +package {{ .Package }} + +import ( + "github.com/go-kit/kit/metrics/discard" + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +func PrometheusMetrics(namespace string, labelsAndValues...string) *Metrics { + labels := []string{} + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } + return &Metrics{ + {{ range $metric := .ParsedMetrics }} + {{- $metric.FieldName }}: prometheus.New{{ $metric.TypeName }}From(stdprometheus.{{$metric.TypeName }}Opts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "{{$metric.MetricName }}", + Help: "{{ $metric.Description }}", + {{ if ne $metric.HistogramOptions.BucketType "" }} + Buckets: {{ $metric.HistogramOptions.BucketType }}({{ $metric.HistogramOptions.BucketSizes }}), + {{ else if ne $metric.HistogramOptions.BucketSizes "" }} + Buckets: []float64{ {{ $metric.HistogramOptions.BucketSizes }} }, + {{ end }} + {{- if eq (len $metric.Labels) 0 }} + }, labels).With(labelsAndValues...), + {{ else }} + }, append(labels, {{$metric.Labels}})).With(labelsAndValues...), + {{ end }} + {{- end }} + } +} + + +func NopMetrics() *Metrics { + return &Metrics{ + {{- range $metric := .ParsedMetrics }} + {{ $metric.FieldName }}: discard.New{{ $metric.TypeName }}(), + {{- end }} + } +} +`)) + +// ParsedMetricField is the data parsed for a single field of a metric struct. +type ParsedMetricField struct { + TypeName string + FieldName string + MetricName string + Description string + Labels string + + HistogramOptions HistogramOpts +} + +type HistogramOpts struct { + BucketType string + BucketSizes string +} + +// TemplateData is all of the data required for rendering a metric file template. +type TemplateData struct { + Package string + ParsedMetrics []ParsedMetricField +} + +func main() { + flag.Parse() + if *strct == "" { + log.Fatal("You must specify a non-empty -struct") + } + td, err := ParseMetricsDir(".", *strct) + if err != nil { + log.Fatalf("Parsing file: %v", err) + } + out := filepath.Join(*dir, "metrics.gen.go") + f, err := os.Create(out) + if err != nil { + log.Fatalf("Opening file: %v", err) + } + err = GenerateMetricsFile(f, td) + if err != nil { + log.Fatalf("Generating code: %v", err) + } +} +func ignoreTestFiles(f fs.FileInfo) bool { + return !strings.Contains(f.Name(), "_test.go") +} + +// ParseMetricsDir parses the dir and scans for a struct matching structName, +// ignoring all test files. ParseMetricsDir iterates the fields of the metrics +// struct and builds a TemplateData using the data obtained from the abstract syntax tree. +func ParseMetricsDir(dir string, structName string) (TemplateData, error) { + fs := token.NewFileSet() + d, err := parser.ParseDir(fs, dir, ignoreTestFiles, parser.ParseComments) + if err != nil { + return TemplateData{}, err + } + if len(d) > 1 { + return TemplateData{}, fmt.Errorf("multiple packages found in %s", dir) + } + if len(d) == 0 { + return TemplateData{}, fmt.Errorf("no go pacakges found in %s", dir) + } + + // Grab the package name. + var pkgName string + var pkg *ast.Package + for pkgName, pkg = range d { + } + td := TemplateData{ + Package: pkgName, + } + // Grab the metrics struct + m, mPkgName, err := findMetricsStruct(pkg.Files, structName) + if err != nil { + return TemplateData{}, err + } + for _, f := range m.Fields.List { + if !isMetric(f.Type, mPkgName) { + continue + } + pmf := parseMetricField(f) + td.ParsedMetrics = append(td.ParsedMetrics, pmf) + } + + return td, err +} + +// GenerateMetricsFile executes the metrics file template, writing the result +// into the io.Writer. +func GenerateMetricsFile(w io.Writer, td TemplateData) error { + b := []byte{} + buf := bytes.NewBuffer(b) + err := tmpl.Execute(buf, td) + if err != nil { + return err + } + b, err = format.Source(buf.Bytes()) + if err != nil { + return err + } + _, err = io.Copy(w, bytes.NewBuffer(b)) + if err != nil { + return err + } + return nil +} + +func findMetricsStruct(files map[string]*ast.File, structName string) (*ast.StructType, string, error) { + var ( + st *ast.StructType + ) + for _, file := range files { + mPkgName, err := extractMetricsPackageName(file.Imports) + if err != nil { + return nil, "", fmt.Errorf("unable to determine metrics package name: %v", err) + } + if !ast.FilterFile(file, func(name string) bool { + return name == structName + }) { + continue + } + ast.Inspect(file, func(n ast.Node) bool { + switch f := n.(type) { + case *ast.TypeSpec: + if f.Name.Name == structName { + var ok bool + st, ok = f.Type.(*ast.StructType) + if !ok { + err = fmt.Errorf("found identifier for %q of wrong type", structName) + } + } + return false + default: + return true + } + }) + if err != nil { + return nil, "", err + } + if st != nil { + return st, mPkgName, nil + } + } + return nil, "", fmt.Errorf("target struct %q not found in dir", structName) +} + +func parseMetricField(f *ast.Field) ParsedMetricField { + pmf := ParsedMetricField{ + Description: extractHelpMessage(f.Doc), + MetricName: extractFieldName(f.Names[0].String(), f.Tag), + FieldName: f.Names[0].String(), + TypeName: extractTypeName(f.Type), + Labels: extractLabels(f.Tag), + } + if pmf.TypeName == "Histogram" { + pmf.HistogramOptions = extractHistogramOptions(f.Tag) + } + return pmf +} + +func extractTypeName(e ast.Expr) string { + return strings.TrimPrefix(path.Ext(types.ExprString(e)), ".") +} + +func extractHelpMessage(cg *ast.CommentGroup) string { + if cg == nil { + return "" + } + var help []string //nolint: prealloc + for _, c := range cg.List { + mt := strings.TrimPrefix(c.Text, "//metrics:") + if mt != c.Text { + return strings.TrimSpace(mt) + } + help = append(help, strings.TrimSpace(strings.TrimPrefix(c.Text, "//"))) + } + return strings.Join(help, " ") +} + +func isMetric(e ast.Expr, mPkgName string) bool { + return strings.Contains(types.ExprString(e), fmt.Sprintf("%s.", mPkgName)) +} + +func extractLabels(bl *ast.BasicLit) string { + if bl != nil { + t := reflect.StructTag(strings.Trim(bl.Value, "`")) + if v := t.Get(labelsTag); v != "" { + var res []string + for _, s := range strings.Split(v, ",") { + res = append(res, strconv.Quote(strings.TrimSpace(s))) + } + return strings.Join(res, ",") + } + } + return "" +} + +func extractFieldName(name string, tag *ast.BasicLit) string { + if tag != nil { + t := reflect.StructTag(strings.Trim(tag.Value, "`")) + if v := t.Get(metricNameTag); v != "" { + return v + } + } + return toSnakeCase(name) +} + +func extractHistogramOptions(tag *ast.BasicLit) HistogramOpts { + h := HistogramOpts{} + if tag != nil { + t := reflect.StructTag(strings.Trim(tag.Value, "`")) + if v := t.Get(bucketTypeTag); v != "" { + h.BucketType = bucketType[v] + } + if v := t.Get(bucketSizeTag); v != "" { + h.BucketSizes = v + } + } + return h +} + +func extractMetricsPackageName(imports []*ast.ImportSpec) (string, error) { + for _, i := range imports { + u, err := strconv.Unquote(i.Path.Value) + if err != nil { + return "", err + } + if u == metricsPackageName { + if i.Name != nil { + return i.Name.Name, nil + } + return path.Base(u), nil + } + } + return "", nil +} + +var capitalChange = regexp.MustCompile("([a-z0-9])([A-Z])") + +func toSnakeCase(str string) string { + snake := capitalChange.ReplaceAllString(str, "${1}_${2}") + return strings.ToLower(snake) +} diff --git a/scripts/metricsgen/metricsgen_test.go b/scripts/metricsgen/metricsgen_test.go new file mode 100644 index 000000000..d543623e9 --- /dev/null +++ b/scripts/metricsgen/metricsgen_test.go @@ -0,0 +1,258 @@ +package main_test + +import ( + "bytes" + "fmt" + "go/parser" + "go/token" + "io" + "os" + "path" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + metricsgen "github.com/tendermint/tendermint/scripts/metricsgen" +) + +const testDataDir = "./testdata" + +func TestSimpleTemplate(t *testing.T) { + m := metricsgen.ParsedMetricField{ + TypeName: "Histogram", + FieldName: "MyMetric", + MetricName: "request_count", + Description: "how many requests were made since the start of the process", + Labels: "first, second, third", + } + td := metricsgen.TemplateData{ + Package: "mypack", + ParsedMetrics: []metricsgen.ParsedMetricField{m}, + } + b := bytes.NewBuffer([]byte{}) + err := metricsgen.GenerateMetricsFile(b, td) + if err != nil { + t.Fatalf("unable to parse template %v", err) + } +} + +func TestFromData(t *testing.T) { + infos, err := os.ReadDir(testDataDir) + if err != nil { + t.Fatalf("unable to open file %v", err) + } + for _, dir := range infos { + t.Run(dir.Name(), func(t *testing.T) { + if !dir.IsDir() { + t.Fatalf("expected file %s to be directory", dir.Name()) + } + dirName := path.Join(testDataDir, dir.Name()) + pt, err := metricsgen.ParseMetricsDir(dirName, "Metrics") + if err != nil { + t.Fatalf("unable to parse from dir %q: %v", dir, err) + } + outFile := path.Join(dirName, "out.go") + if err != nil { + t.Fatalf("unable to open file %s: %v", outFile, err) + } + of, err := os.Create(outFile) + if err != nil { + t.Fatalf("unable to open file %s: %v", outFile, err) + } + defer os.Remove(outFile) + if err := metricsgen.GenerateMetricsFile(of, pt); err != nil { + t.Fatalf("unable to generate metrics file %s: %v", outFile, err) + } + if _, err := parser.ParseFile(token.NewFileSet(), outFile, nil, parser.AllErrors); err != nil { + t.Fatalf("unable to parse generated file %s: %v", outFile, err) + } + bNew, err := os.ReadFile(outFile) + if err != nil { + t.Fatalf("unable to read generated file %s: %v", outFile, err) + } + goldenFile := path.Join(dirName, "metrics.gen.go") + bOld, err := os.ReadFile(goldenFile) + if err != nil { + t.Fatalf("unable to read file %s: %v", goldenFile, err) + } + if !bytes.Equal(bNew, bOld) { + t.Fatalf("newly generated code in file %s does not match golden file %s\n"+ + "if the output of the metricsgen tool is expected to change run the following make target: \n"+ + "\tmake metrics", outFile, goldenFile) + } + }) + } +} + +func TestParseMetricsStruct(t *testing.T) { + const pkgName = "mypkg" + metricsTests := []struct { + name string + shouldError bool + metricsStruct string + expected metricsgen.TemplateData + }{ + { + name: "basic", + metricsStruct: `type Metrics struct { + myGauge metrics.Gauge + }`, + expected: metricsgen.TemplateData{ + Package: pkgName, + ParsedMetrics: []metricsgen.ParsedMetricField{ + { + TypeName: "Gauge", + FieldName: "myGauge", + MetricName: "my_gauge", + }, + }, + }, + }, + { + name: "histogram", + metricsStruct: "type Metrics struct {\n" + + "myHistogram metrics.Histogram `metrics_buckettype:\"exp\" metrics_bucketsizes:\"1, 100, .8\"`\n" + + "}", + expected: metricsgen.TemplateData{ + Package: pkgName, + ParsedMetrics: []metricsgen.ParsedMetricField{ + { + TypeName: "Histogram", + FieldName: "myHistogram", + MetricName: "my_histogram", + + HistogramOptions: metricsgen.HistogramOpts{ + BucketType: "stdprometheus.ExponentialBuckets", + BucketSizes: "1, 100, .8", + }, + }, + }, + }, + }, + { + name: "labeled name", + metricsStruct: "type Metrics struct {\n" + + "myCounter metrics.Counter `metrics_name:\"new_name\"`\n" + + "}", + expected: metricsgen.TemplateData{ + Package: pkgName, + ParsedMetrics: []metricsgen.ParsedMetricField{ + { + TypeName: "Counter", + FieldName: "myCounter", + MetricName: "new_name", + }, + }, + }, + }, + { + name: "metric labels", + metricsStruct: "type Metrics struct {\n" + + "myCounter metrics.Counter `metrics_labels:\"label1,label2\"`\n" + + "}", + expected: metricsgen.TemplateData{ + Package: pkgName, + ParsedMetrics: []metricsgen.ParsedMetricField{ + { + TypeName: "Counter", + FieldName: "myCounter", + MetricName: "my_counter", + Labels: "\"label1\",\"label2\"", + }, + }, + }, + }, + { + name: "ignore non-metric field", + metricsStruct: `type Metrics struct { + myCounter metrics.Counter + nonMetric string + }`, + expected: metricsgen.TemplateData{ + Package: pkgName, + ParsedMetrics: []metricsgen.ParsedMetricField{ + { + TypeName: "Counter", + FieldName: "myCounter", + MetricName: "my_counter", + }, + }, + }, + }, + } + for _, testCase := range metricsTests { + t.Run(testCase.name, func(t *testing.T) { + dir, err := os.MkdirTemp(os.TempDir(), "metricsdir") + if err != nil { + t.Fatalf("unable to create directory: %v", err) + } + defer os.Remove(dir) + f, err := os.Create(filepath.Join(dir, "metrics.go")) + if err != nil { + t.Fatalf("unable to open file: %v", err) + } + pkgLine := fmt.Sprintf("package %s\n", pkgName) + importClause := ` + import( + "github.com/go-kit/kit/metrics" + ) + ` + + _, err = io.WriteString(f, pkgLine) + require.NoError(t, err) + _, err = io.WriteString(f, importClause) + require.NoError(t, err) + _, err = io.WriteString(f, testCase.metricsStruct) + require.NoError(t, err) + + td, err := metricsgen.ParseMetricsDir(dir, "Metrics") + if testCase.shouldError { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, testCase.expected, td) + } + }) + } +} + +func TestParseAliasedMetric(t *testing.T) { + aliasedData := ` + package mypkg + + import( + mymetrics "github.com/go-kit/kit/metrics" + ) + type Metrics struct { + m mymetrics.Gauge + } + ` + dir, err := os.MkdirTemp(os.TempDir(), "metricsdir") + if err != nil { + t.Fatalf("unable to create directory: %v", err) + } + defer os.Remove(dir) + f, err := os.Create(filepath.Join(dir, "metrics.go")) + if err != nil { + t.Fatalf("unable to open file: %v", err) + } + _, err = io.WriteString(f, aliasedData) + if err != nil { + t.Fatalf("unable to write to file: %v", err) + } + td, err := metricsgen.ParseMetricsDir(dir, "Metrics") + require.NoError(t, err) + + expected := + metricsgen.TemplateData{ + Package: "mypkg", + ParsedMetrics: []metricsgen.ParsedMetricField{ + { + TypeName: "Gauge", + FieldName: "m", + MetricName: "m", + }, + }, + } + require.Equal(t, expected, td) +} diff --git a/scripts/metricsgen/testdata/basic/metrics.gen.go b/scripts/metricsgen/testdata/basic/metrics.gen.go new file mode 100644 index 000000000..d541cb2db --- /dev/null +++ b/scripts/metricsgen/testdata/basic/metrics.gen.go @@ -0,0 +1,30 @@ +// Code generated by metricsgen. DO NOT EDIT. + +package basic + +import ( + "github.com/go-kit/kit/metrics/discard" + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + labels := []string{} + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } + return &Metrics{ + Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "height", + Help: "simple metric that tracks the height of the chain.", + }, labels).With(labelsAndValues...), + } +} + +func NopMetrics() *Metrics { + return &Metrics{ + Height: discard.NewGauge(), + } +} diff --git a/scripts/metricsgen/testdata/basic/metrics.go b/scripts/metricsgen/testdata/basic/metrics.go new file mode 100644 index 000000000..1a361f90f --- /dev/null +++ b/scripts/metricsgen/testdata/basic/metrics.go @@ -0,0 +1,11 @@ +package basic + +import "github.com/go-kit/kit/metrics" + +//go:generate go run ../../../../scripts/metricsgen -struct=Metrics + +// Metrics contains metrics exposed by this package. +type Metrics struct { + // simple metric that tracks the height of the chain. + Height metrics.Gauge +} diff --git a/scripts/metricsgen/testdata/commented/metrics.gen.go b/scripts/metricsgen/testdata/commented/metrics.gen.go new file mode 100644 index 000000000..c1346da38 --- /dev/null +++ b/scripts/metricsgen/testdata/commented/metrics.gen.go @@ -0,0 +1,30 @@ +// Code generated by metricsgen. DO NOT EDIT. + +package commented + +import ( + "github.com/go-kit/kit/metrics/discard" + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + labels := []string{} + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } + return &Metrics{ + Field: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "field", + Help: "Height of the chain. We expect multi-line comments to parse correctly.", + }, labels).With(labelsAndValues...), + } +} + +func NopMetrics() *Metrics { + return &Metrics{ + Field: discard.NewGauge(), + } +} diff --git a/scripts/metricsgen/testdata/commented/metrics.go b/scripts/metricsgen/testdata/commented/metrics.go new file mode 100644 index 000000000..174f1e233 --- /dev/null +++ b/scripts/metricsgen/testdata/commented/metrics.go @@ -0,0 +1,11 @@ +package commented + +import "github.com/go-kit/kit/metrics" + +//go:generate go run ../../../../scripts/metricsgen -struct=Metrics + +type Metrics struct { + // Height of the chain. + // We expect multi-line comments to parse correctly. + Field metrics.Gauge +} diff --git a/scripts/metricsgen/testdata/tags/metrics.gen.go b/scripts/metricsgen/testdata/tags/metrics.gen.go new file mode 100644 index 000000000..43779c7a1 --- /dev/null +++ b/scripts/metricsgen/testdata/tags/metrics.gen.go @@ -0,0 +1,55 @@ +// Code generated by metricsgen. DO NOT EDIT. + +package tags + +import ( + "github.com/go-kit/kit/metrics/discard" + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + labels := []string{} + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } + return &Metrics{ + WithLabels: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "with_labels", + Help: "", + }, append(labels, "step", "time")).With(labelsAndValues...), + WithExpBuckets: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "with_exp_buckets", + Help: "", + + Buckets: stdprometheus.ExponentialBuckets(.1, 100, 8), + }, labels).With(labelsAndValues...), + WithBuckets: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "with_buckets", + Help: "", + + Buckets: []float64{1, 2, 3, 4, 5}, + }, labels).With(labelsAndValues...), + Named: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "metric_with_name", + Help: "", + }, labels).With(labelsAndValues...), + } +} + +func NopMetrics() *Metrics { + return &Metrics{ + WithLabels: discard.NewCounter(), + WithExpBuckets: discard.NewHistogram(), + WithBuckets: discard.NewHistogram(), + Named: discard.NewCounter(), + } +} diff --git a/scripts/metricsgen/testdata/tags/metrics.go b/scripts/metricsgen/testdata/tags/metrics.go new file mode 100644 index 000000000..8562dcf43 --- /dev/null +++ b/scripts/metricsgen/testdata/tags/metrics.go @@ -0,0 +1,12 @@ +package tags + +import "github.com/go-kit/kit/metrics" + +//go:generate go run ../../../../scripts/metricsgen -struct=Metrics + +type Metrics struct { + WithLabels metrics.Counter `metrics_labels:"step,time"` + WithExpBuckets metrics.Histogram `metrics_buckettype:"exp" metrics_bucketsizes:".1,100,8"` + WithBuckets metrics.Histogram `metrics_bucketsizes:"1, 2, 3, 4, 5"` + Named metrics.Counter `metrics_name:"metric_with_name"` +} diff --git a/scripts/mockery_generate.sh b/scripts/mockery_generate.sh index 2d6f40e63..2509e0cdb 100755 --- a/scripts/mockery_generate.sh +++ b/scripts/mockery_generate.sh @@ -1,15 +1,6 @@ #!/bin/sh # # Invoke Mockery v2 to update generated mocks for the given type. -# -# This script runs a locally-installed "mockery" if available, otherwise it -# runs the published Docker container. This legerdemain is so that the CI build -# and a local build can work off the same script. -# -if ! which mockery ; then - mockery() { - docker run --rm -v "$PWD":/w --workdir=/w vektra/mockery:v2.12.3 - } -fi -mockery --disable-version-string --case underscore --name "$@" +go run github.com/vektra/mockery/v2 --disable-version-string --case underscore --name "$*" + diff --git a/spec/README.md b/spec/README.md new file mode 100644 index 000000000..e7563175c --- /dev/null +++ b/spec/README.md @@ -0,0 +1,93 @@ +--- +order: 1 +title: Overview +parent: + title: Spec + order: 7 +--- + +# Tendermint Spec + +This is a markdown specification of the Tendermint blockchain. +It defines the base data structures, how they are validated, +and how they are communicated over the network. + +If you find discrepancies between the spec and the code that +do not have an associated issue or pull request on github, +please submit them to our [bug bounty](https://tendermint.com/security)! + +## Contents + +- [Overview](#overview) + +### Data Structures + +- [Encoding and Digests](./core/encoding.md) +- [Blockchain](./core/data_structures.md) +- [State](./core/state.md) + +### Consensus Protocol + +- [Consensus Algorithm](./consensus/consensus.md) +- [Creating a proposal](./consensus/creating-proposal.md) +- [Time](./consensus/bft-time.md) +- [Light-Client](./consensus/light-client/README.md) + +### P2P and Network Protocols + +- [The Base P2P Layer](./p2p/node.md): multiplex the protocols ("reactors") on authenticated and encrypted TCP connections +- [Peer Exchange (PEX)](./p2p/messages/pex.md): gossip known peer addresses so peers can find each other +- [Block Sync](./p2p/messages/block-sync.md): gossip blocks so peers can catch up quickly +- [Consensus](./p2p/messages/consensus.md): gossip votes and block parts so new blocks can be committed +- [Mempool](./p2p/messages/mempool.md): gossip transactions so they get included in blocks +- [Evidence](./p2p/messages/evidence.md): sending invalid evidence will stop the peer + +### RPC + +- [RPC SPEC](./rpc/README.md): Specification of the Tendermint remote procedure call interface. + +### Software + +- [ABCI](./abci/README.md): Details about interactions between the + application and consensus engine over ABCI +- [Write-Ahead Log](./consensus/wal.md): Details about how the consensus + engine preserves data and recovers from crash failures + +## Overview + +Tendermint provides Byzantine Fault Tolerant State Machine Replication using +hash-linked batches of transactions. Such transaction batches are called "blocks". +Hence, Tendermint defines a "blockchain". + +Each block in Tendermint has a unique index - its Height. +Height's in the blockchain are monotonic. +Each block is committed by a known set of weighted Validators. +Membership and weighting within this validator set may change over time. +Tendermint guarantees the safety and liveness of the blockchain +so long as less than 1/3 of the total weight of the Validator set +is malicious or faulty. + +A commit in Tendermint is a set of signed messages from more than 2/3 of +the total weight of the current Validator set. Validators take turns proposing +blocks and voting on them. Once enough votes are received, the block is considered +committed. These votes are included in the _next_ block as proof that the previous block +was committed - they cannot be included in the current block, as that block has already been +created. + +Once a block is committed, it can be executed against an application. +The application returns results for each of the transactions in the block. +The application can also return changes to be made to the validator set, +as well as a cryptographic digest of its latest state. + +Tendermint is designed to enable efficient verification and authentication +of the latest state of the blockchain. To achieve this, it embeds +cryptographic commitments to certain information in the block "header". +This information includes the contents of the block (eg. the transactions), +the validator set committing the block, as well as the various results returned by the application. +Note, however, that block execution only occurs _after_ a block is committed. +Thus, application results can only be included in the _next_ block. + +Also note that information like the transaction results and the validator set are never +directly included in the block - only their cryptographic digests (Merkle roots) are. +Hence, verification of a block requires a separate data structure to store this information. +We call this the `State`. Block verification also requires access to the previous block. diff --git a/spec/abci/README.md b/spec/abci/README.md new file mode 100644 index 000000000..957742be7 --- /dev/null +++ b/spec/abci/README.md @@ -0,0 +1,27 @@ +--- +order: 1 +parent: + title: ABCI + order: 2 +--- + +# ABCI + +ABCI stands for "**A**pplication **B**lock**c**hain **I**nterface". +ABCI is the interface between Tendermint (a state-machine replication engine) +and your application (the actual state machine). It consists of a set of +_methods_, each with a corresponding `Request` and `Response`message type. +To perform state-machine replication, Tendermint calls the ABCI methods on the +ABCI application by sending the `Request*` messages and receiving the `Response*` messages in return. + +All ABCI messages and methods are defined in [protocol buffers](https://github.com/tendermint/spec/blob/master/proto/abci/types.proto). +This allows Tendermint to run with applications written in many programming languages. + +This specification is split as follows: + +- [Methods and Types](./abci.md) - complete details on all ABCI methods and + message types +- [Applications](./apps.md) - how to manage ABCI application state and other + details about building ABCI applications +- [Client and Server](./client-server.md) - for those looking to implement their + own ABCI application servers diff --git a/spec/abci/abci.md b/spec/abci/abci.md new file mode 100644 index 000000000..634daec68 --- /dev/null +++ b/spec/abci/abci.md @@ -0,0 +1,775 @@ +--- +order: 1 +title: Method and Types +--- + +# Methods and Types + +## Connections + +ABCI applications can run either within the _same_ process as the Tendermint +state-machine replication engine, or as a _separate_ process from the state-machine +replication engine. When run within the same process, Tendermint will call the ABCI +application methods directly as Go method calls. + +When Tendermint and the ABCI application are run as separate processes, Tendermint +opens four connections to the application for ABCI methods. The connections each +handle a subset of the ABCI method calls. These subsets are defined as follows: + +#### **Consensus** connection + +* Driven by a consensus protocol and is responsible for block execution. +* Handles the `InitChain`, `BeginBlock`, `DeliverTx`, `EndBlock`, and `Commit` method +calls. + +#### **Mempool** connection + +* For validating new transactions, before they're shared or included in a block. +* Handles the `CheckTx` calls. + +#### **Info** connection + +* For initialization and for queries from the user. +* Handles the `Info` and `Query` calls. + +#### **Snapshot** connection + +* For serving and restoring [state sync snapshots](apps.md#state-sync). +* Handles the `ListSnapshots`, `LoadSnapshotChunk`, `OfferSnapshot`, and `ApplySnapshotChunk` calls. + +Additionally, there is a `Flush` method that is called on every connection, +and an `Echo` method that is just for debugging. + +More details on managing state across connections can be found in the section on +[ABCI Applications](apps.md). + +## Errors + +The `Query`, `CheckTx` and `DeliverTx` methods include a `Code` field in their `Response*`. +This field is meant to contain an application-specific response code. +A response code of `0` indicates no error. Any other response code +indicates to Tendermint that an error occurred. + +These methods also return a `Codespace` string to Tendermint. This field is +used to disambiguate `Code` values returned by different domains of the +application. The `Codespace` is a namespace for the `Code`. + +The `Echo`, `Info`, `InitChain`, `BeginBlock`, `EndBlock`, `Commit` methods +do not return errors. An error in any of these methods represents a critical +issue that Tendermint has no reasonable way to handle. If there is an error in one +of these methods, the application must crash to ensure that the error is safely +handled by an operator. + +The handling of non-zero response codes by Tendermint is described below + +### CheckTx + +The `CheckTx` ABCI method controls what transactions are considered for inclusion in a block. +When Tendermint receives a `ResponseCheckTx` with a non-zero `Code`, the associated +transaction will be not be added to Tendermint's mempool or it will be removed if +it is already included. + +### DeliverTx + +The `DeliverTx` ABCI method delivers transactions from Tendermint to the application. +When Tendermint recieves a `ResponseDeliverTx` with a non-zero `Code`, the response code is logged. +The transaction was already included in a block, so the `Code` does not influence +Tendermint consensus. + +### Query + +The `Query` ABCI method query queries the application for information about application state. +When Tendermint receives a `ResponseQuery` with a non-zero `Code`, this code is +returned directly to the client that initiated the query. + +## Events + +The `CheckTx`, `BeginBlock`, `DeliverTx`, `EndBlock` methods include an `Events` +field in their `Response*`. Applications may respond to these ABCI methods with a set of events. +Events allow applications to associate metadata about ABCI method execution with the +transactions and blocks this metadata relates to. +Events returned via these ABCI methods do not impact Tendermint consensus in any way +and instead exist to power subscriptions and queries of Tendermint state. + +An `Event` contains a `type` and a list of `EventAttributes`, which are key-value +string pairs denoting metadata about what happened during the method's execution. +`Event` values can be used to index transactions and blocks according to what happened +during their execution. Note that the set of events returned for a block from +`BeginBlock` and `EndBlock` are merged. In case both methods return the same +key, only the value defined in `EndBlock` is used. + +Each event has a `type` which is meant to categorize the event for a particular +`Response*` or `Tx`. A `Response*` or `Tx` may contain multiple events with duplicate +`type` values, where each distinct entry is meant to categorize attributes for a +particular event. Every key and value in an event's attributes must be UTF-8 +encoded strings along with the event type itself. + +```protobuf +message Event { + string type = 1; + repeated EventAttribute attributes = 2; +} +``` + +The attributes of an `Event` consist of a `key`, a `value`, and an `index` flag. The +index flag notifies the Tendermint indexer to index the attribute. The value of +the `index` flag is non-deterministic and may vary across different nodes in the network. + +```protobuf +message EventAttribute { + bytes key = 1; + bytes value = 2; + bool index = 3; // nondeterministic +} +``` + +Example: + +```go + abci.ResponseDeliverTx{ + // ... + Events: []abci.Event{ + { + Type: "validator.provisions", + Attributes: []abci.EventAttribute{ + abci.EventAttribute{Key: []byte("address"), Value: []byte("..."), Index: true}, + abci.EventAttribute{Key: []byte("amount"), Value: []byte("..."), Index: true}, + abci.EventAttribute{Key: []byte("balance"), Value: []byte("..."), Index: true}, + }, + }, + { + Type: "validator.provisions", + Attributes: []abci.EventAttribute{ + abci.EventAttribute{Key: []byte("address"), Value: []byte("..."), Index: true}, + abci.EventAttribute{Key: []byte("amount"), Value: []byte("..."), Index: false}, + abci.EventAttribute{Key: []byte("balance"), Value: []byte("..."), Index: false}, + }, + }, + { + Type: "validator.slashed", + Attributes: []abci.EventAttribute{ + abci.EventAttribute{Key: []byte("address"), Value: []byte("..."), Index: false}, + abci.EventAttribute{Key: []byte("amount"), Value: []byte("..."), Index: true}, + abci.EventAttribute{Key: []byte("reason"), Value: []byte("..."), Index: true}, + }, + }, + // ... + }, +} +``` + +## EvidenceType + +Tendermint's security model relies on the use of "evidence". Evidence is proof of +malicious behavior by a network participant. It is the responsibility of Tendermint +to detect such malicious behavior. When malicious behavior is detected, Tendermint +will gossip evidence of the behavior to other nodes and commit the evidence to +the chain once it is verified by all validators. This evidence will then be +passed it on to the application through the ABCI. It is the responsibility of the +application to handle the evidence and exercise punishment. + +EvidenceType has the following protobuf format: + +```proto +enum EvidenceType { + UNKNOWN = 0; + DUPLICATE_VOTE = 1; + LIGHT_CLIENT_ATTACK = 2; +} +``` + +There are two forms of evidence: Duplicate Vote and Light Client Attack. More +information can be found in either [data structures](https://github.com/tendermint/spec/blob/master/spec/core/data_structures.md) +or [accountability](https://github.com/tendermint/spec/blob/master/spec/light-client/accountability/) + +## Determinism + +ABCI applications must implement deterministic finite-state machines to be +securely replicated by the Tendermint consensus engine. This means block execution +over the Consensus Connection must be strictly deterministic: given the same +ordered set of requests, all nodes will compute identical responses, for all +BeginBlock, DeliverTx, EndBlock, and Commit. This is critical, because the +responses are included in the header of the next block, either via a Merkle root +or directly, so all nodes must agree on exactly what they are. + +For this reason, it is recommended that applications not be exposed to any +external user or process except via the ABCI connections to a consensus engine +like Tendermint Core. The application must only change its state based on input +from block execution (BeginBlock, DeliverTx, EndBlock, Commit), and not through +any other kind of request. This is the only way to ensure all nodes see the same +transactions and compute the same results. + +If there is some non-determinism in the state machine, consensus will eventually +fail as nodes disagree over the correct values for the block header. The +non-determinism must be fixed and the nodes restarted. + +Sources of non-determinism in applications may include: + +* Hardware failures + * Cosmic rays, overheating, etc. +* Node-dependent state + * Random numbers + * Time +* Underspecification + * Library version changes + * Race conditions + * Floating point numbers + * JSON serialization + * Iterating through hash-tables/maps/dictionaries +* External Sources + * Filesystem + * Network calls (eg. some external REST API service) + +See [#56](https://github.com/tendermint/abci/issues/56) for original discussion. + +Note that some methods (`Query, CheckTx, DeliverTx`) return +explicitly non-deterministic data in the form of `Info` and `Log` fields. The `Log` is +intended for the literal output from the application's logger, while the +`Info` is any additional info that should be returned. These are the only fields +that are not included in block header computations, so we don't need agreement +on them. All other fields in the `Response*` must be strictly deterministic. + +## Block Execution + +The first time a new blockchain is started, Tendermint calls +`InitChain`. From then on, the following sequence of methods is executed for each +block: + +`BeginBlock, [DeliverTx], EndBlock, Commit` + +where one `DeliverTx` is called for each transaction in the block. +The result is an updated application state. +Cryptographic commitments to the results of DeliverTx, EndBlock, and +Commit are included in the header of the next block. + +## State Sync + +State sync allows new nodes to rapidly bootstrap by discovering, fetching, and applying +state machine snapshots instead of replaying historical blocks. For more details, see the +[state sync section](../spec/p2p/messages/state-sync.md). + +New nodes will discover and request snapshots from other nodes in the P2P network. +A Tendermint node that receives a request for snapshots from a peer will call +`ListSnapshots` on its application to retrieve any local state snapshots. After receiving + snapshots from peers, the new node will offer each snapshot received from a peer +to its local application via the `OfferSnapshot` method. + +Snapshots may be quite large and are thus broken into smaller "chunks" that can be +assembled into the whole snapshot. Once the application accepts a snapshot and +begins restoring it, Tendermint will fetch snapshot "chunks" from existing nodes. +The node providing "chunks" will fetch them from its local application using +the `LoadSnapshotChunk` method. + +As the new node receives "chunks" it will apply them sequentially to the local +application with `ApplySnapshotChunk`. When all chunks have been applied, the application +`AppHash` is retrieved via an `Info` query. The `AppHash` is then compared to +the blockchain's `AppHash` which is verified via [light client verification](../spec/light-client/verification/README.md). + +## Messages + +### Echo + +* **Request**: + * `Message (string)`: A string to echo back +* **Response**: + * `Message (string)`: The input string +* **Usage**: + * Echo a string to test an abci client/server implementation + +### Flush + +* **Usage**: + * Signals that messages queued on the client should be flushed to + the server. It is called periodically by the client + implementation to ensure asynchronous requests are actually + sent, and is called immediately to make a synchronous request, + which returns when the Flush response comes back. + +### Info + +* **Request**: + + | Name | Type | Description | Field Number | + |---------------|--------|------------------------------------------|--------------| + | version | string | The Tendermint software semantic version | 1 | + | block_version | uint64 | The Tendermint Block Protocol version | 2 | + | p2p_version | uint64 | The Tendermint P2P Protocol version | 3 | + | abci_version | string | The Tendermint ABCI semantic version | 4 | + +* **Response**: + + | Name | Type | Description | Field Number | + |---------------------|--------|--------------------------------------------------|--------------| + | data | string | Some arbitrary information | 1 | + | version | string | The application software semantic version | 2 | + | app_version | uint64 | The application protocol version | 3 | + | last_block_height | int64 | Latest block for which the app has called Commit | 4 | + | last_block_app_hash | bytes | Latest result of Commit | 5 | + +* **Usage**: + * Return information about the application state. + * Used to sync Tendermint with the application during a handshake + that happens on startup. + * The returned `app_version` will be included in the Header of every block. + * Tendermint expects `last_block_app_hash` and `last_block_height` to + be updated during `Commit`, ensuring that `Commit` is never + called twice for the same block height. + +> Note: Semantic version is a reference to [semantic versioning](https://semver.org/). Semantic versions in info will be displayed as X.X.x. + +### InitChain + +* **Request**: + + | Name | Type | Description | Field Number | + |------------------|--------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------|--------------| + | time | [google.protobuf.Timestamp](https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#google.protobuf.Timestamp) | Genesis time | 1 | + | chain_id | string | ID of the blockchain. | 2 | + | consensus_params | [ConsensusParams](#consensusparams) | Initial consensus-critical parameters. | 3 | + | validators | repeated [ValidatorUpdate](#validatorupdate) | Initial genesis validators, sorted by voting power. | 4 | + | app_state_bytes | bytes | Serialized initial application state. JSON bytes. | 5 | + | initial_height | int64 | Height of the initial block (typically `1`). | 6 | + +* **Response**: + + | Name | Type | Description | Field Number | + |------------------|----------------------------------------------|-------------------------------------------------|--------------| + | consensus_params | [ConsensusParams](#consensusparams) | Initial consensus-critical parameters (optional | 1 | + | validators | repeated [ValidatorUpdate](#validatorupdate) | Initial validator set (optional). | 2 | + | app_hash | bytes | Initial application hash. | 3 | + +* **Usage**: + * Called once upon genesis. + * If ResponseInitChain.Validators is empty, the initial validator set will be the RequestInitChain.Validators + * If ResponseInitChain.Validators is not empty, it will be the initial + validator set (regardless of what is in RequestInitChain.Validators). + * This allows the app to decide if it wants to accept the initial validator + set proposed by tendermint (ie. in the genesis file), or if it wants to use + a different one (perhaps computed based on some application specific + information in the genesis file). + +### Query + +* **Request**: + + | Name | Type | Description | Field Number | + |--------|--------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| + | data | bytes | Raw query bytes. Can be used with or in lieu of Path. | 1 | + | path | string | Path field of the request URI. Can be used with or in lieu of `data`. Apps MUST interpret `/store` as a query by key on the underlying store. The key SHOULD be specified in the `data` field. Apps SHOULD allow queries over specific types like `/accounts/...` or `/votes/...` | 2 | + | height | int64 | The block height for which you want the query (default=0 returns data for the latest committed block). Note that this is the height of the block containing the application's Merkle root hash, which represents the state as it was after committing the block at Height-1 | 3 | + | prove | bool | Return Merkle proof with response if possible | 4 | + +* **Response**: + + | Name | Type | Description | Field Number | + |-----------|-----------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| + | code | uint32 | Response code. | 1 | + | log | string | The output of the application's logger. **May be non-deterministic.** | 3 | + | info | string | Additional information. **May be non-deterministic.** | 4 | + | index | int64 | The index of the key in the tree. | 5 | + | key | bytes | The key of the matching data. | 6 | + | value | bytes | The value of the matching data. | 7 | + | proof_ops | [ProofOps](#proofops) | Serialized proof for the value data, if requested, to be verified against the `app_hash` for the given Height. | 8 | + | height | int64 | The block height from which data was derived. Note that this is the height of the block containing the application's Merkle root hash, which represents the state as it was after committing the block at Height-1 | 9 | + | codespace | string | Namespace for the `code`. | 10 | + +* **Usage**: + * Query for data from the application at current or past height. + * Optionally return Merkle proof. + * Merkle proof includes self-describing `type` field to support many types + of Merkle trees and encoding formats. + +### BeginBlock + +* **Request**: + + | Name | Type | Description | Field Number | + |----------------------|---------------------------------------------|-------------------------------------------------------------------------------------------------------------------|--------------| + | hash | bytes | The block's hash. This can be derived from the block header. | 1 | + | header | [Header](../core/data_structures.md#header) | The block header. | 2 | + | last_commit_info | [LastCommitInfo](#lastcommitinfo) | Info about the last commit, including the round, and the list of validators and which ones signed the last block. | 3 | + | byzantine_validators | repeated [Evidence](#evidence) | List of evidence of validators that acted maliciously. | 4 | + +* **Response**: + + | Name | Type | Description | Field Number | + |--------|---------------------------|-------------------------------------|--------------| + | events | repeated [Event](#events) | type & Key-Value events for indexing | 1 | + +* **Usage**: + * Signals the beginning of a new block. + * Called prior to any `DeliverTx` method calls. + * The header contains the height, timestamp, and more - it exactly matches the + Tendermint block header. We may seek to generalize this in the future. + * The `LastCommitInfo` and `ByzantineValidators` can be used to determine + rewards and punishments for the validators. + +### CheckTx + +* **Request**: + + | Name | Type | Description | Field Number | + |------|-------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| + | tx | bytes | The request transaction bytes | 1 | + | type | CheckTxType | One of `CheckTx_New` or `CheckTx_Recheck`. `CheckTx_New` is the default and means that a full check of the tranasaction is required. `CheckTx_Recheck` types are used when the mempool is initiating a normal recheck of a transaction. | 2 | + +* **Response**: + + | Name | Type | Description | Field Number | + |------------|---------------------------|-----------------------------------------------------------------------|--------------| + | code | uint32 | Response code. | 1 | + | data | bytes | Result bytes, if any. | 2 | + | log | string | The output of the application's logger. **May be non-deterministic.** | 3 | + | info | string | Additional information. **May be non-deterministic.** | 4 | + | gas_wanted | int64 | Amount of gas requested for transaction. | 5 | + | gas_used | int64 | Amount of gas consumed by transaction. | 6 | + | events | repeated [Event](#events) | Type & Key-Value events for indexing transactions (eg. by account). | 7 | + | codespace | string | Namespace for the `code`. | 8 | + | sender | string | The transaction's sender (e.g. the signer) | 9 | + | priority | int64 | The transaction's priority (for mempool ordering) | 10 | + +* **Usage**: + + * Technically optional - not involved in processing blocks. + * Guardian of the mempool: every node runs `CheckTx` before letting a + transaction into its local mempool. + * The transaction may come from an external user or another node + * `CheckTx` validates the transaction against the current state of the application, + for example, checking signatures and account balances, but does not apply any + of the state changes described in the transaction. + not running code in a virtual machine. + * Transactions where `ResponseCheckTx.Code != 0` will be rejected - they will not be broadcast to + other nodes or included in a proposal block. + * Tendermint attributes no other value to the response code + +### DeliverTx + +* **Request**: + + | Name | Type | Description | Field Number | + |------|-------|--------------------------------|--------------| + | tx | bytes | The request transaction bytes. | 1 | + +* **Response**: + + | Name | Type | Description | Field Number | + |------------|---------------------------|-----------------------------------------------------------------------|--------------| + | code | uint32 | Response code. | 1 | + | data | bytes | Result bytes, if any. | 2 | + | log | string | The output of the application's logger. **May be non-deterministic.** | 3 | + | info | string | Additional information. **May be non-deterministic.** | 4 | + | gas_wanted | int64 | Amount of gas requested for transaction. | 5 | + | gas_used | int64 | Amount of gas consumed by transaction. | 6 | + | events | repeated [Event](#events) | Type & Key-Value events for indexing transactions (eg. by account). | 7 | + | codespace | string | Namespace for the `code`. | 8 | + +* **Usage**: + * [**Required**] The core method of the application. + * When `DeliverTx` is called, the application must execute the transaction in full before returning control to Tendermint. + * `ResponseDeliverTx.Code == 0` only if the transaction is fully valid. + +### EndBlock + +* **Request**: + + | Name | Type | Description | Field Number | + |--------|-------|------------------------------------|--------------| + | height | int64 | Height of the block just executed. | 1 | + +* **Response**: + + | Name | Type | Description | Field Number | + |-------------------------|----------------------------------------------|-----------------------------------------------------------------|--------------| + | validator_updates | repeated [ValidatorUpdate](#validatorupdate) | Changes to validator set (set voting power to 0 to remove). | 1 | + | consensus_param_updates | [ConsensusParams](#consensusparams) | Changes to consensus-critical time, size, and other parameters. | 2 | + | events | repeated [Event](#events) | Type & Key-Value events for indexing | 3 | + +* **Usage**: + * Signals the end of a block. + * Called after all the transactions for the current block have been delivered, prior to the block's `Commit` message. + * Optional `validator_updates` triggered by block `H`. These updates affect validation + for blocks `H+1`, `H+2`, and `H+3`. + * Heights following a validator update are affected in the following way: + * `H+1`: `NextValidatorsHash` includes the new `validator_updates` value. + * `H+2`: The validator set change takes effect and `ValidatorsHash` is updated. + * `H+3`: `LastCommitInfo` is changed to include the altered validator set. + * `consensus_param_updates` returned for block `H` apply to the consensus + params for block `H+1`. For more information on the consensus parameters, + see the [application spec entry on consensus parameters](../spec/abci/apps.md#consensus-parameters). + +### Commit + +* **Request**: + + | Name | Type | Description | Field Number | + |--------|-------|------------------------------------|--------------| + + Commit signals the application to persist application state. It takes no parameters. +* **Response**: + + | Name | Type | Description | Field Number | + |---------------|-------|------------------------------------------------------------------------|--------------| + | data | bytes | The Merkle root hash of the application state. | 2 | + | retain_height | int64 | Blocks below this height may be removed. Defaults to `0` (retain all). | 3 | + +* **Usage**: + * Signal the application to persist the application state. + * Return an (optional) Merkle root hash of the application state + * `ResponseCommit.Data` is included as the `Header.AppHash` in the next block + * it may be empty + * Later calls to `Query` can return proofs about the application state anchored + in this Merkle root hash + * Note developers can return whatever they want here (could be nothing, or a + constant string, etc.), so long as it is deterministic - it must not be a + function of anything that did not come from the + BeginBlock/DeliverTx/EndBlock methods. + * Use `RetainHeight` with caution! If all nodes in the network remove historical + blocks then this data is permanently lost, and no new nodes will be able to + join the network and bootstrap. Historical blocks may also be required for + other purposes, e.g. auditing, replay of non-persisted heights, light client + verification, and so on. + +### ListSnapshots + +* **Request**: + + | Name | Type | Description | Field Number | + |--------|-------|------------------------------------|--------------| + + Empty request asking the application for a list of snapshots. + +* **Response**: + + | Name | Type | Description | Field Number | + |-----------|--------------------------------|--------------------------------|--------------| + | snapshots | repeated [Snapshot](#snapshot) | List of local state snapshots. | 1 | + +* **Usage**: + * Used during state sync to discover available snapshots on peers. + * See `Snapshot` data type for details. + +### LoadSnapshotChunk + +* **Request**: + + | Name | Type | Description | Field Number | + |--------|--------|-----------------------------------------------------------------------|--------------| + | height | uint64 | The height of the snapshot the chunks belongs to. | 1 | + | format | uint32 | The application-specific format of the snapshot the chunk belongs to. | 2 | + | chunk | uint32 | The chunk index, starting from `0` for the initial chunk. | 3 | + +* **Response**: + + | Name | Type | Description | Field Number | + |-------|-------|-------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| + | chunk | bytes | The binary chunk contents, in an arbitray format. Chunk messages cannot be larger than 16 MB _including metadata_, so 10 MB is a good starting point. | 1 | + +* **Usage**: + * Used during state sync to retrieve snapshot chunks from peers. + +### OfferSnapshot + +* **Request**: + + | Name | Type | Description | Field Number | + |----------|-----------------------|--------------------------------------------------------------------------|--------------| + | snapshot | [Snapshot](#snapshot) | The snapshot offered for restoration. | 1 | + | app_hash | bytes | The light client-verified app hash for this height, from the blockchain. | 2 | + +* **Response**: + + | Name | Type | Description | Field Number | + |--------|-------------------|-----------------------------------|--------------| + | result | [Result](#result) | The result of the snapshot offer. | 1 | + +#### Result + +```proto + enum Result { + UNKNOWN = 0; // Unknown result, abort all snapshot restoration + ACCEPT = 1; // Snapshot is accepted, start applying chunks. + ABORT = 2; // Abort snapshot restoration, and don't try any other snapshots. + REJECT = 3; // Reject this specific snapshot, try others. + REJECT_FORMAT = 4; // Reject all snapshots with this `format`, try others. + REJECT_SENDER = 5; // Reject all snapshots from all senders of this snapshot, try others. + } +``` + +* **Usage**: + * `OfferSnapshot` is called when bootstrapping a node using state sync. The application may + accept or reject snapshots as appropriate. Upon accepting, Tendermint will retrieve and + apply snapshot chunks via `ApplySnapshotChunk`. The application may also choose to reject a + snapshot in the chunk response, in which case it should be prepared to accept further + `OfferSnapshot` calls. + * Only `AppHash` can be trusted, as it has been verified by the light client. Any other data + can be spoofed by adversaries, so applications should employ additional verification schemes + to avoid denial-of-service attacks. The verified `AppHash` is automatically checked against + the restored application at the end of snapshot restoration. + * For more information, see the `Snapshot` data type or the [state sync section](../spec/p2p/messages/state-sync.md). + +### ApplySnapshotChunk + +* **Request**: + + | Name | Type | Description | Field Number | + |--------|--------|-----------------------------------------------------------------------------|--------------| + | index | uint32 | The chunk index, starting from `0`. Tendermint applies chunks sequentially. | 1 | + | chunk | bytes | The binary chunk contents, as returned by `LoadSnapshotChunk`. | 2 | + | sender | string | The P2P ID of the node who sent this chunk. | 3 | + +* **Response**: + + | Name | Type | Description | Field Number | + |----------------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| + | result | Result (see below) | The result of applying this chunk. | 1 | + | refetch_chunks | repeated uint32 | Refetch and reapply the given chunks, regardless of `result`. Only the listed chunks will be refetched, and reapplied in sequential order. | 2 | + | reject_senders | repeated string | Reject the given P2P senders, regardless of `Result`. Any chunks already applied will not be refetched unless explicitly requested, but queued chunks from these senders will be discarded, and new chunks or other snapshots rejected. | 3 | + +```proto + enum Result { + UNKNOWN = 0; // Unknown result, abort all snapshot restoration + ACCEPT = 1; // The chunk was accepted. + ABORT = 2; // Abort snapshot restoration, and don't try any other snapshots. + RETRY = 3; // Reapply this chunk, combine with `RefetchChunks` and `RejectSenders` as appropriate. + RETRY_SNAPSHOT = 4; // Restart this snapshot from `OfferSnapshot`, reusing chunks unless instructed otherwise. + REJECT_SNAPSHOT = 5; // Reject this snapshot, try a different one. + } +``` + +* **Usage**: + * The application can choose to refetch chunks and/or ban P2P peers as appropriate. Tendermint + will not do this unless instructed by the application. + * The application may want to verify each chunk, e.g. by attaching chunk hashes in + `Snapshot.Metadata` and/or incrementally verifying contents against `AppHash`. + * When all chunks have been accepted, Tendermint will make an ABCI `Info` call to verify that + `LastBlockAppHash` and `LastBlockHeight` matches the expected values, and record the + `AppVersion` in the node state. It then switches to fast sync or consensus and joins the + network. + * If Tendermint is unable to retrieve the next chunk after some time (e.g. because no suitable + peers are available), it will reject the snapshot and try a different one via `OfferSnapshot`. + The application should be prepared to reset and accept it or abort as appropriate. + +## Data Types + +Most of the data structures used in ABCI are shared [common data structures](../spec/core/data_structures.md). In certain cases, ABCI uses different data structures which are documented here: + +### Validator + +* **Fields**: + + | Name | Type | Description | Field Number | + |---------|-------|---------------------------------------------------------------------|--------------| + | address | bytes | [Address](../core/data_structures.md#address) of validator | 1 | + | power | int64 | Voting power of the validator | 3 | + +* **Usage**: + * Validator identified by address + * Used in RequestBeginBlock as part of VoteInfo + * Does not include PubKey to avoid sending potentially large quantum pubkeys + over the ABCI + +### ValidatorUpdate + +* **Fields**: + + | Name | Type | Description | Field Number | + |---------|--------------------------------------------------|-------------------------------|--------------| + | pub_key | [Public Key](../core/data_structures.md#pub_key) | Public key of the validator | 1 | + | power | int64 | Voting power of the validator | 2 | + +* **Usage**: + * Validator identified by PubKey + * Used to tell Tendermint to update the validator set + +### VoteInfo + +* **Fields**: + + | Name | Type | Description | Field Number | + |-------------------|-------------------------|--------------------------------------------------------------|--------------| + | validator | [Validator](#validator) | A validator | 1 | + | signed_last_block | bool | Indicates whether or not the validator signed the last block | 2 | + +* **Usage**: + * Indicates whether a validator signed the last block, allowing for rewards + based on validator availability + +### Evidence + +* **Fields**: + + | Name | Type | Description | Field Number | + |--------------------|--------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------|--------------| + | type | [EvidenceType](#evidencetype) | Type of the evidence. An enum of possible evidence's. | 1 | + | validator | [Validator](#validator) | The offending validator | 2 | + | height | int64 | Height when the offense occurred | 3 | + | time | [google.protobuf.Timestamp](https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#google.protobuf.Timestamp) | Time of the block that was committed at the height that the offense occurred | 4 | + | total_voting_power | int64 | Total voting power of the validator set at height `Height` | 5 | + +#### EvidenceType + +* **Fields** + + EvidenceType is an enum with the listed fields: + + | Name | Field Number | + |---------------------|--------------| + | UNKNOWN | 0 | + | DUPLICATE_VOTE | 1 | + | LIGHT_CLIENT_ATTACK | 2 | + +### LastCommitInfo + +* **Fields**: + + | Name | Type | Description | Field Number | + |-------|--------------------------------|-----------------------------------------------------------------------------------------------------------------------|--------------| + | round | int32 | Commit round. Reflects the total amount of rounds it took to come to consensus for the current block. | 1 | + | votes | repeated [VoteInfo](#voteinfo) | List of validators addresses in the last validator set with their voting power and whether or not they signed a vote. | 2 | + +### ConsensusParams + +* **Fields**: + + | Name | Type | Description | Field Number | + |-----------|---------------------------------------------------------------|------------------------------------------------------------------------------|--------------| + | block | [BlockParams](../core/data_structures.md#blockparams) | Parameters limiting the size of a block and time between consecutive blocks. | 1 | + | evidence | [EvidenceParams](../core/data_structures.md#evidenceparams) | Parameters limiting the validity of evidence of byzantine behavior. | 2 | + | validator | [ValidatorParams](../core/data_structures.md#validatorparams) | Parameters limiting the types of public keys validators can use. | 3 | + | version | [VersionsParams](../core/data_structures.md#versionparams) | The ABCI application version. | 4 | + +### ProofOps + +* **Fields**: + + | Name | Type | Description | Field Number | + |------|------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| + | ops | repeated [ProofOp](#proofop) | List of chained Merkle proofs, of possibly different types. The Merkle root of one op is the value being proven in the next op. The Merkle root of the final op should equal the ultimate root hash being verified against.. | 1 | + +### ProofOp + +* **Fields**: + + | Name | Type | Description | Field Number | + |------|--------|------------------------------------------------|--------------| + | type | string | Type of Merkle proof and how it's encoded. | 1 | + | key | bytes | Key in the Merkle tree that this proof is for. | 2 | + | data | bytes | Encoded Merkle proof for the key. | 3 | + +### Snapshot + +* **Fields**: + + | Name | Type | Description | Field Number | + |----------|--------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| + | height | uint64 | The height at which the snapshot was taken (after commit). | 1 | + | format | uint32 | An application-specific snapshot format, allowing applications to version their snapshot data format and make backwards-incompatible changes. Tendermint does not interpret this. | 2 | + | chunks | uint32 | The number of chunks in the snapshot. Must be at least 1 (even if empty). | 3 | + | hash | bytes | TAn arbitrary snapshot hash. Must be equal only for identical snapshots across nodes. Tendermint does not interpret the hash, it only compares them. | 3 | + | metadata | bytes | Arbitrary application metadata, for example chunk hashes or other verification data. | 3 | + +* **Usage**: + * Used for state sync snapshots, see the [state sync section](../spec/p2p/messages/state-sync.md) for details. + * A snapshot is considered identical across nodes only if _all_ fields are equal (including + `Metadata`). Chunks may be retrieved from all nodes that have the same snapshot. + * When sent across the network, a snapshot message can be at most 4 MB. diff --git a/spec/abci/apps.md b/spec/abci/apps.md new file mode 100644 index 000000000..d1b85a740 --- /dev/null +++ b/spec/abci/apps.md @@ -0,0 +1,671 @@ +--- +order: 2 +title: Applications +--- + +# Applications + +Please ensure you've first read the spec for [ABCI Methods and Types](abci.md) + +Here we cover the following components of ABCI applications: + +- [Connection State](#connection-state) - the interplay between ABCI connections and application state + and the differences between `CheckTx` and `DeliverTx`. +- [Transaction Results](#transaction-results) - rules around transaction + results and validity +- [Validator Set Updates](#validator-updates) - how validator sets are + changed during `InitChain` and `EndBlock` +- [Query](#query) - standards for using the `Query` method and proofs about the + application state +- [Crash Recovery](#crash-recovery) - handshake protocol to synchronize + Tendermint and the application on startup. +- [State Sync](#state-sync) - rapid bootstrapping of new nodes by restoring state machine snapshots + +## Connection State + +Since Tendermint maintains four concurrent ABCI connections, it is typical +for an application to maintain a distinct state for each, and for the states to +be synchronized during `Commit`. + +### Concurrency + +In principle, each of the four ABCI connections operate concurrently with one +another. This means applications need to ensure access to state is +thread safe. In practice, both the +[default in-process ABCI client](https://github.com/tendermint/tendermint/blob/v0.34.4/abci/client/local_client.go#L18) +and the +[default Go ABCI +server](https://github.com/tendermint/tendermint/blob/v0.34.4/abci/server/socket_server.go#L32) +use global locks across all connections, so they are not +concurrent at all. This means if your app is written in Go, and compiled in-process with Tendermint +using the default `NewLocalClient`, or run out-of-process using the default `SocketServer`, +ABCI messages from all connections will be linearizable (received one at a +time). + +The existence of this global mutex means Go application developers can get +thread safety for application state by routing *all* reads and writes through the ABCI +system. Thus it may be *unsafe* to expose application state directly to an RPC +interface, and unless explicit measures are taken, all queries should be routed through the ABCI Query method. + +### BeginBlock + +The BeginBlock request can be used to run some code at the beginning of +every block. It also allows Tendermint to send the current block hash +and header to the application, before it sends any of the transactions. + +The app should remember the latest height and header (ie. from which it +has run a successful Commit) so that it can tell Tendermint where to +pick up from when it restarts. See information on the Handshake, below. + +### Commit + +Application state should only be persisted to disk during `Commit`. + +Before `Commit` is called, Tendermint locks and flushes the mempool so that no new messages will +be received on the mempool connection. This provides an opportunity to safely update all four connection +states to the latest committed state at once. + +When `Commit` completes, it unlocks the mempool. + +WARNING: if the ABCI app logic processing the `Commit` message sends a +`/broadcast_tx_sync` or `/broadcast_tx_commit` and waits for the response +before proceeding, it will deadlock. Executing those `broadcast_tx` calls +involves acquiring a lock that is held during the `Commit` call, so it's not +possible. If you make the call to the `broadcast_tx` endpoints concurrently, +that's no problem, it just can't be part of the sequential logic of the +`Commit` function. + +### Consensus Connection + +The Consensus Connection should maintain a `DeliverTxState` - the working state +for block execution. It should be updated by the calls to `BeginBlock`, `DeliverTx`, +and `EndBlock` during block execution and committed to disk as the "latest +committed state" during `Commit`. + +Updates made to the `DeliverTxState` by each method call must be readable by each subsequent method - +ie. the updates are linearizable. + +### Mempool Connection + +The mempool Connection should maintain a `CheckTxState` +to sequentially process pending transactions in the mempool that have +not yet been committed. It should be initialized to the latest committed state +at the end of every `Commit`. + +Before calling `Commit`, Tendermint will lock and flush the mempool connection, +ensuring that all existing CheckTx are responded to and no new ones can begin. +The `CheckTxState` may be updated concurrently with the `DeliverTxState`, as +messages may be sent concurrently on the Consensus and Mempool connections. + +After `Commit`, while still holding the mempool lock, CheckTx is run again on all transactions that remain in the +node's local mempool after filtering those included in the block. +An additional `Type` parameter is made available to the CheckTx function that +indicates whether an incoming transaction is new (`CheckTxType_New`), or a +recheck (`CheckTxType_Recheck`). + +Finally, after re-checking transactions in the mempool, Tendermint will unlock +the mempool connection. New transactions are once again able to be processed through CheckTx. + +Note that CheckTx is just a weak filter to keep invalid transactions out of the block chain. +CheckTx doesn't have to check everything that affects transaction validity; the +expensive things can be skipped. It's weak because a Byzantine node doesn't +care about CheckTx; it can propose a block full of invalid transactions if it wants. + +#### Replay Protection + +To prevent old transactions from being replayed, CheckTx must implement +replay protection. + +It is possible for old transactions to be sent to the application. So +it is important CheckTx implements some logic to handle them. + +### Query Connection + +The Info Connection should maintain a `QueryState` for answering queries from the user, +and for initialization when Tendermint first starts up (both described further +below). +It should always contain the latest committed state associated with the +latest committed block. + +`QueryState` should be set to the latest `DeliverTxState` at the end of every `Commit`, +after the full block has been processed and the state committed to disk. +Otherwise it should never be modified. + +Tendermint Core currently uses the Query connection to filter peers upon +connecting, according to IP address or node ID. For instance, +returning non-OK ABCI response to either of the following queries will +cause Tendermint to not connect to the corresponding peer: + +- `p2p/filter/addr/`, where `` is an IP address. +- `p2p/filter/id/`, where `` is the hex-encoded node ID (the hash of + the node's p2p pubkey). + +Note: these query formats are subject to change! + +### Snapshot Connection + +The Snapshot Connection is optional, and is only used to serve state sync snapshots for other nodes +and/or restore state sync snapshots to a local node being bootstrapped. + +For more information, see [the state sync section of this document](#state-sync). + +## Transaction Results + +The `Info` and `Log` fields are non-deterministic values for debugging/convenience purposes +that are otherwise ignored. + +The `Data` field must be strictly deterministic, but can be arbitrary data. + +### Gas + +Ethereum introduced the notion of `gas` as an abstract representation of the +cost of resources used by nodes when processing transactions. Every operation in the +Ethereum Virtual Machine uses some amount of gas, and gas can be accepted at a market-variable price. +Users propose a maximum amount of gas for their transaction; if the tx uses less, they get +the difference credited back. Tendermint adopts a similar abstraction, +though uses it only optionally and weakly, allowing applications to define +their own sense of the cost of execution. + +In Tendermint, the [ConsensusParams.Block.MaxGas](../proto/types/params.proto) limits the amount of `gas` that can be used in a block. +The default value is `-1`, meaning no limit, or that the concept of gas is +meaningless. + +Responses contain a `GasWanted` and `GasUsed` field. The former is the maximum +amount of gas the sender of a tx is willing to use, and the later is how much it actually +used. Applications should enforce that `GasUsed <= GasWanted` - ie. tx execution +should halt before it can use more resources than it requested. + +When `MaxGas > -1`, Tendermint enforces the following rules: + +- `GasWanted <= MaxGas` for all txs in the mempool +- `(sum of GasWanted in a block) <= MaxGas` when proposing a block + +If `MaxGas == -1`, no rules about gas are enforced. + +Note that Tendermint does not currently enforce anything about Gas in the consensus, only the mempool. +This means it does not guarantee that committed blocks satisfy these rules! +It is the application's responsibility to return non-zero response codes when gas limits are exceeded. + +The `GasUsed` field is ignored completely by Tendermint. That said, applications should enforce: + +- `GasUsed <= GasWanted` for any given transaction +- `(sum of GasUsed in a block) <= MaxGas` for every block + +In the future, we intend to add a `Priority` field to the responses that can be +used to explicitly prioritize txs in the mempool for inclusion in a block +proposal. See [#1861](https://github.com/tendermint/tendermint/issues/1861). + +### CheckTx + +If `Code != 0`, it will be rejected from the mempool and hence +not broadcasted to other peers and not included in a proposal block. + +`Data` contains the result of the CheckTx transaction execution, if any. It is +semantically meaningless to Tendermint. + +`Events` include any events for the execution, though since the transaction has not +been committed yet, they are effectively ignored by Tendermint. + +### DeliverTx + +DeliverTx is the workhorse of the blockchain. Tendermint sends the +DeliverTx requests asynchronously but in order, and relies on the +underlying socket protocol (ie. TCP) to ensure they are received by the +app in order. They have already been ordered in the global consensus by +the Tendermint protocol. + +If DeliverTx returns `Code != 0`, the transaction will be considered invalid, +though it is still included in the block. + +DeliverTx also returns a [Code, Data, and Log](../../proto/abci/types.proto#L189-L191). + +`Data` contains the result of the CheckTx transaction execution, if any. It is +semantically meaningless to Tendermint. + +Both the `Code` and `Data` are included in a structure that is hashed into the +`LastResultsHash` of the next block header. + +`Events` include any events for the execution, which Tendermint will use to index +the transaction by. This allows transactions to be queried according to what +events took place during their execution. + +## Updating the Validator Set + +The application may set the validator set during InitChain, and may update it during +EndBlock. + +Note that the maximum total power of the validator set is bounded by +`MaxTotalVotingPower = MaxInt64 / 8`. Applications are responsible for ensuring +they do not make changes to the validator set that cause it to exceed this +limit. + +Additionally, applications must ensure that a single set of updates does not contain any duplicates - +a given public key can only appear once within a given update. If an update includes +duplicates, the block execution will fail irrecoverably. + +### InitChain + +The `InitChain` method can return a list of validators. +If the list is empty, Tendermint will use the validators loaded in the genesis +file. +If the list returned by `InitChain` is not empty, Tendermint will use its contents as the validator set. +This way the application can set the initial validator set for the +blockchain. + +### EndBlock + +Updates to the Tendermint validator set can be made by returning +`ValidatorUpdate` objects in the `ResponseEndBlock`: + +```protobuf +message ValidatorUpdate { + tendermint.crypto.keys.PublicKey pub_key + int64 power +} + +message PublicKey { + oneof { + ed25519 bytes = 1; + } +``` + +The `pub_key` currently supports only one type: + +- `type = "ed25519"` + +The `power` is the new voting power for the validator, with the +following rules: + +- power must be non-negative +- if power is 0, the validator must already exist, and will be removed from the + validator set +- if power is non-0: + - if the validator does not already exist, it will be added to the validator + set with the given power + - if the validator does already exist, its power will be adjusted to the given power +- the total power of the new validator set must not exceed MaxTotalVotingPower + +Note the updates returned in block `H` will only take effect at block `H+2`. + +## Consensus Parameters + +ConsensusParams enforce certain limits in the blockchain, like the maximum size +of blocks, amount of gas used in a block, and the maximum acceptable age of +evidence. They can be set in InitChain and updated in EndBlock. + +### BlockParams.MaxBytes + +The maximum size of a complete Protobuf encoded block. +This is enforced by Tendermint consensus. + +This implies a maximum transaction size that is this MaxBytes, less the expected size of +the header, the validator set, and any included evidence in the block. + +Must have `0 < MaxBytes < 100 MB`. + +### BlockParams.MaxGas + +The maximum of the sum of `GasWanted` that will be allowed in a proposed block. +This is *not* enforced by Tendermint consensus. +It is left to the app to enforce (ie. if txs are included past the +limit, they should return non-zero codes). It is used by Tendermint to limit the +txs included in a proposed block. + +Must have `MaxGas >= -1`. +If `MaxGas == -1`, no limit is enforced. + +### EvidenceParams.MaxAgeDuration + +This is the maximum age of evidence in time units. +This is enforced by Tendermint consensus. + +If a block includes evidence older than this (AND the evidence was created more +than `MaxAgeNumBlocks` ago), the block will be rejected (validators won't vote +for it). + +Must have `MaxAgeDuration > 0`. + +### EvidenceParams.MaxAgeNumBlocks + +This is the maximum age of evidence in blocks. +This is enforced by Tendermint consensus. + +If a block includes evidence older than this (AND the evidence was created more +than `MaxAgeDuration` ago), the block will be rejected (validators won't vote +for it). + +Must have `MaxAgeNumBlocks > 0`. + +### EvidenceParams.MaxNum + +This is the maximum number of evidence that can be committed to a single block. + +The product of this and the `MaxEvidenceBytes` must not exceed the size of +a block minus it's overhead ( ~ `MaxBytes`). + +Must have `MaxNum > 0`. + +### Updates + +The application may set the ConsensusParams during InitChain, and update them during +EndBlock. If the ConsensusParams is empty, it will be ignored. Each field +that is not empty will be applied in full. For instance, if updating the +Block.MaxBytes, applications must also set the other Block fields (like +Block.MaxGas), even if they are unchanged, as they will otherwise cause the +value to be updated to 0. + +#### InitChain + +ResponseInitChain includes a ConsensusParams. +If ConsensusParams is nil, Tendermint will use the params loaded in the genesis +file. If ConsensusParams is not nil, Tendermint will use it. +This way the application can determine the initial consensus params for the +blockchain. + +#### EndBlock + +ResponseEndBlock includes a ConsensusParams. +If ConsensusParams nil, Tendermint will do nothing. +If ConsensusParam is not nil, Tendermint will use it. +This way the application can update the consensus params over time. + +Note the updates returned in block `H` will take effect right away for block +`H+1`. + +## Query + +Query is a generic method with lots of flexibility to enable diverse sets +of queries on application state. Tendermint makes use of Query to filter new peers +based on ID and IP, and exposes Query to the user over RPC. + +Note that calls to Query are not replicated across nodes, but rather query the +local node's state - hence they may return stale reads. For reads that require +consensus, use a transaction. + +The most important use of Query is to return Merkle proofs of the application state at some height +that can be used for efficient application-specific light-clients. + +Note Tendermint has technically no requirements from the Query +message for normal operation - that is, the ABCI app developer need not implement +Query functionality if they do not wish too. + +### Query Proofs + +The Tendermint block header includes a number of hashes, each providing an +anchor for some type of proof about the blockchain. The `ValidatorsHash` enables +quick verification of the validator set, the `DataHash` gives quick +verification of the transactions included in the block, etc. + +The `AppHash` is unique in that it is application specific, and allows for +application-specific Merkle proofs about the state of the application. +While some applications keep all relevant state in the transactions themselves +(like Bitcoin and its UTXOs), others maintain a separated state that is +computed deterministically *from* transactions, but is not contained directly in +the transactions themselves (like Ethereum contracts and accounts). +For such applications, the `AppHash` provides a much more efficient way to verify light-client proofs. + +ABCI applications can take advantage of more efficient light-client proofs for +their state as follows: + +- return the Merkle root of the deterministic application state in +`ResponseCommit.Data`. This Merkle root will be included as the `AppHash` in the next block. +- return efficient Merkle proofs about that application state in `ResponseQuery.Proof` + that can be verified using the `AppHash` of the corresponding block. + +For instance, this allows an application's light-client to verify proofs of +absence in the application state, something which is much less efficient to do using the block hash. + +Some applications (eg. Ethereum, Cosmos-SDK) have multiple "levels" of Merkle trees, +where the leaves of one tree are the root hashes of others. To support this, and +the general variability in Merkle proofs, the `ResponseQuery.Proof` has some minimal structure: + +```protobuf +message ProofOps { + repeated ProofOp ops +} + +message ProofOp { + string type = 1; + bytes key = 2; + bytes data = 3; +} +``` + +Each `ProofOp` contains a proof for a single key in a single Merkle tree, of the specified `type`. +This allows ABCI to support many different kinds of Merkle trees, encoding +formats, and proofs (eg. of presence and absence) just by varying the `type`. +The `data` contains the actual encoded proof, encoded according to the `type`. +When verifying the full proof, the root hash for one ProofOp is the value being +verified for the next ProofOp in the list. The root hash of the final ProofOp in +the list should match the `AppHash` being verified against. + +### Peer Filtering + +When Tendermint connects to a peer, it sends two queries to the ABCI application +using the following paths, with no additional data: + +- `/p2p/filter/addr/`, where `` denote the IP address and + the port of the connection +- `p2p/filter/id/`, where `` is the peer node ID (ie. the + pubkey.Address() for the peer's PubKey) + +If either of these queries return a non-zero ABCI code, Tendermint will refuse +to connect to the peer. + +### Paths + +Queries are directed at paths, and may optionally include additional data. + +The expectation is for there to be some number of high level paths +differentiating concerns, like `/p2p`, `/store`, and `/app`. Currently, +Tendermint only uses `/p2p`, for filtering peers. For more advanced use, see the +implementation of +[Query in the Cosmos-SDK](https://github.com/cosmos/cosmos-sdk/blob/v0.23.1/baseapp/baseapp.go#L333). + +## Crash Recovery + +On startup, Tendermint calls the `Info` method on the Info Connection to get the latest +committed state of the app. The app MUST return information consistent with the +last block it succesfully completed Commit for. + +If the app succesfully committed block H, then `last_block_height = H` and `last_block_app_hash = `. If the app +failed during the Commit of block H, then `last_block_height = H-1` and +`last_block_app_hash = `. + +We now distinguish three heights, and describe how Tendermint syncs itself with +the app. + +```md +storeBlockHeight = height of the last block Tendermint saw a commit for +stateBlockHeight = height of the last block for which Tendermint completed all + block processing and saved all ABCI results to disk +appBlockHeight = height of the last block for which ABCI app succesfully + completed Commit + +``` + +Note we always have `storeBlockHeight >= stateBlockHeight` and `storeBlockHeight >= appBlockHeight` +Note also Tendermint never calls Commit on an ABCI app twice for the same height. + +The procedure is as follows. + +First, some simple start conditions: + +If `appBlockHeight == 0`, then call InitChain. + +If `storeBlockHeight == 0`, we're done. + +Now, some sanity checks: + +If `storeBlockHeight < appBlockHeight`, error +If `storeBlockHeight < stateBlockHeight`, panic +If `storeBlockHeight > stateBlockHeight+1`, panic + +Now, the meat: + +If `storeBlockHeight == stateBlockHeight && appBlockHeight < storeBlockHeight`, +replay all blocks in full from `appBlockHeight` to `storeBlockHeight`. +This happens if we completed processing the block, but the app forgot its height. + +If `storeBlockHeight == stateBlockHeight && appBlockHeight == storeBlockHeight`, we're done. +This happens if we crashed at an opportune spot. + +If `storeBlockHeight == stateBlockHeight+1` +This happens if we started processing the block but didn't finish. + +If `appBlockHeight < stateBlockHeight` + replay all blocks in full from `appBlockHeight` to `storeBlockHeight-1`, + and replay the block at `storeBlockHeight` using the WAL. +This happens if the app forgot the last block it committed. + +If `appBlockHeight == stateBlockHeight`, + replay the last block (storeBlockHeight) in full. +This happens if we crashed before the app finished Commit + +If `appBlockHeight == storeBlockHeight` + update the state using the saved ABCI responses but dont run the block against the real app. +This happens if we crashed after the app finished Commit but before Tendermint saved the state. + +## State Sync + +A new node joining the network can simply join consensus at the genesis height and replay all +historical blocks until it is caught up. However, for large chains this can take a significant +amount of time, often on the order of days or weeks. + +State sync is an alternative mechanism for bootstrapping a new node, where it fetches a snapshot +of the state machine at a given height and restores it. Depending on the application, this can +be several orders of magnitude faster than replaying blocks. + +Note that state sync does not currently backfill historical blocks, so the node will have a +truncated block history - users are advised to consider the broader network implications of this in +terms of block availability and auditability. This functionality may be added in the future. + +For details on the specific ABCI calls and types, see the [methods and types section](abci.md). + +### Taking Snapshots + +Applications that want to support state syncing must take state snapshots at regular intervals. How +this is accomplished is entirely up to the application. A snapshot consists of some metadata and +a set of binary chunks in an arbitrary format: + +- `Height (uint64)`: The height at which the snapshot is taken. It must be taken after the given + height has been committed, and must not contain data from any later heights. + +- `Format (uint32)`: An arbitrary snapshot format identifier. This can be used to version snapshot + formats, e.g. to switch from Protobuf to MessagePack for serialization. The application can use + this when restoring to choose whether to accept or reject a snapshot. + +- `Chunks (uint32)`: The number of chunks in the snapshot. Each chunk contains arbitrary binary + data, and should be less than 16 MB; 10 MB is a good starting point. + +- `Hash ([]byte)`: An arbitrary hash of the snapshot. This is used to check whether a snapshot is + the same across nodes when downloading chunks. + +- `Metadata ([]byte)`: Arbitrary snapshot metadata, e.g. chunk hashes for verification or any other + necessary info. + +For a snapshot to be considered the same across nodes, all of these fields must be identical. When +sent across the network, snapshot metadata messages are limited to 4 MB. + +When a new node is running state sync and discovering snapshots, Tendermint will query an existing +application via the ABCI `ListSnapshots` method to discover available snapshots, and load binary +snapshot chunks via `LoadSnapshotChunk`. The application is free to choose how to implement this +and which formats to use, but must provide the following guarantees: + +- **Consistent:** A snapshot must be taken at a single isolated height, unaffected by + concurrent writes. This can be accomplished by using a data store that supports ACID + transactions with snapshot isolation. + +- **Asynchronous:** Taking a snapshot can be time-consuming, so it must not halt chain progress, + for example by running in a separate thread. + +- **Deterministic:** A snapshot taken at the same height in the same format must be identical + (at the byte level) across nodes, including all metadata. This ensures good availability of + chunks, and that they fit together across nodes. + +A very basic approach might be to use a datastore with MVCC transactions (such as RocksDB), +start a transaction immediately after block commit, and spawn a new thread which is passed the +transaction handle. This thread can then export all data items, serialize them using e.g. +Protobuf, hash the byte stream, split it into chunks, and store the chunks in the file system +along with some metadata - all while the blockchain is applying new blocks in parallel. + +A more advanced approach might include incremental verification of individual chunks against the +chain app hash, parallel or batched exports, compression, and so on. + +Old snapshots should be removed after some time - generally only the last two snapshots are needed +(to prevent the last one from being removed while a node is restoring it). + +### Bootstrapping a Node + +An empty node can be state synced by setting the configuration option `statesync.enabled = +true`. The node also needs the chain genesis file for basic chain info, and configuration for +light client verification of the restored snapshot: a set of Tendermint RPC servers, and a +trusted header hash and corresponding height from a trusted source, via the `statesync` +configuration section. + +Once started, the node will connect to the P2P network and begin discovering snapshots. These +will be offered to the local application via the `OfferSnapshot` ABCI method. Once a snapshot +is accepted Tendermint will fetch and apply the snapshot chunks. After all chunks have been +successfully applied, Tendermint verifies the app's `AppHash` against the chain using the light +client, then switches the node to normal consensus operation. + +#### Snapshot Discovery + +When the empty node join the P2P network, it asks all peers to report snapshots via the +`ListSnapshots` ABCI call (limited to 10 per node). After some time, the node picks the most +suitable snapshot (generally prioritized by height, format, and number of peers), and offers it +to the application via `OfferSnapshot`. The application can choose a number of responses, +including accepting or rejecting it, rejecting the offered format, rejecting the peer who sent +it, and so on. Tendermint will keep discovering and offering snapshots until one is accepted or +the application aborts. + +#### Snapshot Restoration + +Once a snapshot has been accepted via `OfferSnapshot`, Tendermint begins downloading chunks from +any peers that have the same snapshot (i.e. that have identical metadata fields). Chunks are +spooled in a temporary directory, and then given to the application in sequential order via +`ApplySnapshotChunk` until all chunks have been accepted. + +The method for restoring snapshot chunks is entirely up to the application. + +During restoration, the application can respond to `ApplySnapshotChunk` with instructions for how +to continue. This will typically be to accept the chunk and await the next one, but it can also +ask for chunks to be refetched (either the current one or any number of previous ones), P2P peers +to be banned, snapshots to be rejected or retried, and a number of other responses - see the ABCI +reference for details. + +If Tendermint fails to fetch a chunk after some time, it will reject the snapshot and try a +different one via `OfferSnapshot` - the application can choose whether it wants to support +restarting restoration, or simply abort with an error. + +#### Snapshot Verification + +Once all chunks have been accepted, Tendermint issues an `Info` ABCI call to retrieve the +`LastBlockAppHash`. This is compared with the trusted app hash from the chain, retrieved and +verified using the light client. Tendermint also checks that `LastBlockHeight` corresponds to the +height of the snapshot. + +This verification ensures that an application is valid before joining the network. However, the +snapshot restoration may take a long time to complete, so applications may want to employ additional +verification during the restore to detect failures early. This might e.g. include incremental +verification of each chunk against the app hash (using bundled Merkle proofs), checksums to +protect against data corruption by the disk or network, and so on. However, it is important to +note that the only trusted information available is the app hash, and all other snapshot metadata +can be spoofed by adversaries. + +Apps may also want to consider state sync denial-of-service vectors, where adversaries provide +invalid or harmful snapshots to prevent nodes from joining the network. The application can +counteract this by asking Tendermint to ban peers. As a last resort, node operators can use +P2P configuration options to whitelist a set of trusted peers that can provide valid snapshots. + +#### Transition to Consensus + +Once the snapshots have all been restored, Tendermint gathers additional information necessary for +bootstrapping the node (e.g. chain ID, consensus parameters, validator sets, and block headers) +from the genesis file and light client RPC servers. It also fetches and records the `AppVersion` +from the ABCI application. + +Once the state machine has been restored and Tendermint has gathered this additional +information, it transitions to block sync (if enabled) to fetch any remaining blocks up the chain +head, and then transitions to regular consensus operation. At this point the node operates like +any other node, apart from having a truncated block history at the height of the restored snapshot. diff --git a/spec/abci/client-server.md b/spec/abci/client-server.md new file mode 100644 index 000000000..1ffbe2e43 --- /dev/null +++ b/spec/abci/client-server.md @@ -0,0 +1,116 @@ +--- +order: 3 +title: Client and Server +--- + +# Client and Server + +This section is for those looking to implement their own ABCI Server, perhaps in +a new programming language. + +You are expected to have read [ABCI Methods and Types](./abci.md) and [ABCI +Applications](./apps.md). + +## Message Protocol + +The message protocol consists of pairs of requests and responses defined in the +[protobuf file](https://github.com/tendermint/tendermint/blob/master/proto/tendermint/abci/types.proto). + +Some messages have no fields, while others may include byte-arrays, strings, integers, +or custom protobuf types. + +For more details on protobuf, see the [documentation](https://developers.google.com/protocol-buffers/docs/overview). + +For each request, a server should respond with the corresponding +response, where the order of requests is preserved in the order of +responses. + +## Server Implementations + +To use ABCI in your programming language of choice, there must be a ABCI +server in that language. Tendermint supports three implementations of the ABCI, written in Go: + +- In-process ([Golang](https://github.com/tendermint/tendermint/tree/master/abci), [Rust](https://github.com/tendermint/rust-abci)) +- ABCI-socket +- GRPC + +The latter two can be tested using the `abci-cli` by setting the `--abci` flag +appropriately (ie. to `socket` or `grpc`). + +See examples, in various stages of maintenance, in +[Go](https://github.com/tendermint/tendermint/tree/master/abci/server), +[JavaScript](https://github.com/tendermint/js-abci), +[C++](https://github.com/mdyring/cpp-tmsp), and +[Java](https://github.com/jTendermint/jabci). + +### In Process + +The simplest implementation uses function calls within Golang. +This means ABCI applications written in Golang can be compiled with Tendermint Core and run as a single binary. + +### GRPC + +If GRPC is available in your language, this is the easiest approach, +though it will have significant performance overhead. + +To get started with GRPC, copy in the [protobuf +file](https://github.com/tendermint/tendermint/blob/master/proto/tendermint/abci/types.proto) +and compile it using the GRPC plugin for your language. For instance, +for golang, the command is `protoc --go_out=plugins=grpc:. types.proto`. +See the [grpc documentation for more details](http://www.grpc.io/docs/). +`protoc` will autogenerate all the necessary code for ABCI client and +server in your language, including whatever interface your application +must satisfy to be used by the ABCI server for handling requests. + +Note the length-prefixing used in the socket implementation (TSP) does not apply for GRPC. + +### TSP + +Tendermint Socket Protocol is an asynchronous, raw socket server which provides +ordered message passing over unix or tcp. Messages are serialized using +Protobuf3 and length-prefixed with an [unsigned +varint](https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints) + +If GRPC is not available in your language, or you require higher performance, or +otherwise enjoy programming, you may implement your own ABCI server using the +Tendermint Socket Protocol. The first step is still to auto-generate the +relevant data types and codec in your language using `protoc`. In addition to +being proto3 encoded, messages coming over the socket are length-prefixed to +facilitate use as a streaming protocol. proto3 doesn't have an official +length-prefix standard, so we use our own. The first byte in the prefix +represents the length of the Big Endian encoded length. The remaining bytes in +the prefix are the Big Endian encoded length. + +For example, if the proto3 encoded ABCI message is 0xDEADBEEF (4 bytes), the +length-prefixed message is 0x0104DEADBEEF. If the proto3 encoded ABCI message is +65535 bytes long, the length-prefixed message would be like 0x02FFFF.... + +The benefit of using this `varint` encoding over the old version (where integers +were encoded as `` is that it is the common way to +encode integers in Protobuf. It is also generally shorter. + +As noted above, this prefixing does not apply for GRPC. + +An ABCI server must also be able to support multiple connections, as +Tendermint uses four connections. + +### Async vs Sync + +The main ABCI server (ie. non-GRPC) provides ordered asynchronous messages. +This is useful for DeliverTx and CheckTx, since it allows Tendermint to forward +transactions to the app before it's finished processing previous ones. + +Thus, DeliverTx and CheckTx messages are sent asynchronously, while all other +messages are sent synchronously. + +## Client + +There are currently two use-cases for an ABCI client. One is a testing +tool, as in the `abci-cli`, which allows ABCI requests to be sent via +command line. The other is a consensus engine, such as Tendermint Core, +which makes requests to the application every time a new transaction is +received or a block is committed. + +It is unlikely that you will need to implement a client. For details of +our client, see +[here](https://github.com/tendermint/tendermint/tree/master/abci/client). diff --git a/spec/blockchain/blockchain.md b/spec/blockchain/blockchain.md new file mode 100644 index 000000000..fcc080ee7 --- /dev/null +++ b/spec/blockchain/blockchain.md @@ -0,0 +1,3 @@ +# Blockchain + +Deprecated see [core/data_structures.md](../core/data_structures.md) diff --git a/spec/blockchain/encoding.md b/spec/blockchain/encoding.md new file mode 100644 index 000000000..aa2c9ab3f --- /dev/null +++ b/spec/blockchain/encoding.md @@ -0,0 +1,3 @@ +# Encoding + +Deprecated see [core/data_structures.md](../core/encoding.md) diff --git a/spec/blockchain/readme.md b/spec/blockchain/readme.md new file mode 100644 index 000000000..10ad46690 --- /dev/null +++ b/spec/blockchain/readme.md @@ -0,0 +1,14 @@ +--- +order: 1 +parent: + title: Blockchain + order: false +--- + +# Blockchain + +This section describes the core types and functionality of the Tendermint protocol implementation. + +[Core Data Structures](../core/data_structures.md) +[Encoding](../core/encoding.md) +[State](../core/state.md) diff --git a/spec/blockchain/state.md b/spec/blockchain/state.md new file mode 100644 index 000000000..f4f1d9525 --- /dev/null +++ b/spec/blockchain/state.md @@ -0,0 +1,3 @@ +# State + +Deprecated see [core/state.md](../core/state.md) diff --git a/spec/consensus/bft-time.md b/spec/consensus/bft-time.md new file mode 100644 index 000000000..cec3b91ab --- /dev/null +++ b/spec/consensus/bft-time.md @@ -0,0 +1,55 @@ +--- +order: 2 +--- +# BFT Time + +Tendermint provides a deterministic, Byzantine fault-tolerant, source of time. +Time in Tendermint is defined with the Time field of the block header. + +It satisfies the following properties: + +- Time Monotonicity: Time is monotonically increasing, i.e., given +a header H1 for height h1 and a header H2 for height `h2 = h1 + 1`, `H1.Time < H2.Time`. +- Time Validity: Given a set of Commit votes that forms the `block.LastCommit` field, a range of +valid values for the Time field of the block header is defined only by +Precommit messages (from the LastCommit field) sent by correct processes, i.e., +a faulty process cannot arbitrarily increase the Time value. + +In the context of Tendermint, time is of type int64 and denotes UNIX time in milliseconds, i.e., +corresponds to the number of milliseconds since January 1, 1970. Before defining rules that need to be enforced by the +Tendermint consensus protocol, so the properties above holds, we introduce the following definition: + +- median of a Commit is equal to the median of `Vote.Time` fields of the `Vote` messages, +where the value of `Vote.Time` is counted number of times proportional to the process voting power. As in Tendermint +the voting power is not uniform (one process one vote), a vote message is actually an aggregator of the same votes whose +number is equal to the voting power of the process that has casted the corresponding votes message. + +Let's consider the following example: + +- we have four processes p1, p2, p3 and p4, with the following voting power distribution (p1, 23), (p2, 27), (p3, 10) +and (p4, 10). The total voting power is 70 (`N = 3f+1`, where `N` is the total voting power, and `f` is the maximum voting +power of the faulty processes), so we assume that the faulty processes have at most 23 of voting power. +Furthermore, we have the following vote messages in some LastCommit field (we ignore all fields except Time field): + - (p1, 100), (p2, 98), (p3, 1000), (p4, 500). We assume that p3 and p4 are faulty processes. Let's assume that the + `block.LastCommit` message contains votes of processes p2, p3 and p4. Median is then chosen the following way: + the value 98 is counted 27 times, the value 1000 is counted 10 times and the value 500 is counted also 10 times. + So the median value will be the value 98. No matter what set of messages with at least `2f+1` voting power we + choose, the median value will always be between the values sent by correct processes. + +We ensure Time Monotonicity and Time Validity properties by the following rules: + +- let rs denotes `RoundState` (consensus internal state) of some process. Then +`rs.ProposalBlock.Header.Time == median(rs.LastCommit) && +rs.Proposal.Timestamp == rs.ProposalBlock.Header.Time`. + +- Furthermore, when creating the `vote` message, the following rules for determining `vote.Time` field should hold: + + - if `rs.LockedBlock` is defined then + `vote.Time = max(rs.LockedBlock.Timestamp + time.Millisecond, time.Now())`, where `time.Now()` + denotes local Unix time in milliseconds + + - else if `rs.Proposal` is defined then + `vote.Time = max(rs.Proposal.Timestamp + time.Millisecond,, time.Now())`, + + - otherwise, `vote.Time = time.Now())`. In this case vote is for `nil` so it is not taken into account for + the timestamp of the next block. diff --git a/spec/consensus/consensus-paper/IEEEtran.bst b/spec/consensus/consensus-paper/IEEEtran.bst new file mode 100644 index 000000000..53fbc030a --- /dev/null +++ b/spec/consensus/consensus-paper/IEEEtran.bst @@ -0,0 +1,2417 @@ +%% +%% IEEEtran.bst +%% BibTeX Bibliography Style file for IEEE Journals and Conferences (unsorted) +%% Version 1.12 (2007/01/11) +%% +%% Copyright (c) 2003-2007 Michael Shell +%% +%% Original starting code base and algorithms obtained from the output of +%% Patrick W. Daly's makebst package as well as from prior versions of +%% IEEE BibTeX styles: +%% +%% 1. Howard Trickey and Oren Patashnik's ieeetr.bst (1985/1988) +%% 2. Silvano Balemi and Richard H. Roy's IEEEbib.bst (1993) +%% +%% Support sites: +%% http://www.michaelshell.org/tex/ieeetran/ +%% http://www.ctan.org/tex-archive/macros/latex/contrib/IEEEtran/ +%% and/or +%% http://www.ieee.org/ +%% +%% For use with BibTeX version 0.99a or later +%% +%% This is a numerical citation style. +%% +%%************************************************************************* +%% Legal Notice: +%% This code is offered as-is without any warranty either expressed or +%% implied; without even the implied warranty of MERCHANTABILITY or +%% FITNESS FOR A PARTICULAR PURPOSE! +%% User assumes all risk. +%% In no event shall IEEE or any contributor to this code be liable for +%% any damages or losses, including, but not limited to, incidental, +%% consequential, or any other damages, resulting from the use or misuse +%% of any information contained here. +%% +%% All comments are the opinions of their respective authors and are not +%% necessarily endorsed by the IEEE. +%% +%% This work is distributed under the LaTeX Project Public License (LPPL) +%% ( http://www.latex-project.org/ ) version 1.3, and may be freely used, +%% distributed and modified. A copy of the LPPL, version 1.3, is included +%% in the base LaTeX documentation of all distributions of LaTeX released +%% 2003/12/01 or later. +%% Retain all contribution notices and credits. +%% ** Modified files should be clearly indicated as such, including ** +%% ** renaming them and changing author support contact information. ** +%% +%% File list of work: IEEEabrv.bib, IEEEfull.bib, IEEEexample.bib, +%% IEEEtran.bst, IEEEtranS.bst, IEEEtranSA.bst, +%% IEEEtranN.bst, IEEEtranSN.bst, IEEEtran_bst_HOWTO.pdf +%%************************************************************************* +% +% +% Changelog: +% +% 1.00 (2002/08/13) Initial release +% +% 1.10 (2002/09/27) +% 1. Corrected minor bug for improperly formed warning message when a +% book was not given a title. Thanks to Ming Kin Lai for reporting this. +% 2. Added support for CTLname_format_string and CTLname_latex_cmd fields +% in the BST control entry type. +% +% 1.11 (2003/04/02) +% 1. Fixed bug with URLs containing underscores when using url.sty. Thanks +% to Ming Kin Lai for reporting this. +% +% 1.12 (2007/01/11) +% 1. Fixed bug with unwanted comma before "et al." when an entry contained +% more than two author names. Thanks to Pallav Gupta for reporting this. +% 2. Fixed bug with anomalous closing quote in tech reports that have a +% type, but without a number or address. Thanks to Mehrdad Mirreza for +% reporting this. +% 3. Use braces in \providecommand in begin.bib to better support +% latex2html. TeX style length assignments OK with recent versions +% of latex2html - 1.71 (2002/2/1) or later is strongly recommended. +% Use of the language field still causes trouble with latex2html. +% Thanks to Federico Beffa for reporting this. +% 4. Added IEEEtran.bst ID and version comment string to .bbl output. +% 5. Provide a \BIBdecl hook that allows the user to execute commands +% just prior to the first entry. +% 6. Use default urlstyle (is using url.sty) of "same" rather than rm to +% better work with a wider variety of bibliography styles. +% 7. Changed month abbreviations from Sept., July and June to Sep., Jul., +% and Jun., respectively, as IEEE now does. Thanks to Moritz Borgmann +% for reporting this. +% 8. Control entry types should not be considered when calculating longest +% label width. +% 9. Added alias www for electronic/online. +% 10. Added CTLname_url_prefix control entry type. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% DEFAULTS FOR THE CONTROLS OF THE BST STYLE %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% These are the defaults for the user adjustable controls. The values used +% here can be overridden by the user via IEEEtranBSTCTL entry type. + +% NOTE: The recommended LaTeX command to invoke a control entry type is: +% +%\makeatletter +%\def\bstctlcite{\@ifnextchar[{\@bstctlcite}{\@bstctlcite[@auxout]}} +%\def\@bstctlcite[#1]#2{\@bsphack +% \@for\@citeb:=#2\do{% +% \edef\@citeb{\expandafter\@firstofone\@citeb}% +% \if@filesw\immediate\write\csname #1\endcsname{\string\citation{\@citeb}}\fi}% +% \@esphack} +%\makeatother +% +% It is called at the start of the document, before the first \cite, like: +% \bstctlcite{IEEEexample:BSTcontrol} +% +% IEEEtran.cls V1.6 and later does provide this command. + + + +% #0 turns off the display of the number for articles. +% #1 enables +FUNCTION {default.is.use.number.for.article} { #1 } + + +% #0 turns off the display of the paper and type fields in @inproceedings. +% #1 enables +FUNCTION {default.is.use.paper} { #1 } + + +% #0 turns off the forced use of "et al." +% #1 enables +FUNCTION {default.is.forced.et.al} { #0 } + +% The maximum number of names that can be present beyond which an "et al." +% usage is forced. Be sure that num.names.shown.with.forced.et.al (below) +% is not greater than this value! +% Note: There are many instances of references in IEEE journals which have +% a very large number of authors as well as instances in which "et al." is +% used profusely. +FUNCTION {default.max.num.names.before.forced.et.al} { #10 } + +% The number of names that will be shown with a forced "et al.". +% Must be less than or equal to max.num.names.before.forced.et.al +FUNCTION {default.num.names.shown.with.forced.et.al} { #1 } + + +% #0 turns off the alternate interword spacing for entries with URLs. +% #1 enables +FUNCTION {default.is.use.alt.interword.spacing} { #1 } + +% If alternate interword spacing for entries with URLs is enabled, this is +% the interword spacing stretch factor that will be used. For example, the +% default "4" here means that the interword spacing in entries with URLs can +% stretch to four times normal. Does not have to be an integer. Note that +% the value specified here can be overridden by the user in their LaTeX +% code via a command such as: +% "\providecommand\BIBentryALTinterwordstretchfactor{1.5}" in addition to +% that via the IEEEtranBSTCTL entry type. +FUNCTION {default.ALTinterwordstretchfactor} { "4" } + + +% #0 turns off the "dashification" of repeated (i.e., identical to those +% of the previous entry) names. IEEE normally does this. +% #1 enables +FUNCTION {default.is.dash.repeated.names} { #1 } + + +% The default name format control string. +FUNCTION {default.name.format.string}{ "{f.~}{vv~}{ll}{, jj}" } + + +% The default LaTeX font command for the names. +FUNCTION {default.name.latex.cmd}{ "" } + + +% The default URL prefix. +FUNCTION {default.name.url.prefix}{ "[Online]. Available:" } + + +% Other controls that cannot be accessed via IEEEtranBSTCTL entry type. + +% #0 turns off the terminal startup banner/completed message so as to +% operate more quietly. +% #1 enables +FUNCTION {is.print.banners.to.terminal} { #1 } + + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% FILE VERSION AND BANNER %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +FUNCTION{bst.file.version} { "1.12" } +FUNCTION{bst.file.date} { "2007/01/11" } +FUNCTION{bst.file.website} { "http://www.michaelshell.org/tex/ieeetran/bibtex/" } + +FUNCTION {banner.message} +{ is.print.banners.to.terminal + { "-- IEEEtran.bst version" " " * bst.file.version * + " (" * bst.file.date * ") " * "by Michael Shell." * + top$ + "-- " bst.file.website * + top$ + "-- See the " quote$ * "IEEEtran_bst_HOWTO.pdf" * quote$ * " manual for usage information." * + top$ + } + { skip$ } + if$ +} + +FUNCTION {completed.message} +{ is.print.banners.to.terminal + { "" + top$ + "Done." + top$ + } + { skip$ } + if$ +} + + + + +%%%%%%%%%%%%%%%%%%%%%% +%% STRING CONSTANTS %% +%%%%%%%%%%%%%%%%%%%%%% + +FUNCTION {bbl.and}{ "and" } +FUNCTION {bbl.etal}{ "et~al." } +FUNCTION {bbl.editors}{ "eds." } +FUNCTION {bbl.editor}{ "ed." } +FUNCTION {bbl.edition}{ "ed." } +FUNCTION {bbl.volume}{ "vol." } +FUNCTION {bbl.of}{ "of" } +FUNCTION {bbl.number}{ "no." } +FUNCTION {bbl.in}{ "in" } +FUNCTION {bbl.pages}{ "pp." } +FUNCTION {bbl.page}{ "p." } +FUNCTION {bbl.chapter}{ "ch." } +FUNCTION {bbl.paper}{ "paper" } +FUNCTION {bbl.part}{ "pt." } +FUNCTION {bbl.patent}{ "Patent" } +FUNCTION {bbl.patentUS}{ "U.S." } +FUNCTION {bbl.revision}{ "Rev." } +FUNCTION {bbl.series}{ "ser." } +FUNCTION {bbl.standard}{ "Std." } +FUNCTION {bbl.techrep}{ "Tech. Rep." } +FUNCTION {bbl.mthesis}{ "Master's thesis" } +FUNCTION {bbl.phdthesis}{ "Ph.D. dissertation" } +FUNCTION {bbl.st}{ "st" } +FUNCTION {bbl.nd}{ "nd" } +FUNCTION {bbl.rd}{ "rd" } +FUNCTION {bbl.th}{ "th" } + + +% This is the LaTeX spacer that is used when a larger than normal space +% is called for (such as just before the address:publisher). +FUNCTION {large.space} { "\hskip 1em plus 0.5em minus 0.4em\relax " } + +% The LaTeX code for dashes that are used to represent repeated names. +% Note: Some older IEEE journals used something like +% "\rule{0.275in}{0.5pt}\," which is fairly thick and runs right along +% the baseline. However, IEEE now uses a thinner, above baseline, +% six dash long sequence. +FUNCTION {repeated.name.dashes} { "------" } + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% PREDEFINED STRING MACROS %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +MACRO {jan} {"Jan."} +MACRO {feb} {"Feb."} +MACRO {mar} {"Mar."} +MACRO {apr} {"Apr."} +MACRO {may} {"May"} +MACRO {jun} {"Jun."} +MACRO {jul} {"Jul."} +MACRO {aug} {"Aug."} +MACRO {sep} {"Sep."} +MACRO {oct} {"Oct."} +MACRO {nov} {"Nov."} +MACRO {dec} {"Dec."} + + + +%%%%%%%%%%%%%%%%%% +%% ENTRY FIELDS %% +%%%%%%%%%%%%%%%%%% + +ENTRY + { address + assignee + author + booktitle + chapter + day + dayfiled + edition + editor + howpublished + institution + intype + journal + key + language + month + monthfiled + nationality + note + number + organization + pages + paper + publisher + school + series + revision + title + type + url + volume + year + yearfiled + CTLuse_article_number + CTLuse_paper + CTLuse_forced_etal + CTLmax_names_forced_etal + CTLnames_show_etal + CTLuse_alt_spacing + CTLalt_stretch_factor + CTLdash_repeated_names + CTLname_format_string + CTLname_latex_cmd + CTLname_url_prefix + } + {} + { label } + + + + +%%%%%%%%%%%%%%%%%%%%%%% +%% INTEGER VARIABLES %% +%%%%%%%%%%%%%%%%%%%%%%% + +INTEGERS { prev.status.punct this.status.punct punct.std + punct.no punct.comma punct.period + prev.status.space this.status.space space.std + space.no space.normal space.large + prev.status.quote this.status.quote quote.std + quote.no quote.close + prev.status.nline this.status.nline nline.std + nline.no nline.newblock + status.cap cap.std + cap.no cap.yes} + +INTEGERS { longest.label.width multiresult nameptr namesleft number.label numnames } + +INTEGERS { is.use.number.for.article + is.use.paper + is.forced.et.al + max.num.names.before.forced.et.al + num.names.shown.with.forced.et.al + is.use.alt.interword.spacing + is.dash.repeated.names} + + +%%%%%%%%%%%%%%%%%%%%%% +%% STRING VARIABLES %% +%%%%%%%%%%%%%%%%%%%%%% + +STRINGS { bibinfo + longest.label + oldname + s + t + ALTinterwordstretchfactor + name.format.string + name.latex.cmd + name.url.prefix} + + + + +%%%%%%%%%%%%%%%%%%%%%%%%% +%% LOW LEVEL FUNCTIONS %% +%%%%%%%%%%%%%%%%%%%%%%%%% + +FUNCTION {initialize.controls} +{ default.is.use.number.for.article 'is.use.number.for.article := + default.is.use.paper 'is.use.paper := + default.is.forced.et.al 'is.forced.et.al := + default.max.num.names.before.forced.et.al 'max.num.names.before.forced.et.al := + default.num.names.shown.with.forced.et.al 'num.names.shown.with.forced.et.al := + default.is.use.alt.interword.spacing 'is.use.alt.interword.spacing := + default.is.dash.repeated.names 'is.dash.repeated.names := + default.ALTinterwordstretchfactor 'ALTinterwordstretchfactor := + default.name.format.string 'name.format.string := + default.name.latex.cmd 'name.latex.cmd := + default.name.url.prefix 'name.url.prefix := +} + + +% This IEEEtran.bst features a very powerful and flexible mechanism for +% controlling the capitalization, punctuation, spacing, quotation, and +% newlines of the formatted entry fields. (Note: IEEEtran.bst does not need +% or use the newline/newblock feature, but it has been implemented for +% possible future use.) The output states of IEEEtran.bst consist of +% multiple independent attributes and, as such, can be thought of as being +% vectors, rather than the simple scalar values ("before.all", +% "mid.sentence", etc.) used in most other .bst files. +% +% The more flexible and complex design used here was motivated in part by +% IEEE's rather unusual bibliography style. For example, IEEE ends the +% previous field item with a period and large space prior to the publisher +% address; the @electronic entry types use periods as inter-item punctuation +% rather than the commas used by the other entry types; and URLs are never +% followed by periods even though they are the last item in the entry. +% Although it is possible to accommodate these features with the conventional +% output state system, the seemingly endless exceptions make for convoluted, +% unreliable and difficult to maintain code. +% +% IEEEtran.bst's output state system can be easily understood via a simple +% illustration of two most recently formatted entry fields (on the stack): +% +% CURRENT_ITEM +% "PREVIOUS_ITEM +% +% which, in this example, is to eventually appear in the bibliography as: +% +% "PREVIOUS_ITEM," CURRENT_ITEM +% +% It is the job of the output routine to take the previous item off of the +% stack (while leaving the current item at the top of the stack), apply its +% trailing punctuation (including closing quote marks) and spacing, and then +% to write the result to BibTeX's output buffer: +% +% "PREVIOUS_ITEM," +% +% Punctuation (and spacing) between items is often determined by both of the +% items rather than just the first one. The presence of quotation marks +% further complicates the situation because, in standard English, trailing +% punctuation marks are supposed to be contained within the quotes. +% +% IEEEtran.bst maintains two output state (aka "status") vectors which +% correspond to the previous and current (aka "this") items. Each vector +% consists of several independent attributes which track punctuation, +% spacing, quotation, and newlines. Capitalization status is handled by a +% separate scalar because the format routines, not the output routine, +% handle capitalization and, therefore, there is no need to maintain the +% capitalization attribute for both the "previous" and "this" items. +% +% When a format routine adds a new item, it copies the current output status +% vector to the previous output status vector and (usually) resets the +% current (this) output status vector to a "standard status" vector. Using a +% "standard status" vector in this way allows us to redefine what we mean by +% "standard status" at the start of each entry handler and reuse the same +% format routines under the various inter-item separation schemes. For +% example, the standard status vector for the @book entry type may use +% commas for item separators, while the @electronic type may use periods, +% yet both entry handlers exploit many of the exact same format routines. +% +% Because format routines have write access to the output status vector of +% the previous item, they can override the punctuation choices of the +% previous format routine! Therefore, it becomes trivial to implement rules +% such as "Always use a period and a large space before the publisher." By +% pushing the generation of the closing quote mark to the output routine, we +% avoid all the problems caused by having to close a quote before having all +% the information required to determine what the punctuation should be. +% +% The IEEEtran.bst output state system can easily be expanded if needed. +% For instance, it is easy to add a "space.tie" attribute value if the +% bibliography rules mandate that two items have to be joined with an +% unbreakable space. + +FUNCTION {initialize.status.constants} +{ #0 'punct.no := + #1 'punct.comma := + #2 'punct.period := + #0 'space.no := + #1 'space.normal := + #2 'space.large := + #0 'quote.no := + #1 'quote.close := + #0 'cap.no := + #1 'cap.yes := + #0 'nline.no := + #1 'nline.newblock := +} + +FUNCTION {std.status.using.comma} +{ punct.comma 'punct.std := + space.normal 'space.std := + quote.no 'quote.std := + nline.no 'nline.std := + cap.no 'cap.std := +} + +FUNCTION {std.status.using.period} +{ punct.period 'punct.std := + space.normal 'space.std := + quote.no 'quote.std := + nline.no 'nline.std := + cap.yes 'cap.std := +} + +FUNCTION {initialize.prev.this.status} +{ punct.no 'prev.status.punct := + space.no 'prev.status.space := + quote.no 'prev.status.quote := + nline.no 'prev.status.nline := + punct.no 'this.status.punct := + space.no 'this.status.space := + quote.no 'this.status.quote := + nline.no 'this.status.nline := + cap.yes 'status.cap := +} + +FUNCTION {this.status.std} +{ punct.std 'this.status.punct := + space.std 'this.status.space := + quote.std 'this.status.quote := + nline.std 'this.status.nline := +} + +FUNCTION {cap.status.std}{ cap.std 'status.cap := } + +FUNCTION {this.to.prev.status} +{ this.status.punct 'prev.status.punct := + this.status.space 'prev.status.space := + this.status.quote 'prev.status.quote := + this.status.nline 'prev.status.nline := +} + + +FUNCTION {not} +{ { #0 } + { #1 } + if$ +} + +FUNCTION {and} +{ { skip$ } + { pop$ #0 } + if$ +} + +FUNCTION {or} +{ { pop$ #1 } + { skip$ } + if$ +} + + +% convert the strings "yes" or "no" to #1 or #0 respectively +FUNCTION {yes.no.to.int} +{ "l" change.case$ duplicate$ + "yes" = + { pop$ #1 } + { duplicate$ "no" = + { pop$ #0 } + { "unknown boolean " quote$ * swap$ * quote$ * + " in " * cite$ * warning$ + #0 + } + if$ + } + if$ +} + + +% pushes true if the single char string on the stack is in the +% range of "0" to "9" +FUNCTION {is.num} +{ chr.to.int$ + duplicate$ "0" chr.to.int$ < not + swap$ "9" chr.to.int$ > not and +} + +% multiplies the integer on the stack by a factor of 10 +FUNCTION {bump.int.mag} +{ #0 'multiresult := + { duplicate$ #0 > } + { #1 - + multiresult #10 + + 'multiresult := + } + while$ +pop$ +multiresult +} + +% converts a single character string on the stack to an integer +FUNCTION {char.to.integer} +{ duplicate$ + is.num + { chr.to.int$ "0" chr.to.int$ - } + {"noninteger character " quote$ * swap$ * quote$ * + " in integer field of " * cite$ * warning$ + #0 + } + if$ +} + +% converts a string on the stack to an integer +FUNCTION {string.to.integer} +{ duplicate$ text.length$ 'namesleft := + #1 'nameptr := + #0 'numnames := + { nameptr namesleft > not } + { duplicate$ nameptr #1 substring$ + char.to.integer numnames bump.int.mag + + 'numnames := + nameptr #1 + + 'nameptr := + } + while$ +pop$ +numnames +} + + + + +% The output routines write out the *next* to the top (previous) item on the +% stack, adding punctuation and such as needed. Since IEEEtran.bst maintains +% the output status for the top two items on the stack, these output +% routines have to consider the previous output status (which corresponds to +% the item that is being output). Full independent control of punctuation, +% closing quote marks, spacing, and newblock is provided. +% +% "output.nonnull" does not check for the presence of a previous empty +% item. +% +% "output" does check for the presence of a previous empty item and will +% remove an empty item rather than outputing it. +% +% "output.warn" is like "output", but will issue a warning if it detects +% an empty item. + +FUNCTION {output.nonnull} +{ swap$ + prev.status.punct punct.comma = + { "," * } + { skip$ } + if$ + prev.status.punct punct.period = + { add.period$ } + { skip$ } + if$ + prev.status.quote quote.close = + { "''" * } + { skip$ } + if$ + prev.status.space space.normal = + { " " * } + { skip$ } + if$ + prev.status.space space.large = + { large.space * } + { skip$ } + if$ + write$ + prev.status.nline nline.newblock = + { newline$ "\newblock " write$ } + { skip$ } + if$ +} + +FUNCTION {output} +{ duplicate$ empty$ + 'pop$ + 'output.nonnull + if$ +} + +FUNCTION {output.warn} +{ 't := + duplicate$ empty$ + { pop$ "empty " t * " in " * cite$ * warning$ } + 'output.nonnull + if$ +} + +% "fin.entry" is the output routine that handles the last item of the entry +% (which will be on the top of the stack when "fin.entry" is called). + +FUNCTION {fin.entry} +{ this.status.punct punct.no = + { skip$ } + { add.period$ } + if$ + this.status.quote quote.close = + { "''" * } + { skip$ } + if$ +write$ +newline$ +} + + +FUNCTION {is.last.char.not.punct} +{ duplicate$ + "}" * add.period$ + #-1 #1 substring$ "." = +} + +FUNCTION {is.multiple.pages} +{ 't := + #0 'multiresult := + { multiresult not + t empty$ not + and + } + { t #1 #1 substring$ + duplicate$ "-" = + swap$ duplicate$ "," = + swap$ "+" = + or or + { #1 'multiresult := } + { t #2 global.max$ substring$ 't := } + if$ + } + while$ + multiresult +} + +FUNCTION {capitalize}{ "u" change.case$ "t" change.case$ } + +FUNCTION {emphasize} +{ duplicate$ empty$ + { pop$ "" } + { "\emph{" swap$ * "}" * } + if$ +} + +FUNCTION {do.name.latex.cmd} +{ name.latex.cmd + empty$ + { skip$ } + { name.latex.cmd "{" * swap$ * "}" * } + if$ +} + +% IEEEtran.bst uses its own \BIBforeignlanguage command which directly +% invokes the TeX hyphenation patterns without the need of the Babel +% package. Babel does a lot more than switch hyphenation patterns and +% its loading can cause unintended effects in many class files (such as +% IEEEtran.cls). +FUNCTION {select.language} +{ duplicate$ empty$ 'pop$ + { language empty$ 'skip$ + { "\BIBforeignlanguage{" language * "}{" * swap$ * "}" * } + if$ + } + if$ +} + +FUNCTION {tie.or.space.prefix} +{ duplicate$ text.length$ #3 < + { "~" } + { " " } + if$ + swap$ +} + +FUNCTION {get.bbl.editor} +{ editor num.names$ #1 > 'bbl.editors 'bbl.editor if$ } + +FUNCTION {space.word}{ " " swap$ * " " * } + + +% Field Conditioners, Converters, Checkers and External Interfaces + +FUNCTION {empty.field.to.null.string} +{ duplicate$ empty$ + { pop$ "" } + { skip$ } + if$ +} + +FUNCTION {either.or.check} +{ empty$ + { pop$ } + { "can't use both " swap$ * " fields in " * cite$ * warning$ } + if$ +} + +FUNCTION {empty.entry.warn} +{ author empty$ title empty$ howpublished empty$ + month empty$ year empty$ note empty$ url empty$ + and and and and and and + { "all relevant fields are empty in " cite$ * warning$ } + 'skip$ + if$ +} + + +% The bibinfo system provides a way for the electronic parsing/acquisition +% of a bibliography's contents as is done by ReVTeX. For example, a field +% could be entered into the bibliography as: +% \bibinfo{volume}{2} +% Only the "2" would show up in the document, but the LaTeX \bibinfo command +% could do additional things with the information. IEEEtran.bst does provide +% a \bibinfo command via "\providecommand{\bibinfo}[2]{#2}". However, it is +% currently not used as the bogus bibinfo functions defined here output the +% entry values directly without the \bibinfo wrapper. The bibinfo functions +% themselves (and the calls to them) are retained for possible future use. +% +% bibinfo.check avoids acting on missing fields while bibinfo.warn will +% issue a warning message if a missing field is detected. Prior to calling +% the bibinfo functions, the user should push the field value and then its +% name string, in that order. + +FUNCTION {bibinfo.check} +{ swap$ duplicate$ missing$ + { pop$ pop$ "" } + { duplicate$ empty$ + { swap$ pop$ } + { swap$ pop$ } + if$ + } + if$ +} + +FUNCTION {bibinfo.warn} +{ swap$ duplicate$ missing$ + { swap$ "missing " swap$ * " in " * cite$ * warning$ pop$ "" } + { duplicate$ empty$ + { swap$ "empty " swap$ * " in " * cite$ * warning$ } + { swap$ pop$ } + if$ + } + if$ +} + + +% IEEE separates large numbers with more than 4 digits into groups of +% three. IEEE uses a small space to separate these number groups. +% Typical applications include patent and page numbers. + +% number of consecutive digits required to trigger the group separation. +FUNCTION {large.number.trigger}{ #5 } + +% For numbers longer than the trigger, this is the blocksize of the groups. +% The blocksize must be less than the trigger threshold, and 2 * blocksize +% must be greater than the trigger threshold (can't do more than one +% separation on the initial trigger). +FUNCTION {large.number.blocksize}{ #3 } + +% What is actually inserted between the number groups. +FUNCTION {large.number.separator}{ "\," } + +% So as to save on integer variables by reusing existing ones, numnames +% holds the current number of consecutive digits read and nameptr holds +% the number that will trigger an inserted space. +FUNCTION {large.number.separate} +{ 't := + "" + #0 'numnames := + large.number.trigger 'nameptr := + { t empty$ not } + { t #-1 #1 substring$ is.num + { numnames #1 + 'numnames := } + { #0 'numnames := + large.number.trigger 'nameptr := + } + if$ + t #-1 #1 substring$ swap$ * + t #-2 global.max$ substring$ 't := + numnames nameptr = + { duplicate$ #1 nameptr large.number.blocksize - substring$ swap$ + nameptr large.number.blocksize - #1 + global.max$ substring$ + large.number.separator swap$ * * + nameptr large.number.blocksize - 'numnames := + large.number.blocksize #1 + 'nameptr := + } + { skip$ } + if$ + } + while$ +} + +% Converts all single dashes "-" to double dashes "--". +FUNCTION {n.dashify} +{ large.number.separate + 't := + "" + { t empty$ not } + { t #1 #1 substring$ "-" = + { t #1 #2 substring$ "--" = not + { "--" * + t #2 global.max$ substring$ 't := + } + { { t #1 #1 substring$ "-" = } + { "-" * + t #2 global.max$ substring$ 't := + } + while$ + } + if$ + } + { t #1 #1 substring$ * + t #2 global.max$ substring$ 't := + } + if$ + } + while$ +} + + +% This function detects entries with names that are identical to that of +% the previous entry and replaces the repeated names with dashes (if the +% "is.dash.repeated.names" user control is nonzero). +FUNCTION {name.or.dash} +{ 's := + oldname empty$ + { s 'oldname := s } + { s oldname = + { is.dash.repeated.names + { repeated.name.dashes } + { s 'oldname := s } + if$ + } + { s 'oldname := s } + if$ + } + if$ +} + +% Converts the number string on the top of the stack to +% "numerical ordinal form" (e.g., "7" to "7th"). There is +% no artificial limit to the upper bound of the numbers as the +% least significant digit always determines the ordinal form. +FUNCTION {num.to.ordinal} +{ duplicate$ #-1 #1 substring$ "1" = + { bbl.st * } + { duplicate$ #-1 #1 substring$ "2" = + { bbl.nd * } + { duplicate$ #-1 #1 substring$ "3" = + { bbl.rd * } + { bbl.th * } + if$ + } + if$ + } + if$ +} + +% If the string on the top of the stack begins with a number, +% (e.g., 11th) then replace the string with the leading number +% it contains. Otherwise retain the string as-is. s holds the +% extracted number, t holds the part of the string that remains +% to be scanned. +FUNCTION {extract.num} +{ duplicate$ 't := + "" 's := + { t empty$ not } + { t #1 #1 substring$ + t #2 global.max$ substring$ 't := + duplicate$ is.num + { s swap$ * 's := } + { pop$ "" 't := } + if$ + } + while$ + s empty$ + 'skip$ + { pop$ s } + if$ +} + +% Converts the word number string on the top of the stack to +% Arabic string form. Will be successful up to "tenth". +FUNCTION {word.to.num} +{ duplicate$ "l" change.case$ 's := + s "first" = + { pop$ "1" } + { skip$ } + if$ + s "second" = + { pop$ "2" } + { skip$ } + if$ + s "third" = + { pop$ "3" } + { skip$ } + if$ + s "fourth" = + { pop$ "4" } + { skip$ } + if$ + s "fifth" = + { pop$ "5" } + { skip$ } + if$ + s "sixth" = + { pop$ "6" } + { skip$ } + if$ + s "seventh" = + { pop$ "7" } + { skip$ } + if$ + s "eighth" = + { pop$ "8" } + { skip$ } + if$ + s "ninth" = + { pop$ "9" } + { skip$ } + if$ + s "tenth" = + { pop$ "10" } + { skip$ } + if$ +} + + +% Converts the string on the top of the stack to numerical +% ordinal (e.g., "11th") form. +FUNCTION {convert.edition} +{ duplicate$ empty$ 'skip$ + { duplicate$ #1 #1 substring$ is.num + { extract.num + num.to.ordinal + } + { word.to.num + duplicate$ #1 #1 substring$ is.num + { num.to.ordinal } + { "edition ordinal word " quote$ * edition * quote$ * + " may be too high (or improper) for conversion" * " in " * cite$ * warning$ + } + if$ + } + if$ + } + if$ +} + + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% LATEX BIBLIOGRAPHY CODE %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +FUNCTION {start.entry} +{ newline$ + "\bibitem{" write$ + cite$ write$ + "}" write$ + newline$ + "" + initialize.prev.this.status +} + +% Here we write out all the LaTeX code that we will need. The most involved +% code sequences are those that control the alternate interword spacing and +% foreign language hyphenation patterns. The heavy use of \providecommand +% gives users a way to override the defaults. Special thanks to Javier Bezos, +% Johannes Braams, Robin Fairbairns, Heiko Oberdiek, Donald Arseneau and all +% the other gurus on comp.text.tex for their help and advice on the topic of +% \selectlanguage, Babel and BibTeX. +FUNCTION {begin.bib} +{ "% Generated by IEEEtran.bst, version: " bst.file.version * " (" * bst.file.date * ")" * + write$ newline$ + preamble$ empty$ 'skip$ + { preamble$ write$ newline$ } + if$ + "\begin{thebibliography}{" longest.label * "}" * + write$ newline$ + "\providecommand{\url}[1]{#1}" + write$ newline$ + "\csname url@samestyle\endcsname" + write$ newline$ + "\providecommand{\newblock}{\relax}" + write$ newline$ + "\providecommand{\bibinfo}[2]{#2}" + write$ newline$ + "\providecommand{\BIBentrySTDinterwordspacing}{\spaceskip=0pt\relax}" + write$ newline$ + "\providecommand{\BIBentryALTinterwordstretchfactor}{" + ALTinterwordstretchfactor * "}" * + write$ newline$ + "\providecommand{\BIBentryALTinterwordspacing}{\spaceskip=\fontdimen2\font plus " + write$ newline$ + "\BIBentryALTinterwordstretchfactor\fontdimen3\font minus \fontdimen4\font\relax}" + write$ newline$ + "\providecommand{\BIBforeignlanguage}[2]{{%" + write$ newline$ + "\expandafter\ifx\csname l@#1\endcsname\relax" + write$ newline$ + "\typeout{** WARNING: IEEEtran.bst: No hyphenation pattern has been}%" + write$ newline$ + "\typeout{** loaded for the language `#1'. Using the pattern for}%" + write$ newline$ + "\typeout{** the default language instead.}%" + write$ newline$ + "\else" + write$ newline$ + "\language=\csname l@#1\endcsname" + write$ newline$ + "\fi" + write$ newline$ + "#2}}" + write$ newline$ + "\providecommand{\BIBdecl}{\relax}" + write$ newline$ + "\BIBdecl" + write$ newline$ +} + +FUNCTION {end.bib} +{ newline$ "\end{thebibliography}" write$ newline$ } + +FUNCTION {if.url.alt.interword.spacing} +{ is.use.alt.interword.spacing + {url empty$ 'skip$ {"\BIBentryALTinterwordspacing" write$ newline$} if$} + { skip$ } + if$ +} + +FUNCTION {if.url.std.interword.spacing} +{ is.use.alt.interword.spacing + {url empty$ 'skip$ {"\BIBentrySTDinterwordspacing" write$ newline$} if$} + { skip$ } + if$ +} + + + + +%%%%%%%%%%%%%%%%%%%%%%%% +%% LONGEST LABEL PASS %% +%%%%%%%%%%%%%%%%%%%%%%%% + +FUNCTION {initialize.longest.label} +{ "" 'longest.label := + #1 'number.label := + #0 'longest.label.width := +} + +FUNCTION {longest.label.pass} +{ type$ "ieeetranbstctl" = + { skip$ } + { number.label int.to.str$ 'label := + number.label #1 + 'number.label := + label width$ longest.label.width > + { label 'longest.label := + label width$ 'longest.label.width := + } + { skip$ } + if$ + } + if$ +} + + + + +%%%%%%%%%%%%%%%%%%%%% +%% FORMAT HANDLERS %% +%%%%%%%%%%%%%%%%%%%%% + +%% Lower Level Formats (used by higher level formats) + +FUNCTION {format.address.org.or.pub.date} +{ 't := + "" + year empty$ + { "empty year in " cite$ * warning$ } + { skip$ } + if$ + address empty$ t empty$ and + year empty$ and month empty$ and + { skip$ } + { this.to.prev.status + this.status.std + cap.status.std + address "address" bibinfo.check * + t empty$ + { skip$ } + { punct.period 'prev.status.punct := + space.large 'prev.status.space := + address empty$ + { skip$ } + { ": " * } + if$ + t * + } + if$ + year empty$ month empty$ and + { skip$ } + { t empty$ address empty$ and + { skip$ } + { ", " * } + if$ + month empty$ + { year empty$ + { skip$ } + { year "year" bibinfo.check * } + if$ + } + { month "month" bibinfo.check * + year empty$ + { skip$ } + { " " * year "year" bibinfo.check * } + if$ + } + if$ + } + if$ + } + if$ +} + + +FUNCTION {format.names} +{ 'bibinfo := + duplicate$ empty$ 'skip$ { + this.to.prev.status + this.status.std + 's := + "" 't := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr + name.format.string + format.name$ + bibinfo bibinfo.check + 't := + nameptr #1 > + { nameptr num.names.shown.with.forced.et.al #1 + = + numnames max.num.names.before.forced.et.al > + is.forced.et.al and and + { "others" 't := + #1 'namesleft := + } + { skip$ } + if$ + namesleft #1 > + { ", " * t do.name.latex.cmd * } + { s nameptr "{ll}" format.name$ duplicate$ "others" = + { 't := } + { pop$ } + if$ + t "others" = + { " " * bbl.etal emphasize * } + { numnames #2 > + { "," * } + { skip$ } + if$ + bbl.and + space.word * t do.name.latex.cmd * + } + if$ + } + if$ + } + { t do.name.latex.cmd } + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ + cap.status.std + } if$ +} + + + + +%% Higher Level Formats + +%% addresses/locations + +FUNCTION {format.address} +{ address duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + cap.status.std + } + if$ +} + + + +%% author/editor names + +FUNCTION {format.authors}{ author "author" format.names } + +FUNCTION {format.editors} +{ editor "editor" format.names duplicate$ empty$ 'skip$ + { ", " * + get.bbl.editor + capitalize + * + } + if$ +} + + + +%% date + +FUNCTION {format.date} +{ + month "month" bibinfo.check duplicate$ empty$ + year "year" bibinfo.check duplicate$ empty$ + { swap$ 'skip$ + { this.to.prev.status + this.status.std + cap.status.std + "there's a month but no year in " cite$ * warning$ } + if$ + * + } + { this.to.prev.status + this.status.std + cap.status.std + swap$ 'skip$ + { + swap$ + " " * swap$ + } + if$ + * + } + if$ +} + +FUNCTION {format.date.electronic} +{ month "month" bibinfo.check duplicate$ empty$ + year "year" bibinfo.check duplicate$ empty$ + { swap$ + { pop$ } + { "there's a month but no year in " cite$ * warning$ + pop$ ")" * "(" swap$ * + this.to.prev.status + punct.no 'this.status.punct := + space.normal 'this.status.space := + quote.no 'this.status.quote := + cap.yes 'status.cap := + } + if$ + } + { swap$ + { swap$ pop$ ")" * "(" swap$ * } + { "(" swap$ * ", " * swap$ * ")" * } + if$ + this.to.prev.status + punct.no 'this.status.punct := + space.normal 'this.status.space := + quote.no 'this.status.quote := + cap.yes 'status.cap := + } + if$ +} + + + +%% edition/title + +% Note: IEEE considers the edition to be closely associated with +% the title of a book. So, in IEEEtran.bst the edition is normally handled +% within the formatting of the title. The format.edition function is +% retained here for possible future use. +FUNCTION {format.edition} +{ edition duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + convert.edition + status.cap + { "t" } + { "l" } + if$ change.case$ + "edition" bibinfo.check + "~" * bbl.edition * + cap.status.std + } + if$ +} + +% This is used to format the booktitle of a conference proceedings. +% Here we use the "intype" field to provide the user a way to +% override the word "in" (e.g., with things like "presented at") +% Use of intype stops the emphasis of the booktitle to indicate that +% we no longer mean the written conference proceedings, but the +% conference itself. +FUNCTION {format.in.booktitle} +{ booktitle "booktitle" bibinfo.check duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + select.language + intype missing$ + { emphasize + bbl.in " " * + } + { intype " " * } + if$ + swap$ * + cap.status.std + } + if$ +} + +% This is used to format the booktitle of collection. +% Here the "intype" field is not supported, but "edition" is. +FUNCTION {format.in.booktitle.edition} +{ booktitle "booktitle" bibinfo.check duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + select.language + emphasize + edition empty$ 'skip$ + { ", " * + edition + convert.edition + "l" change.case$ + * "~" * bbl.edition * + } + if$ + bbl.in " " * swap$ * + cap.status.std + } + if$ +} + +FUNCTION {format.article.title} +{ title duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + "t" change.case$ + } + if$ + "title" bibinfo.check + duplicate$ empty$ 'skip$ + { quote.close 'this.status.quote := + is.last.char.not.punct + { punct.std 'this.status.punct := } + { punct.no 'this.status.punct := } + if$ + select.language + "``" swap$ * + cap.status.std + } + if$ +} + +FUNCTION {format.article.title.electronic} +{ title duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + cap.status.std + "t" change.case$ + } + if$ + "title" bibinfo.check + duplicate$ empty$ + { skip$ } + { select.language } + if$ +} + +FUNCTION {format.book.title.edition} +{ title "title" bibinfo.check + duplicate$ empty$ + { "empty title in " cite$ * warning$ } + { this.to.prev.status + this.status.std + select.language + emphasize + edition empty$ 'skip$ + { ", " * + edition + convert.edition + status.cap + { "t" } + { "l" } + if$ + change.case$ + * "~" * bbl.edition * + } + if$ + cap.status.std + } + if$ +} + +FUNCTION {format.book.title} +{ title "title" bibinfo.check + duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + cap.status.std + select.language + emphasize + } + if$ +} + + + +%% journal + +FUNCTION {format.journal} +{ journal duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + cap.status.std + select.language + emphasize + } + if$ +} + + + +%% how published + +FUNCTION {format.howpublished} +{ howpublished duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + cap.status.std + } + if$ +} + + + +%% institutions/organization/publishers/school + +FUNCTION {format.institution} +{ institution duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + cap.status.std + } + if$ +} + +FUNCTION {format.organization} +{ organization duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + cap.status.std + } + if$ +} + +FUNCTION {format.address.publisher.date} +{ publisher "publisher" bibinfo.warn format.address.org.or.pub.date } + +FUNCTION {format.address.publisher.date.nowarn} +{ publisher "publisher" bibinfo.check format.address.org.or.pub.date } + +FUNCTION {format.address.organization.date} +{ organization "organization" bibinfo.check format.address.org.or.pub.date } + +FUNCTION {format.school} +{ school duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + cap.status.std + } + if$ +} + + + +%% volume/number/series/chapter/pages + +FUNCTION {format.volume} +{ volume empty.field.to.null.string + duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + bbl.volume + status.cap + { capitalize } + { skip$ } + if$ + swap$ tie.or.space.prefix + "volume" bibinfo.check + * * + cap.status.std + } + if$ +} + +FUNCTION {format.number} +{ number empty.field.to.null.string + duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + status.cap + { bbl.number capitalize } + { bbl.number } + if$ + swap$ tie.or.space.prefix + "number" bibinfo.check + * * + cap.status.std + } + if$ +} + +FUNCTION {format.number.if.use.for.article} +{ is.use.number.for.article + { format.number } + { "" } + if$ +} + +% IEEE does not seem to tie the series so closely with the volume +% and number as is done in other bibliography styles. Instead the +% series is treated somewhat like an extension of the title. +FUNCTION {format.series} +{ series empty$ + { "" } + { this.to.prev.status + this.status.std + bbl.series " " * + series "series" bibinfo.check * + cap.status.std + } + if$ +} + + +FUNCTION {format.chapter} +{ chapter empty$ + { "" } + { this.to.prev.status + this.status.std + type empty$ + { bbl.chapter } + { type "l" change.case$ + "type" bibinfo.check + } + if$ + chapter tie.or.space.prefix + "chapter" bibinfo.check + * * + cap.status.std + } + if$ +} + + +% The intended use of format.paper is for paper numbers of inproceedings. +% The paper type can be overridden via the type field. +% We allow the type to be displayed even if the paper number is absent +% for things like "postdeadline paper" +FUNCTION {format.paper} +{ is.use.paper + { paper empty$ + { type empty$ + { "" } + { this.to.prev.status + this.status.std + type "type" bibinfo.check + cap.status.std + } + if$ + } + { this.to.prev.status + this.status.std + type empty$ + { bbl.paper } + { type "type" bibinfo.check } + if$ + " " * paper + "paper" bibinfo.check + * + cap.status.std + } + if$ + } + { "" } + if$ +} + + +FUNCTION {format.pages} +{ pages duplicate$ empty$ 'skip$ + { this.to.prev.status + this.status.std + duplicate$ is.multiple.pages + { + bbl.pages swap$ + n.dashify + } + { + bbl.page swap$ + } + if$ + tie.or.space.prefix + "pages" bibinfo.check + * * + cap.status.std + } + if$ +} + + + +%% technical report number + +FUNCTION {format.tech.report.number} +{ number "number" bibinfo.check + this.to.prev.status + this.status.std + cap.status.std + type duplicate$ empty$ + { pop$ + bbl.techrep + } + { skip$ } + if$ + "type" bibinfo.check + swap$ duplicate$ empty$ + { pop$ } + { tie.or.space.prefix * * } + if$ +} + + + +%% note + +FUNCTION {format.note} +{ note empty$ + { "" } + { this.to.prev.status + this.status.std + punct.period 'this.status.punct := + note #1 #1 substring$ + duplicate$ "{" = + { skip$ } + { status.cap + { "u" } + { "l" } + if$ + change.case$ + } + if$ + note #2 global.max$ substring$ * "note" bibinfo.check + cap.yes 'status.cap := + } + if$ +} + + + +%% patent + +FUNCTION {format.patent.date} +{ this.to.prev.status + this.status.std + year empty$ + { monthfiled duplicate$ empty$ + { "monthfiled" bibinfo.check pop$ "" } + { "monthfiled" bibinfo.check } + if$ + dayfiled duplicate$ empty$ + { "dayfiled" bibinfo.check pop$ "" * } + { "dayfiled" bibinfo.check + monthfiled empty$ + { "dayfiled without a monthfiled in " cite$ * warning$ + * + } + { " " swap$ * * } + if$ + } + if$ + yearfiled empty$ + { "no year or yearfiled in " cite$ * warning$ } + { yearfiled "yearfiled" bibinfo.check + swap$ + duplicate$ empty$ + { pop$ } + { ", " * swap$ * } + if$ + } + if$ + } + { month duplicate$ empty$ + { "month" bibinfo.check pop$ "" } + { "month" bibinfo.check } + if$ + day duplicate$ empty$ + { "day" bibinfo.check pop$ "" * } + { "day" bibinfo.check + month empty$ + { "day without a month in " cite$ * warning$ + * + } + { " " swap$ * * } + if$ + } + if$ + year "year" bibinfo.check + swap$ + duplicate$ empty$ + { pop$ } + { ", " * swap$ * } + if$ + } + if$ + cap.status.std +} + +FUNCTION {format.patent.nationality.type.number} +{ this.to.prev.status + this.status.std + nationality duplicate$ empty$ + { "nationality" bibinfo.warn pop$ "" } + { "nationality" bibinfo.check + duplicate$ "l" change.case$ "united states" = + { pop$ bbl.patentUS } + { skip$ } + if$ + " " * + } + if$ + type empty$ + { bbl.patent "type" bibinfo.check } + { type "type" bibinfo.check } + if$ + * + number duplicate$ empty$ + { "number" bibinfo.warn pop$ } + { "number" bibinfo.check + large.number.separate + swap$ " " * swap$ * + } + if$ + cap.status.std +} + + + +%% standard + +FUNCTION {format.organization.institution.standard.type.number} +{ this.to.prev.status + this.status.std + organization duplicate$ empty$ + { pop$ + institution duplicate$ empty$ + { "institution" bibinfo.warn } + { "institution" bibinfo.warn " " * } + if$ + } + { "organization" bibinfo.warn " " * } + if$ + type empty$ + { bbl.standard "type" bibinfo.check } + { type "type" bibinfo.check } + if$ + * + number duplicate$ empty$ + { "number" bibinfo.check pop$ } + { "number" bibinfo.check + large.number.separate + swap$ " " * swap$ * + } + if$ + cap.status.std +} + +FUNCTION {format.revision} +{ revision empty$ + { "" } + { this.to.prev.status + this.status.std + bbl.revision + revision tie.or.space.prefix + "revision" bibinfo.check + * * + cap.status.std + } + if$ +} + + +%% thesis + +FUNCTION {format.master.thesis.type} +{ this.to.prev.status + this.status.std + type empty$ + { + bbl.mthesis + } + { + type "type" bibinfo.check + } + if$ +cap.status.std +} + +FUNCTION {format.phd.thesis.type} +{ this.to.prev.status + this.status.std + type empty$ + { + bbl.phdthesis + } + { + type "type" bibinfo.check + } + if$ +cap.status.std +} + + + +%% URL + +FUNCTION {format.url} +{ url empty$ + { "" } + { this.to.prev.status + this.status.std + cap.yes 'status.cap := + name.url.prefix " " * + "\url{" * url * "}" * + punct.no 'this.status.punct := + punct.period 'prev.status.punct := + space.normal 'this.status.space := + space.normal 'prev.status.space := + quote.no 'this.status.quote := + } + if$ +} + + + + +%%%%%%%%%%%%%%%%%%%% +%% ENTRY HANDLERS %% +%%%%%%%%%%%%%%%%%%%% + + +% Note: In many journals, IEEE (or the authors) tend not to show the number +% for articles, so the display of the number is controlled here by the +% switch "is.use.number.for.article" +FUNCTION {article} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors "author" output.warn + name.or.dash + format.article.title "title" output.warn + format.journal "journal" bibinfo.check "journal" output.warn + format.volume output + format.number.if.use.for.article output + format.pages output + format.date "year" output.warn + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {book} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + author empty$ + { format.editors "author and editor" output.warn } + { format.authors output.nonnull } + if$ + name.or.dash + format.book.title.edition output + format.series output + author empty$ + { skip$ } + { format.editors output } + if$ + format.address.publisher.date output + format.volume output + format.number output + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {booklet} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors output + name.or.dash + format.article.title "title" output.warn + format.howpublished "howpublished" bibinfo.check output + format.organization "organization" bibinfo.check output + format.address "address" bibinfo.check output + format.date output + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {electronic} +{ std.status.using.period + start.entry + if.url.alt.interword.spacing + format.authors output + name.or.dash + format.date.electronic output + format.article.title.electronic output + format.howpublished "howpublished" bibinfo.check output + format.organization "organization" bibinfo.check output + format.address "address" bibinfo.check output + format.note output + format.url output + fin.entry + empty.entry.warn + if.url.std.interword.spacing +} + +FUNCTION {inbook} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + author empty$ + { format.editors "author and editor" output.warn } + { format.authors output.nonnull } + if$ + name.or.dash + format.book.title.edition output + format.series output + format.address.publisher.date output + format.volume output + format.number output + format.chapter output + format.pages output + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {incollection} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors "author" output.warn + name.or.dash + format.article.title "title" output.warn + format.in.booktitle.edition "booktitle" output.warn + format.series output + format.editors output + format.address.publisher.date.nowarn output + format.volume output + format.number output + format.chapter output + format.pages output + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {inproceedings} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors "author" output.warn + name.or.dash + format.article.title "title" output.warn + format.in.booktitle "booktitle" output.warn + format.series output + format.editors output + format.volume output + format.number output + publisher empty$ + { format.address.organization.date output } + { format.organization "organization" bibinfo.check output + format.address.publisher.date output + } + if$ + format.paper output + format.pages output + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {manual} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors output + name.or.dash + format.book.title.edition "title" output.warn + format.howpublished "howpublished" bibinfo.check output + format.organization "organization" bibinfo.check output + format.address "address" bibinfo.check output + format.date output + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {mastersthesis} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors "author" output.warn + name.or.dash + format.article.title "title" output.warn + format.master.thesis.type output.nonnull + format.school "school" bibinfo.warn output + format.address "address" bibinfo.check output + format.date "year" output.warn + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {misc} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors output + name.or.dash + format.article.title output + format.howpublished "howpublished" bibinfo.check output + format.organization "organization" bibinfo.check output + format.address "address" bibinfo.check output + format.pages output + format.date output + format.note output + format.url output + fin.entry + empty.entry.warn + if.url.std.interword.spacing +} + +FUNCTION {patent} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors output + name.or.dash + format.article.title output + format.patent.nationality.type.number output + format.patent.date output + format.note output + format.url output + fin.entry + empty.entry.warn + if.url.std.interword.spacing +} + +FUNCTION {periodical} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.editors output + name.or.dash + format.book.title "title" output.warn + format.series output + format.volume output + format.number output + format.organization "organization" bibinfo.check output + format.date "year" output.warn + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {phdthesis} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors "author" output.warn + name.or.dash + format.article.title "title" output.warn + format.phd.thesis.type output.nonnull + format.school "school" bibinfo.warn output + format.address "address" bibinfo.check output + format.date "year" output.warn + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {proceedings} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.editors output + name.or.dash + format.book.title "title" output.warn + format.series output + format.volume output + format.number output + publisher empty$ + { format.address.organization.date output } + { format.organization "organization" bibinfo.check output + format.address.publisher.date output + } + if$ + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {standard} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors output + name.or.dash + format.book.title "title" output.warn + format.howpublished "howpublished" bibinfo.check output + format.organization.institution.standard.type.number output + format.revision output + format.date output + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {techreport} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors "author" output.warn + name.or.dash + format.article.title "title" output.warn + format.howpublished "howpublished" bibinfo.check output + format.institution "institution" bibinfo.warn output + format.address "address" bibinfo.check output + format.tech.report.number output.nonnull + format.date "year" output.warn + format.note output + format.url output + fin.entry + if.url.std.interword.spacing +} + +FUNCTION {unpublished} +{ std.status.using.comma + start.entry + if.url.alt.interword.spacing + format.authors "author" output.warn + name.or.dash + format.article.title "title" output.warn + format.date output + format.note "note" output.warn + format.url output + fin.entry + if.url.std.interword.spacing +} + + +% The special entry type which provides the user interface to the +% BST controls +FUNCTION {IEEEtranBSTCTL} +{ is.print.banners.to.terminal + { "** IEEEtran BST control entry " quote$ * cite$ * quote$ * " detected." * + top$ + } + { skip$ } + if$ + CTLuse_article_number + empty$ + { skip$ } + { CTLuse_article_number + yes.no.to.int + 'is.use.number.for.article := + } + if$ + CTLuse_paper + empty$ + { skip$ } + { CTLuse_paper + yes.no.to.int + 'is.use.paper := + } + if$ + CTLuse_forced_etal + empty$ + { skip$ } + { CTLuse_forced_etal + yes.no.to.int + 'is.forced.et.al := + } + if$ + CTLmax_names_forced_etal + empty$ + { skip$ } + { CTLmax_names_forced_etal + string.to.integer + 'max.num.names.before.forced.et.al := + } + if$ + CTLnames_show_etal + empty$ + { skip$ } + { CTLnames_show_etal + string.to.integer + 'num.names.shown.with.forced.et.al := + } + if$ + CTLuse_alt_spacing + empty$ + { skip$ } + { CTLuse_alt_spacing + yes.no.to.int + 'is.use.alt.interword.spacing := + } + if$ + CTLalt_stretch_factor + empty$ + { skip$ } + { CTLalt_stretch_factor + 'ALTinterwordstretchfactor := + "\renewcommand{\BIBentryALTinterwordstretchfactor}{" + ALTinterwordstretchfactor * "}" * + write$ newline$ + } + if$ + CTLdash_repeated_names + empty$ + { skip$ } + { CTLdash_repeated_names + yes.no.to.int + 'is.dash.repeated.names := + } + if$ + CTLname_format_string + empty$ + { skip$ } + { CTLname_format_string + 'name.format.string := + } + if$ + CTLname_latex_cmd + empty$ + { skip$ } + { CTLname_latex_cmd + 'name.latex.cmd := + } + if$ + CTLname_url_prefix + missing$ + { skip$ } + { CTLname_url_prefix + 'name.url.prefix := + } + if$ + + + num.names.shown.with.forced.et.al max.num.names.before.forced.et.al > + { "CTLnames_show_etal cannot be greater than CTLmax_names_forced_etal in " cite$ * warning$ + max.num.names.before.forced.et.al 'num.names.shown.with.forced.et.al := + } + { skip$ } + if$ +} + + +%%%%%%%%%%%%%%%%%%% +%% ENTRY ALIASES %% +%%%%%%%%%%%%%%%%%%% +FUNCTION {conference}{inproceedings} +FUNCTION {online}{electronic} +FUNCTION {internet}{electronic} +FUNCTION {webpage}{electronic} +FUNCTION {www}{electronic} +FUNCTION {default.type}{misc} + + + +%%%%%%%%%%%%%%%%%% +%% MAIN PROGRAM %% +%%%%%%%%%%%%%%%%%% + +READ + +EXECUTE {initialize.controls} +EXECUTE {initialize.status.constants} +EXECUTE {banner.message} + +EXECUTE {initialize.longest.label} +ITERATE {longest.label.pass} + +EXECUTE {begin.bib} +ITERATE {call.type$} +EXECUTE {end.bib} + +EXECUTE{completed.message} + + +%% That's all folks, mds. diff --git a/spec/consensus/consensus-paper/IEEEtran.cls b/spec/consensus/consensus-paper/IEEEtran.cls new file mode 100644 index 000000000..9c967d555 --- /dev/null +++ b/spec/consensus/consensus-paper/IEEEtran.cls @@ -0,0 +1,4733 @@ +%% +%% IEEEtran.cls 2011/11/03 version V1.8 based on +%% IEEEtran.cls 2007/03/05 version V1.7a +%% The changes in V1.8 are made with a single goal in mind: +%% to change the look of the output using the [conference] option +%% and the default font size (10pt) to match the Word template more closely. +%% These changes may well have undesired side effects when other options +%% are in force! +%% +%% +%% This is the official IEEE LaTeX class for authors of the Institute of +%% Electrical and Electronics Engineers (IEEE) Transactions journals and +%% conferences. +%% +%% Support sites: +%% http://www.michaelshell.org/tex/ieeetran/ +%% http://www.ctan.org/tex-archive/macros/latex/contrib/IEEEtran/ +%% and +%% http://www.ieee.org/ +%% +%% Based on the original 1993 IEEEtran.cls, but with many bug fixes +%% and enhancements (from both JVH and MDS) over the 1996/7 version. +%% +%% +%% Contributors: +%% Gerry Murray (1993), Silvano Balemi (1993), +%% Jon Dixon (1996), Peter N"uchter (1996), +%% Juergen von Hagen (2000), and Michael Shell (2001-2007) +%% +%% +%% Copyright (c) 1993-2000 by Gerry Murray, Silvano Balemi, +%% Jon Dixon, Peter N"uchter, +%% Juergen von Hagen +%% and +%% Copyright (c) 2001-2007 by Michael Shell +%% +%% Current maintainer (V1.3 to V1.7): Michael Shell +%% See: +%% http://www.michaelshell.org/ +%% for current contact information. +%% +%% Special thanks to Peter Wilson (CUA) and Donald Arseneau +%% for allowing the inclusion of the \@ifmtarg command +%% from their ifmtarg LaTeX package. +%% +%%************************************************************************* +%% Legal Notice: +%% This code is offered as-is without any warranty either expressed or +%% implied; without even the implied warranty of MERCHANTABILITY or +%% FITNESS FOR A PARTICULAR PURPOSE! +%% User assumes all risk. +%% In no event shall IEEE or any contributor to this code be liable for +%% any damages or losses, including, but not limited to, incidental, +%% consequential, or any other damages, resulting from the use or misuse +%% of any information contained here. +%% +%% All comments are the opinions of their respective authors and are not +%% necessarily endorsed by the IEEE. +%% +%% This work is distributed under the LaTeX Project Public License (LPPL) +%% ( http://www.latex-project.org/ ) version 1.3, and may be freely used, +%% distributed and modified. A copy of the LPPL, version 1.3, is included +%% in the base LaTeX documentation of all distributions of LaTeX released +%% 2003/12/01 or later. +%% Retain all contribution notices and credits. +%% ** Modified files should be clearly indicated as such, including ** +%% ** renaming them and changing author support contact information. ** +%% +%% File list of work: IEEEtran.cls, IEEEtran_HOWTO.pdf, bare_adv.tex, +%% bare_conf.tex, bare_jrnl.tex, bare_jrnl_compsoc.tex +%% +%% Major changes to the user interface should be indicated by an +%% increase in the version numbers. If a version is a beta, it will +%% be indicated with a BETA suffix, i.e., 1.4 BETA. +%% Small changes can be indicated by appending letters to the version +%% such as "IEEEtran_v14a.cls". +%% In all cases, \Providesclass, any \typeout messages to the user, +%% \IEEEtransversionmajor and \IEEEtransversionminor must reflect the +%% correct version information. +%% The changes should also be documented via source comments. +%%************************************************************************* +%% +% +% Available class options +% e.g., \documentclass[10pt,conference]{IEEEtran} +% +% *** choose only one from each category *** +% +% 9pt, 10pt, 11pt, 12pt +% Sets normal font size. The default is 10pt. +% +% conference, journal, technote, peerreview, peerreviewca +% determines format mode - conference papers, journal papers, +% correspondence papers (technotes), or peer review papers. The user +% should also select 9pt when using technote. peerreview is like +% journal mode, but provides for a single-column "cover" title page for +% anonymous peer review. The paper title (without the author names) is +% repeated at the top of the page after the cover page. For peer review +% papers, the \IEEEpeerreviewmaketitle command must be executed (will +% automatically be ignored for non-peerreview modes) at the place the +% cover page is to end, usually just after the abstract (keywords are +% not normally used with peer review papers). peerreviewca is like +% peerreview, but allows the author names to be entered and formatted +% as with conference mode so that author affiliation and contact +% information can be easily seen on the cover page. +% The default is journal. +% +% draft, draftcls, draftclsnofoot, final +% determines if paper is formatted as a widely spaced draft (for +% handwritten editor comments) or as a properly typeset final version. +% draftcls restricts draft mode to the class file while all other LaTeX +% packages (i.e., \usepackage{graphicx}) will behave as final - allows +% for a draft paper with visible figures, etc. draftclsnofoot is like +% draftcls, but does not display the date and the word "DRAFT" at the foot +% of the pages. If using one of the draft modes, the user will probably +% also want to select onecolumn. +% The default is final. +% +% letterpaper, a4paper +% determines paper size: 8.5in X 11in or 210mm X 297mm. CHANGING THE PAPER +% SIZE WILL NOT ALTER THE TYPESETTING OF THE DOCUMENT - ONLY THE MARGINS +% WILL BE AFFECTED. In particular, documents using the a4paper option will +% have reduced side margins (A4 is narrower than US letter) and a longer +% bottom margin (A4 is longer than US letter). For both cases, the top +% margins will be the same and the text will be horizontally centered. +% For final submission to IEEE, authors should use US letter (8.5 X 11in) +% paper. Note that authors should ensure that all post-processing +% (ps, pdf, etc.) uses the same paper specificiation as the .tex document. +% Problems here are by far the number one reason for incorrect margins. +% IEEEtran will automatically set the default paper size under pdflatex +% (without requiring a change to pdftex.cfg), so this issue is more +% important to dvips users. Fix config.ps, config.pdf, or ~/.dvipsrc for +% dvips, or use the dvips -t papersize option instead as needed. See the +% testflow documentation +% http://www.ctan.org/tex-archive/macros/latex/contrib/IEEEtran/testflow +% for more details on dvips paper size configuration. +% The default is letterpaper. +% +% oneside, twoside +% determines if layout follows single sided or two sided (duplex) +% printing. The only notable change is with the headings at the top of +% the pages. +% The default is oneside. +% +% onecolumn, twocolumn +% determines if text is organized into one or two columns per page. One +% column mode is usually used only with draft papers. +% The default is twocolumn. +% +% compsoc +% Use the format of the IEEE Computer Society. +% +% romanappendices +% Use the "Appendix I" convention when numbering appendices. IEEEtran.cls +% now defaults to Alpha "Appendix A" convention - the opposite of what +% v1.6b and earlier did. +% +% captionsoff +% disables the display of the figure/table captions. Some IEEE journals +% request that captions be removed and figures/tables be put on pages +% of their own at the end of an initial paper submission. The endfloat +% package can be used with this class option to achieve this format. +% +% nofonttune +% turns off tuning of the font interword spacing. Maybe useful to those +% not using the standard Times fonts or for those who have already "tuned" +% their fonts. +% The default is to enable IEEEtran to tune font parameters. +% +% +%---------- +% Available CLASSINPUTs provided (all are macros unless otherwise noted): +% \CLASSINPUTbaselinestretch +% \CLASSINPUTinnersidemargin +% \CLASSINPUToutersidemargin +% \CLASSINPUTtoptextmargin +% \CLASSINPUTbottomtextmargin +% +% Available CLASSINFOs provided: +% \ifCLASSINFOpdf (TeX if conditional) +% \CLASSINFOpaperwidth (macro) +% \CLASSINFOpaperheight (macro) +% \CLASSINFOnormalsizebaselineskip (length) +% \CLASSINFOnormalsizeunitybaselineskip (length) +% +% Available CLASSOPTIONs provided: +% all class option flags (TeX if conditionals) unless otherwise noted, +% e.g., \ifCLASSOPTIONcaptionsoff +% point size options provided as a single macro: +% \CLASSOPTIONpt +% which will be defined as 9, 10, 11, or 12 depending on the document's +% normalsize point size. +% also, class option peerreviewca implies the use of class option peerreview +% and classoption draft implies the use of class option draftcls + + + + + +\ProvidesClass{IEEEtran}[2012/11/21 V1.8c by Harald Hanche-Olsen and Anders Christensen] +\typeout{-- Based on V1.7a by Michael Shell} +\typeout{-- See the "IEEEtran_HOWTO" manual for usage information.} +\typeout{-- http://www.michaelshell.org/tex/ieeetran/} +\NeedsTeXFormat{LaTeX2e} + +% IEEEtran.cls version numbers, provided as of V1.3 +% These values serve as a way a .tex file can +% determine if the new features are provided. +% The version number of this IEEEtrans.cls can be obtained from +% these values. i.e., V1.4 +% KEEP THESE AS INTEGERS! i.e., NO {4a} or anything like that- +% (no need to enumerate "a" minor changes here) +\def\IEEEtransversionmajor{1} +\def\IEEEtransversionminor{7} + +% These do nothing, but provide them like in article.cls +\newif\if@restonecol +\newif\if@titlepage + + +% class option conditionals +\newif\ifCLASSOPTIONonecolumn \CLASSOPTIONonecolumnfalse +\newif\ifCLASSOPTIONtwocolumn \CLASSOPTIONtwocolumntrue + +\newif\ifCLASSOPTIONoneside \CLASSOPTIONonesidetrue +\newif\ifCLASSOPTIONtwoside \CLASSOPTIONtwosidefalse + +\newif\ifCLASSOPTIONfinal \CLASSOPTIONfinaltrue +\newif\ifCLASSOPTIONdraft \CLASSOPTIONdraftfalse +\newif\ifCLASSOPTIONdraftcls \CLASSOPTIONdraftclsfalse +\newif\ifCLASSOPTIONdraftclsnofoot \CLASSOPTIONdraftclsnofootfalse + +\newif\ifCLASSOPTIONpeerreview \CLASSOPTIONpeerreviewfalse +\newif\ifCLASSOPTIONpeerreviewca \CLASSOPTIONpeerreviewcafalse + +\newif\ifCLASSOPTIONjournal \CLASSOPTIONjournaltrue +\newif\ifCLASSOPTIONconference \CLASSOPTIONconferencefalse +\newif\ifCLASSOPTIONtechnote \CLASSOPTIONtechnotefalse + +\newif\ifCLASSOPTIONnofonttune \CLASSOPTIONnofonttunefalse + +\newif\ifCLASSOPTIONcaptionsoff \CLASSOPTIONcaptionsofffalse + +\newif\ifCLASSOPTIONcompsoc \CLASSOPTIONcompsocfalse + +\newif\ifCLASSOPTIONromanappendices \CLASSOPTIONromanappendicesfalse + + +% class info conditionals + +% indicates if pdf (via pdflatex) output +\newif\ifCLASSINFOpdf \CLASSINFOpdffalse + + +% V1.6b internal flag to show if using a4paper +\newif\if@IEEEusingAfourpaper \@IEEEusingAfourpaperfalse + + + +% IEEEtran class scratch pad registers +% dimen +\newdimen\@IEEEtrantmpdimenA +\newdimen\@IEEEtrantmpdimenB +% count +\newcount\@IEEEtrantmpcountA +\newcount\@IEEEtrantmpcountB +% token list +\newtoks\@IEEEtrantmptoksA + +% we use \CLASSOPTIONpt so that we can ID the point size (even for 9pt docs) +% as well as LaTeX's \@ptsize to retain some compatability with some +% external packages +\def\@ptsize{0} +% LaTeX does not support 9pt, so we set \@ptsize to 0 - same as that of 10pt +\DeclareOption{9pt}{\def\CLASSOPTIONpt{9}\def\@ptsize{0}} +\DeclareOption{10pt}{\def\CLASSOPTIONpt{10}\def\@ptsize{0}} +\DeclareOption{11pt}{\def\CLASSOPTIONpt{11}\def\@ptsize{1}} +\DeclareOption{12pt}{\def\CLASSOPTIONpt{12}\def\@ptsize{2}} + + + +\DeclareOption{letterpaper}{\setlength{\paperheight}{11in}% + \setlength{\paperwidth}{8.5in}% + \@IEEEusingAfourpaperfalse + \def\CLASSOPTIONpaper{letter}% + \def\CLASSINFOpaperwidth{8.5in}% + \def\CLASSINFOpaperheight{11in}} + + +\DeclareOption{a4paper}{\setlength{\paperheight}{297mm}% + \setlength{\paperwidth}{210mm}% + \@IEEEusingAfourpapertrue + \def\CLASSOPTIONpaper{a4}% + \def\CLASSINFOpaperwidth{210mm}% + \def\CLASSINFOpaperheight{297mm}} + +\DeclareOption{oneside}{\@twosidefalse\@mparswitchfalse + \CLASSOPTIONonesidetrue\CLASSOPTIONtwosidefalse} +\DeclareOption{twoside}{\@twosidetrue\@mparswitchtrue + \CLASSOPTIONtwosidetrue\CLASSOPTIONonesidefalse} + +\DeclareOption{onecolumn}{\CLASSOPTIONonecolumntrue\CLASSOPTIONtwocolumnfalse} +\DeclareOption{twocolumn}{\CLASSOPTIONtwocolumntrue\CLASSOPTIONonecolumnfalse} + +% If the user selects draft, then this class AND any packages +% will go into draft mode. +\DeclareOption{draft}{\CLASSOPTIONdrafttrue\CLASSOPTIONdraftclstrue + \CLASSOPTIONdraftclsnofootfalse} +% draftcls is for a draft mode which will not affect any packages +% used by the document. +\DeclareOption{draftcls}{\CLASSOPTIONdraftfalse\CLASSOPTIONdraftclstrue + \CLASSOPTIONdraftclsnofootfalse} +% draftclsnofoot is like draftcls, but without the footer. +\DeclareOption{draftclsnofoot}{\CLASSOPTIONdraftfalse\CLASSOPTIONdraftclstrue + \CLASSOPTIONdraftclsnofoottrue} +\DeclareOption{final}{\CLASSOPTIONdraftfalse\CLASSOPTIONdraftclsfalse + \CLASSOPTIONdraftclsnofootfalse} + +\DeclareOption{journal}{\CLASSOPTIONpeerreviewfalse\CLASSOPTIONpeerreviewcafalse + \CLASSOPTIONjournaltrue\CLASSOPTIONconferencefalse\CLASSOPTIONtechnotefalse} + +\DeclareOption{conference}{\CLASSOPTIONpeerreviewfalse\CLASSOPTIONpeerreviewcafalse + \CLASSOPTIONjournalfalse\CLASSOPTIONconferencetrue\CLASSOPTIONtechnotefalse} + +\DeclareOption{technote}{\CLASSOPTIONpeerreviewfalse\CLASSOPTIONpeerreviewcafalse + \CLASSOPTIONjournalfalse\CLASSOPTIONconferencefalse\CLASSOPTIONtechnotetrue} + +\DeclareOption{peerreview}{\CLASSOPTIONpeerreviewtrue\CLASSOPTIONpeerreviewcafalse + \CLASSOPTIONjournalfalse\CLASSOPTIONconferencefalse\CLASSOPTIONtechnotefalse} + +\DeclareOption{peerreviewca}{\CLASSOPTIONpeerreviewtrue\CLASSOPTIONpeerreviewcatrue + \CLASSOPTIONjournalfalse\CLASSOPTIONconferencefalse\CLASSOPTIONtechnotefalse} + +\DeclareOption{nofonttune}{\CLASSOPTIONnofonttunetrue} + +\DeclareOption{captionsoff}{\CLASSOPTIONcaptionsofftrue} + +\DeclareOption{compsoc}{\CLASSOPTIONcompsoctrue} + +\DeclareOption{romanappendices}{\CLASSOPTIONromanappendicestrue} + + +% default to US letter paper, 10pt, twocolumn, one sided, final, journal +\ExecuteOptions{letterpaper,10pt,twocolumn,oneside,final,journal} +% overrride these defaults per user requests +\ProcessOptions + + + +% Computer Society conditional execution command +\long\def\@IEEEcompsoconly#1{\relax\ifCLASSOPTIONcompsoc\relax#1\relax\fi\relax} +% inverse +\long\def\@IEEEnotcompsoconly#1{\relax\ifCLASSOPTIONcompsoc\else\relax#1\relax\fi\relax} +% compsoc conference +\long\def\@IEEEcompsocconfonly#1{\relax\ifCLASSOPTIONcompsoc\ifCLASSOPTIONconference\relax#1\relax\fi\fi\relax} +% compsoc not conference +\long\def\@IEEEcompsocnotconfonly#1{\relax\ifCLASSOPTIONcompsoc\ifCLASSOPTIONconference\else\relax#1\relax\fi\fi\relax} + + +% IEEE uses Times Roman font, so we'll default to Times. +% These three commands make up the entire times.sty package. +\renewcommand{\sfdefault}{phv} +\renewcommand{\rmdefault}{ptm} +\renewcommand{\ttdefault}{pcr} + +\@IEEEcompsoconly{\typeout{-- Using IEEE Computer Society mode.}} + +% V1.7 compsoc nonconference papers, use Palatino/Palladio as the main text font, +% not Times Roman. +\@IEEEcompsocnotconfonly{\renewcommand{\rmdefault}{ppl}} + +% enable Times/Palatino main text font +\normalfont\selectfont + + + + + +% V1.7 conference notice message hook +\def\@IEEEconsolenoticeconference{\typeout{}% +\typeout{** Conference Paper **}% +\typeout{Before submitting the final camera ready copy, remember to:}% +\typeout{}% +\typeout{ 1. Manually equalize the lengths of two columns on the last page}% +\typeout{ of your paper;}% +\typeout{}% +\typeout{ 2. Ensure that any PostScript and/or PDF output post-processing}% +\typeout{ uses only Type 1 fonts and that every step in the generation}% +\typeout{ process uses the appropriate paper size.}% +\typeout{}} + + +% we can send console reminder messages to the user here +\AtEndDocument{\ifCLASSOPTIONconference\@IEEEconsolenoticeconference\fi} + + +% warn about the use of single column other than for draft mode +\ifCLASSOPTIONtwocolumn\else% + \ifCLASSOPTIONdraftcls\else% + \typeout{** ATTENTION: Single column mode is not typically used with IEEE publications.}% + \fi% +\fi + + +% V1.7 improved paper size setting code. +% Set pdfpage and dvips paper sizes. Conditional tests are similar to that +% of ifpdf.sty. Retain within {} to ensure tested macros are never altered, +% even if only effect is to set them to \relax. +% if \pdfoutput is undefined or equal to relax, output a dvips special +{\@ifundefined{pdfoutput}{\AtBeginDvi{\special{papersize=\CLASSINFOpaperwidth,\CLASSINFOpaperheight}}}{% +% pdfoutput is defined and not equal to \relax +% check for pdfpageheight existence just in case someone sets pdfoutput +% under non-pdflatex. If exists, set them regardless of value of \pdfoutput. +\@ifundefined{pdfpageheight}{\relax}{\global\pdfpagewidth\paperwidth +\global\pdfpageheight\paperheight}% +% if using \pdfoutput=0 under pdflatex, send dvips papersize special +\ifcase\pdfoutput +\AtBeginDvi{\special{papersize=\CLASSINFOpaperwidth,\CLASSINFOpaperheight}}% +\else +% we are using pdf output, set CLASSINFOpdf flag +\global\CLASSINFOpdftrue +\fi}} + +% let the user know the selected papersize +\typeout{-- Using \CLASSINFOpaperwidth\space x \CLASSINFOpaperheight\space +(\CLASSOPTIONpaper)\space paper.} + +\ifCLASSINFOpdf +\typeout{-- Using PDF output.} +\else +\typeout{-- Using DVI output.} +\fi + + +% The idea hinted here is for LaTeX to generate markleft{} and markright{} +% automatically for you after you enter \author{}, \journal{}, +% \journaldate{}, journalvol{}, \journalnum{}, etc. +% However, there may be some backward compatibility issues here as +% well as some special applications for IEEEtran.cls and special issues +% that may require the flexible \markleft{}, \markright{} and/or \markboth{}. +% We'll leave this as an open future suggestion. +%\newcommand{\journal}[1]{\def\@journal{#1}} +%\def\@journal{} + + + +% pointsize values +% used with ifx to determine the document's normal size +\def\@IEEEptsizenine{9} +\def\@IEEEptsizeten{10} +\def\@IEEEptsizeeleven{11} +\def\@IEEEptsizetwelve{12} + + + +% FONT DEFINITIONS (No sizexx.clo file needed) +% V1.6 revised font sizes, displayskip values and +% revised normalsize baselineskip to reduce underfull vbox problems +% on the 58pc = 696pt = 9.5in text height we want +% normalsize #lines/column baselineskip (aka leading) +% 9pt 63 11.0476pt (truncated down) +% 10pt 58 12pt (exact) +% 11pt 52 13.3846pt (truncated down) +% 12pt 50 13.92pt (exact) +% + +% we need to store the nominal baselineskip for the given font size +% in case baselinestretch ever changes. +% this is a dimen, so it will not hold stretch or shrink +\newdimen\@IEEEnormalsizeunitybaselineskip +\@IEEEnormalsizeunitybaselineskip\baselineskip + +\ifx\CLASSOPTIONpt\@IEEEptsizenine +\typeout{-- This is a 9 point document.} +\def\normalsize{\@setfontsize{\normalsize}{9}{11.0476pt}}% +\setlength{\@IEEEnormalsizeunitybaselineskip}{11.0476pt}% +\normalsize +\abovedisplayskip 1.5ex plus3pt minus1pt% +\belowdisplayskip \abovedisplayskip% +\abovedisplayshortskip 0pt plus3pt% +\belowdisplayshortskip 1.5ex plus3pt minus1pt +\def\small{\@setfontsize{\small}{8.5}{10pt}} +\def\footnotesize{\@setfontsize{\footnotesize}{8}{9pt}} +\def\scriptsize{\@setfontsize{\scriptsize}{7}{8pt}} +\def\tiny{\@setfontsize{\tiny}{5}{6pt}} +% sublargesize is the same as large - 10pt +\def\sublargesize{\@setfontsize{\sublargesize}{10}{12pt}} +\def\large{\@setfontsize{\large}{10}{12pt}} +\def\Large{\@setfontsize{\Large}{12}{14pt}} +\def\LARGE{\@setfontsize{\LARGE}{14}{17pt}} +\def\huge{\@setfontsize{\huge}{17}{20pt}} +\def\Huge{\@setfontsize{\Huge}{20}{24pt}} +\fi + + +% Check if we have selected 10 points +\ifx\CLASSOPTIONpt\@IEEEptsizeten +\typeout{-- This is a 10 point document.} +\def\normalsize{\@setfontsize{\normalsize}{10}{11}}% +\setlength{\@IEEEnormalsizeunitybaselineskip}{11pt}% +\normalsize +\abovedisplayskip 1.5ex plus4pt minus2pt% +\belowdisplayskip \abovedisplayskip% +\abovedisplayshortskip 0pt plus4pt% +\belowdisplayshortskip 1.5ex plus4pt minus2pt +\def\small{\@setfontsize{\small}{9}{10pt}} +\def\footnotesize{\@setfontsize{\footnotesize}{8}{9pt}} +\def\scriptsize{\@setfontsize{\scriptsize}{7}{8pt}} +\def\tiny{\@setfontsize{\tiny}{5}{6pt}} +% sublargesize is a tad smaller than large - 11pt +\def\sublargesize{\@setfontsize{\sublargesize}{11}{13.4pt}} +\def\large{\@setfontsize{\large}{12}{14pt}} +\def\Large{\@setfontsize{\Large}{14}{17pt}} +\def\LARGE{\@setfontsize{\LARGE}{17}{20pt}} +\def\huge{\@setfontsize{\huge}{20}{24pt}} +\def\Huge{\@setfontsize{\Huge}{24}{28pt}} +\fi + + +% Check if we have selected 11 points +\ifx\CLASSOPTIONpt\@IEEEptsizeeleven +\typeout{-- This is an 11 point document.} +\def\normalsize{\@setfontsize{\normalsize}{11}{13.3846pt}}% +\setlength{\@IEEEnormalsizeunitybaselineskip}{13.3846pt}% +\normalsize +\abovedisplayskip 1.5ex plus5pt minus3pt% +\belowdisplayskip \abovedisplayskip% +\abovedisplayshortskip 0pt plus5pt% +\belowdisplayshortskip 1.5ex plus5pt minus3pt +\def\small{\@setfontsize{\small}{10}{12pt}} +\def\footnotesize{\@setfontsize{\footnotesize}{9}{10.5pt}} +\def\scriptsize{\@setfontsize{\scriptsize}{8}{9pt}} +\def\tiny{\@setfontsize{\tiny}{6}{7pt}} +% sublargesize is the same as large - 12pt +\def\sublargesize{\@setfontsize{\sublargesize}{12}{14pt}} +\def\large{\@setfontsize{\large}{12}{14pt}} +\def\Large{\@setfontsize{\Large}{14}{17pt}} +\def\LARGE{\@setfontsize{\LARGE}{17}{20pt}} +\def\huge{\@setfontsize{\huge}{20}{24pt}} +\def\Huge{\@setfontsize{\Huge}{24}{28pt}} +\fi + + +% Check if we have selected 12 points +\ifx\CLASSOPTIONpt\@IEEEptsizetwelve +\typeout{-- This is a 12 point document.} +\def\normalsize{\@setfontsize{\normalsize}{12}{13.92pt}}% +\setlength{\@IEEEnormalsizeunitybaselineskip}{13.92pt}% +\normalsize +\abovedisplayskip 1.5ex plus6pt minus4pt% +\belowdisplayskip \abovedisplayskip% +\abovedisplayshortskip 0pt plus6pt% +\belowdisplayshortskip 1.5ex plus6pt minus4pt +\def\small{\@setfontsize{\small}{10}{12pt}} +\def\footnotesize{\@setfontsize{\footnotesize}{9}{10.5pt}} +\def\scriptsize{\@setfontsize{\scriptsize}{8}{9pt}} +\def\tiny{\@setfontsize{\tiny}{6}{7pt}} +% sublargesize is the same as large - 14pt +\def\sublargesize{\@setfontsize{\sublargesize}{14}{17pt}} +\def\large{\@setfontsize{\large}{14}{17pt}} +\def\Large{\@setfontsize{\Large}{17}{20pt}} +\def\LARGE{\@setfontsize{\LARGE}{20}{24pt}} +\def\huge{\@setfontsize{\huge}{22}{26pt}} +\def\Huge{\@setfontsize{\Huge}{24}{28pt}} +\fi + + +% V1.6 The Computer Modern Fonts will issue a substitution warning for +% 24pt titles (24.88pt is used instead) increase the substitution +% tolerance to turn off this warning +\def\fontsubfuzz{.9pt} +% However, the default (and correct) Times font will scale exactly as needed. + + +% warn the user in case they forget to use the 9pt option with +% technote +\ifCLASSOPTIONtechnote% + \ifx\CLASSOPTIONpt\@IEEEptsizenine\else% + \typeout{** ATTENTION: Technotes are normally 9pt documents.}% + \fi% +\fi + + +% V1.7 +% Improved \textunderscore to provide a much better fake _ when used with +% OT1 encoding. Under OT1, detect use of pcr or cmtt \ttfamily and use +% available true _ glyph for those two typewriter fonts. +\def\@IEEEstringptm{ptm} % Times Roman family +\def\@IEEEstringppl{ppl} % Palatino Roman family +\def\@IEEEstringphv{phv} % Helvetica Sans Serif family +\def\@IEEEstringpcr{pcr} % Courier typewriter family +\def\@IEEEstringcmtt{cmtt} % Computer Modern typewriter family +\DeclareTextCommandDefault{\textunderscore}{\leavevmode +\ifx\f@family\@IEEEstringpcr\string_\else +\ifx\f@family\@IEEEstringcmtt\string_\else +\ifx\f@family\@IEEEstringptm\kern 0em\vbox{\hrule\@width 0.5em\@height 0.5pt\kern -0.3ex}\else +\ifx\f@family\@IEEEstringppl\kern 0em\vbox{\hrule\@width 0.5em\@height 0.5pt\kern -0.3ex}\else +\ifx\f@family\@IEEEstringphv\kern -0.03em\vbox{\hrule\@width 0.62em\@height 0.52pt\kern -0.33ex}\kern -0.03em\else +\kern 0.09em\vbox{\hrule\@width 0.6em\@height 0.44pt\kern -0.63pt\kern -0.42ex}\kern 0.09em\fi\fi\fi\fi\fi\relax} + + + + +% set the default \baselinestretch +\def\baselinestretch{1} +\ifCLASSOPTIONdraftcls + \def\baselinestretch{1.5}% default baselinestretch for draft modes +\fi + + +% process CLASSINPUT baselinestretch +\ifx\CLASSINPUTbaselinestretch\@IEEEundefined +\else + \edef\baselinestretch{\CLASSINPUTbaselinestretch} % user CLASSINPUT override + \typeout{** ATTENTION: Overriding \string\baselinestretch\space to + \baselinestretch\space via \string\CLASSINPUT.} +\fi + +\normalsize % make \baselinestretch take affect + + + + +% store the normalsize baselineskip +\newdimen\CLASSINFOnormalsizebaselineskip +\CLASSINFOnormalsizebaselineskip=\baselineskip\relax +% and the normalsize unity (baselinestretch=1) baselineskip +% we could save a register by giving the user access to +% \@IEEEnormalsizeunitybaselineskip. However, let's protect +% its read only internal status +\newdimen\CLASSINFOnormalsizeunitybaselineskip +\CLASSINFOnormalsizeunitybaselineskip=\@IEEEnormalsizeunitybaselineskip\relax +% store the nominal value of jot +\newdimen\IEEEnormaljot +\IEEEnormaljot=0.25\baselineskip\relax + +% set \jot +\jot=\IEEEnormaljot\relax + + + + +% V1.6, we are now going to fine tune the interword spacing +% The default interword glue for Times under TeX appears to use a +% nominal interword spacing of 25% (relative to the font size, i.e., 1em) +% a maximum of 40% and a minimum of 19%. +% For example, 10pt text uses an interword glue of: +% +% 2.5pt plus 1.49998pt minus 0.59998pt +% +% However, IEEE allows for a more generous range which reduces the need +% for hyphenation, especially for two column text. Furthermore, IEEE +% tends to use a little bit more nominal space between the words. +% IEEE's interword spacing percentages appear to be: +% 35% nominal +% 23% minimum +% 50% maximum +% (They may even be using a tad more for the largest fonts such as 24pt.) +% +% for bold text, IEEE increases the spacing a little more: +% 37.5% nominal +% 23% minimum +% 55% maximum + +% here are the interword spacing ratios we'll use +% for medium (normal weight) +\def\@IEEEinterspaceratioM{0.35} +\def\@IEEEinterspaceMINratioM{0.23} +\def\@IEEEinterspaceMAXratioM{0.50} + +% for bold +\def\@IEEEinterspaceratioB{0.375} +\def\@IEEEinterspaceMINratioB{0.23} +\def\@IEEEinterspaceMAXratioB{0.55} + + +% command to revise the interword spacing for the current font under TeX: +% \fontdimen2 = nominal interword space +% \fontdimen3 = interword stretch +% \fontdimen4 = interword shrink +% since all changes to the \fontdimen are global, we can enclose these commands +% in braces to confine any font attribute or length changes +\def\@@@IEEEsetfontdimens#1#2#3{{% +\setlength{\@IEEEtrantmpdimenB}{\f@size pt}% grab the font size in pt, could use 1em instead. +\setlength{\@IEEEtrantmpdimenA}{#1\@IEEEtrantmpdimenB}% +\fontdimen2\font=\@IEEEtrantmpdimenA\relax +\addtolength{\@IEEEtrantmpdimenA}{-#2\@IEEEtrantmpdimenB}% +\fontdimen3\font=-\@IEEEtrantmpdimenA\relax +\setlength{\@IEEEtrantmpdimenA}{#1\@IEEEtrantmpdimenB}% +\addtolength{\@IEEEtrantmpdimenA}{-#3\@IEEEtrantmpdimenB}% +\fontdimen4\font=\@IEEEtrantmpdimenA\relax}} + +% revise the interword spacing for each font weight +\def\@@IEEEsetfontdimens{{% +\mdseries +\@@@IEEEsetfontdimens{\@IEEEinterspaceratioM}{\@IEEEinterspaceMAXratioM}{\@IEEEinterspaceMINratioM}% +\bfseries +\@@@IEEEsetfontdimens{\@IEEEinterspaceratioB}{\@IEEEinterspaceMAXratioB}{\@IEEEinterspaceMINratioB}% +}} + +% revise the interword spacing for each font shape +% \slshape is not often used for IEEE work and is not altered here. The \scshape caps are +% already a tad too large in the free LaTeX fonts (as compared to what IEEE uses) so we +% won't alter these either. +\def\@IEEEsetfontdimens{{% +\normalfont +\@@IEEEsetfontdimens +\normalfont\itshape +\@@IEEEsetfontdimens +}} + +% command to revise the interword spacing for each font size (and shape +% and weight). Only the \rmfamily is done here as \ttfamily uses a +% fixed spacing and \sffamily is not used as the main text of IEEE papers. +\def\@IEEEtunefonts{{\selectfont\rmfamily +\tiny\@IEEEsetfontdimens +\scriptsize\@IEEEsetfontdimens +\footnotesize\@IEEEsetfontdimens +\small\@IEEEsetfontdimens +\normalsize\@IEEEsetfontdimens +\sublargesize\@IEEEsetfontdimens +\large\@IEEEsetfontdimens +\LARGE\@IEEEsetfontdimens +\huge\@IEEEsetfontdimens +\Huge\@IEEEsetfontdimens}} + +% if the nofonttune class option is not given, revise the interword spacing +% now - in case IEEEtran makes any default length measurements, and make +% sure all the default fonts are loaded +\ifCLASSOPTIONnofonttune\else +\@IEEEtunefonts +\fi + +% and again at the start of the document in case the user loaded different fonts +\AtBeginDocument{\ifCLASSOPTIONnofonttune\else\@IEEEtunefonts\fi} + + + +% V1.6 +% LaTeX is a little to quick to use hyphenations +% So, we increase the penalty for their use and raise +% the badness level that triggers an underfull hbox +% warning. The author may still have to tweak things, +% but the appearance will be much better "right out +% of the box" than that under V1.5 and prior. +% TeX default is 50 +\hyphenpenalty=750 +% If we didn't adjust the interword spacing, 2200 might be better. +% The TeX default is 1000 +\hbadness=1350 +% IEEE does not use extra spacing after punctuation +\frenchspacing + +% V1.7 increase this a tad to discourage equation breaks +\binoppenalty=1000 % default 700 +\relpenalty=800 % default 500 + + +% margin note stuff +\marginparsep 10pt +\marginparwidth 20pt +\marginparpush 25pt + + +% if things get too close, go ahead and let them touch +\lineskip 0pt +\normallineskip 0pt +\lineskiplimit 0pt +\normallineskiplimit 0pt + +% The distance from the lower edge of the text body to the +% footline +\footskip 0.4in + +% normally zero, should be relative to font height. +% put in a little rubber to help stop some bad breaks (underfull vboxes) +\parskip 0ex plus 0.2ex minus 0.1ex +\ifCLASSOPTIONconference +\parskip 6pt plus 2pt minus 1pt +\fi + +\parindent 1.0em +\ifCLASSOPTIONconference +\parindent 14.45pt +\fi + +\topmargin -49.0pt +\headheight 12pt +\headsep 0.25in + +% use the normal font baselineskip +% so that \topskip is unaffected by changes in \baselinestretch +\topskip=\@IEEEnormalsizeunitybaselineskip +\textheight 58pc % 9.63in, 696pt +% Tweak textheight to a perfect integer number of lines/page. +% The normal baselineskip for each document point size is used +% to determine these values. +\ifx\CLASSOPTIONpt\@IEEEptsizenine\textheight=63\@IEEEnormalsizeunitybaselineskip\fi % 63 lines/page +\ifx\CLASSOPTIONpt\@IEEEptsizeten\textheight=58\@IEEEnormalsizeunitybaselineskip\fi % 58 lines/page +\ifx\CLASSOPTIONpt\@IEEEptsizeeleven\textheight=52\@IEEEnormalsizeunitybaselineskip\fi % 52 lines/page +\ifx\CLASSOPTIONpt\@IEEEptsizetwelve\textheight=50\@IEEEnormalsizeunitybaselineskip\fi % 50 lines/page + + +\columnsep 1.5pc +\textwidth 184.2mm + + +% the default side margins are equal +\if@IEEEusingAfourpaper +\oddsidemargin 14.32mm +\evensidemargin 14.32mm +\else +\oddsidemargin 0.680in +\evensidemargin 0.680in +\fi +% compensate for LaTeX's 1in offset +\addtolength{\oddsidemargin}{-1in} +\addtolength{\evensidemargin}{-1in} + + + +% adjust margins for conference mode +\ifCLASSOPTIONconference + \topmargin -0.25in + % we retain the reserved, but unused space for headers + \addtolength{\topmargin}{-\headheight} + \addtolength{\topmargin}{-\headsep} + \textheight 9.25in % The standard for conferences (668.4975pt) + % Tweak textheight to a perfect integer number of lines/page. + \ifx\CLASSOPTIONpt\@IEEEptsizenine\textheight=61\@IEEEnormalsizeunitybaselineskip\fi % 61 lines/page + \ifx\CLASSOPTIONpt\@IEEEptsizeten\textheight=62\@IEEEnormalsizeunitybaselineskip\fi % 62 lines/page + \ifx\CLASSOPTIONpt\@IEEEptsizeeleven\textheight=50\@IEEEnormalsizeunitybaselineskip\fi % 50 lines/page + \ifx\CLASSOPTIONpt\@IEEEptsizetwelve\textheight=48\@IEEEnormalsizeunitybaselineskip\fi % 48 lines/page +\fi + + +% compsoc conference +\ifCLASSOPTIONcompsoc +\ifCLASSOPTIONconference + % compsoc conference use a larger value for columnsep + \columnsep 0.375in + % compsoc conferences want 1in top margin, 1.125in bottom margin + \topmargin 0in + \addtolength{\topmargin}{-6pt}% we tweak this a tad to better comply with top of line stuff + % we retain the reserved, but unused space for headers + \addtolength{\topmargin}{-\headheight} + \addtolength{\topmargin}{-\headsep} + \textheight 8.875in % (641.39625pt) + % Tweak textheight to a perfect integer number of lines/page. + \ifx\CLASSOPTIONpt\@IEEEptsizenine\textheight=58\@IEEEnormalsizeunitybaselineskip\fi % 58 lines/page + \ifx\CLASSOPTIONpt\@IEEEptsizeten\textheight=53\@IEEEnormalsizeunitybaselineskip\fi % 53 lines/page + \ifx\CLASSOPTIONpt\@IEEEptsizeeleven\textheight=48\@IEEEnormalsizeunitybaselineskip\fi % 48 lines/page + \ifx\CLASSOPTIONpt\@IEEEptsizetwelve\textheight=46\@IEEEnormalsizeunitybaselineskip\fi % 46 lines/page + \textwidth 6.5in + % the default side margins are equal + \if@IEEEusingAfourpaper + \oddsidemargin 22.45mm + \evensidemargin 22.45mm + \else + \oddsidemargin 1in + \evensidemargin 1in + \fi + % compensate for LaTeX's 1in offset + \addtolength{\oddsidemargin}{-1in} + \addtolength{\evensidemargin}{-1in} +\fi\fi + + + +% draft mode settings override that of all other modes +% provides a nice 1in margin all around the paper and extra +% space between the lines for editor's comments +\ifCLASSOPTIONdraftcls + % want 1in from top of paper to text + \setlength{\topmargin}{-\headsep}% + \addtolength{\topmargin}{-\headheight}% + % we want 1in side margins regardless of paper type + \oddsidemargin 0in + \evensidemargin 0in + % set the text width + \setlength{\textwidth}{\paperwidth}% + \addtolength{\textwidth}{-2.0in}% + \setlength{\textheight}{\paperheight}% + \addtolength{\textheight}{-2.0in}% + % digitize textheight to be an integer number of lines. + % this may cause the bottom margin to be off a tad + \addtolength{\textheight}{-1\topskip}% + \divide\textheight by \baselineskip% + \multiply\textheight by \baselineskip% + \addtolength{\textheight}{\topskip}% +\fi + + + +% process CLASSINPUT inner/outer margin +% if inner margin defined, but outer margin not, set outer to inner. +\ifx\CLASSINPUTinnersidemargin\@IEEEundefined +\else + \ifx\CLASSINPUToutersidemargin\@IEEEundefined + \edef\CLASSINPUToutersidemargin{\CLASSINPUTinnersidemargin} + \fi +\fi + +\ifx\CLASSINPUToutersidemargin\@IEEEundefined +\else + % if outer margin defined, but inner margin not, set inner to outer. + \ifx\CLASSINPUTinnersidemargin\@IEEEundefined + \edef\CLASSINPUTinnersidemargin{\CLASSINPUToutersidemargin} + \fi + \setlength{\oddsidemargin}{\CLASSINPUTinnersidemargin} + \ifCLASSOPTIONtwoside + \setlength{\evensidemargin}{\CLASSINPUToutersidemargin} + \else + \setlength{\evensidemargin}{\CLASSINPUTinnersidemargin} + \fi + \addtolength{\oddsidemargin}{-1in} + \addtolength{\evensidemargin}{-1in} + \setlength{\textwidth}{\paperwidth} + \addtolength{\textwidth}{-\CLASSINPUTinnersidemargin} + \addtolength{\textwidth}{-\CLASSINPUToutersidemargin} + \typeout{** ATTENTION: Overriding inner side margin to \CLASSINPUTinnersidemargin\space and + outer side margin to \CLASSINPUToutersidemargin\space via \string\CLASSINPUT.} +\fi + + + +% process CLASSINPUT top/bottom text margin +% if toptext margin defined, but bottomtext margin not, set bottomtext to toptext margin +\ifx\CLASSINPUTtoptextmargin\@IEEEundefined +\else + \ifx\CLASSINPUTbottomtextmargin\@IEEEundefined + \edef\CLASSINPUTbottomtextmargin{\CLASSINPUTtoptextmargin} + \fi +\fi + +\ifx\CLASSINPUTbottomtextmargin\@IEEEundefined +\else + % if bottomtext margin defined, but toptext margin not, set toptext to bottomtext margin + \ifx\CLASSINPUTtoptextmargin\@IEEEundefined + \edef\CLASSINPUTtoptextmargin{\CLASSINPUTbottomtextmargin} + \fi + \setlength{\topmargin}{\CLASSINPUTtoptextmargin} + \addtolength{\topmargin}{-1in} + \addtolength{\topmargin}{-\headheight} + \addtolength{\topmargin}{-\headsep} + \setlength{\textheight}{\paperheight} + \addtolength{\textheight}{-\CLASSINPUTtoptextmargin} + \addtolength{\textheight}{-\CLASSINPUTbottomtextmargin} + % in the default format we use the normal baselineskip as topskip + % we only need 0.7 of this to clear typical top text and we need + % an extra 0.3 spacing at the bottom for descenders. This will + % correct for both. + \addtolength{\topmargin}{-0.3\@IEEEnormalsizeunitybaselineskip} + \typeout{** ATTENTION: Overriding top text margin to \CLASSINPUTtoptextmargin\space and + bottom text margin to \CLASSINPUTbottomtextmargin\space via \string\CLASSINPUT.} +\fi + + + + + + + +% LIST SPACING CONTROLS + +% Controls the amount of EXTRA spacing +% above and below \trivlist +% Both \list and IED lists override this. +% However, \trivlist will use this as will most +% things built from \trivlist like the \center +% environment. +\topsep 0.5\baselineskip + +% Controls the additional spacing around lists preceded +% or followed by blank lines. IEEE does not increase +% spacing before or after paragraphs so it is set to zero. +% \z@ is the same as zero, but faster. +\partopsep \z@ + +% Controls the spacing between paragraphs in lists. +% IEEE does not increase spacing before or after paragraphs +% so this is also zero. +% With IEEEtran.cls, global changes to +% this value DO affect lists (but not IED lists). +\parsep \z@ + +% Controls the extra spacing between list items. +% IEEE does not put extra spacing between items. +% With IEEEtran.cls, global changes to this value DO affect +% lists (but not IED lists). +\itemsep \z@ + +% \itemindent is the amount to indent the FIRST line of a list +% item. It is auto set to zero within the \list environment. To alter +% it, you have to do so when you call the \list. +% However, IEEE uses this for the theorem environment +% There is an alternative value for this near \leftmargini below +\itemindent -1em + +% \leftmargin, the spacing from the left margin of the main text to +% the left of the main body of a list item is set by \list. +% Hence this statement does nothing for lists. +% But, quote and verse do use it for indention. +\leftmargin 2em + +% we retain this stuff from the older IEEEtran.cls so that \list +% will work the same way as before. However, itemize, enumerate and +% description (IED) could care less about what these are as they +% all are overridden. +\leftmargini 2em +%\itemindent 2em % Alternative values: sometimes used. +%\leftmargini 0em +\leftmarginii 1em +\leftmarginiii 1.5em +\leftmarginiv 1.5em +\leftmarginv 1.0em +\leftmarginvi 1.0em +\labelsep 0.5em +\labelwidth \z@ + + +% The old IEEEtran.cls behavior of \list is retained. +% However, the new V1.3 IED list environments override all the +% @list stuff (\@listX is called within \list for the +% appropriate level just before the user's list_decl is called). +% \topsep is now 2pt as IEEE puts a little extra space around +% lists - used by those non-IED macros that depend on \list. +% Note that \parsep and \itemsep are not redefined as in +% the sizexx.clo \@listX (which article.cls uses) so global changes +% of these values DO affect \list +% +\def\@listi{\leftmargin\leftmargini \topsep 2pt plus 1pt minus 1pt} +\let\@listI\@listi +\def\@listii{\leftmargin\leftmarginii\labelwidth\leftmarginii% + \advance\labelwidth-\labelsep \topsep 2pt} +\def\@listiii{\leftmargin\leftmarginiii\labelwidth\leftmarginiii% + \advance\labelwidth-\labelsep \topsep 2pt} +\def\@listiv{\leftmargin\leftmarginiv\labelwidth\leftmarginiv% + \advance\labelwidth-\labelsep \topsep 2pt} +\def\@listv{\leftmargin\leftmarginv\labelwidth\leftmarginv% + \advance\labelwidth-\labelsep \topsep 2pt} +\def\@listvi{\leftmargin\leftmarginvi\labelwidth\leftmarginvi% + \advance\labelwidth-\labelsep \topsep 2pt} + + +% IEEE uses 5) not 5. +\def\labelenumi{\theenumi)} \def\theenumi{\arabic{enumi}} + +% IEEE uses a) not (a) +\def\labelenumii{\theenumii)} \def\theenumii{\alph{enumii}} + +% IEEE uses iii) not iii. +\def\labelenumiii{\theenumiii)} \def\theenumiii{\roman{enumiii}} + +% IEEE uses A) not A. +\def\labelenumiv{\theenumiv)} \def\theenumiv{\Alph{enumiv}} + +% exactly the same as in article.cls +\def\p@enumii{\theenumi} +\def\p@enumiii{\theenumi(\theenumii)} +\def\p@enumiv{\p@enumiii\theenumiii} + +% itemized list label styles +\def\labelitemi{$\bullet$} +\def\labelitemii{$\circ$} +\def\labelitemiii{\vrule height 0.8ex depth -0.2ex width 0.6ex} +\def\labelitemiv{$\ast$} + + + +% **** V1.3 ENHANCEMENTS **** +% Itemize, Enumerate and Description (IED) List Controls +% *************************** +% +% +% IEEE seems to use at least two different values by +% which ITEMIZED list labels are indented to the right +% For The Journal of Lightwave Technology (JLT) and The Journal +% on Selected Areas in Communications (JSAC), they tend to use +% an indention equal to \parindent. For Transactions on Communications +% they tend to indent ITEMIZED lists a little more--- 1.3\parindent. +% We'll provide both values here for you so that you can choose +% which one you like in your document using a command such as: +% setlength{\IEEEilabelindent}{\IEEEilabelindentB} +\newdimen\IEEEilabelindentA +\IEEEilabelindentA \parindent + +\newdimen\IEEEilabelindentB +\IEEEilabelindentB 1.3\parindent +% However, we'll default to using \parindent +% which makes more sense to me +\newdimen\IEEEilabelindent +\IEEEilabelindent \IEEEilabelindentA + + +% This controls the default amount the enumerated list labels +% are indented to the right. +% Normally, this is the same as the paragraph indention +\newdimen\IEEEelabelindent +\IEEEelabelindent \parindent + +% This controls the default amount the description list labels +% are indented to the right. +% Normally, this is the same as the paragraph indention +\newdimen\IEEEdlabelindent +\IEEEdlabelindent \parindent + +% This is the value actually used within the IED lists. +% The IED environments automatically set its value to +% one of the three values above, so global changes do +% not have any effect +\newdimen\IEEElabelindent +\IEEElabelindent \parindent + +% The actual amount labels will be indented is +% \IEEElabelindent multiplied by the factor below +% corresponding to the level of nesting depth +% This provides a means by which the user can +% alter the effective \IEEElabelindent for deeper +% levels +% There may not be such a thing as correct "standard IEEE" +% values. What IEEE actually does may depend on the specific +% circumstances. +% The first list level almost always has full indention. +% The second levels I've seen have only 75% of the normal indentation +% Three level or greater nestings are very rare. I am guessing +% that they don't use any indentation. +\def\IEEElabelindentfactori{1.0} % almost always one +\def\IEEElabelindentfactorii{0.75} % 0.0 or 1.0 may be used in some cases +\def\IEEElabelindentfactoriii{0.0} % 0.75? 0.5? 0.0? +\def\IEEElabelindentfactoriv{0.0} +\def\IEEElabelindentfactorv{0.0} +\def\IEEElabelindentfactorvi{0.0} + +% value actually used within IED lists, it is auto +% set to one of the 6 values above +% global changes here have no effect +\def\IEEElabelindentfactor{1.0} + +% This controls the default spacing between the end of the IED +% list labels and the list text, when normal text is used for +% the labels. +\newdimen\IEEEiednormlabelsep +\IEEEiednormlabelsep \parindent + +% This controls the default spacing between the end of the IED +% list labels and the list text, when math symbols are used for +% the labels (nomenclature lists). IEEE usually increases the +% spacing in these cases +\newdimen\IEEEiedmathlabelsep +\IEEEiedmathlabelsep 1.2em + +% This controls the extra vertical separation put above and +% below each IED list. IEEE usually puts a little extra spacing +% around each list. However, this spacing is barely noticeable. +\newskip\IEEEiedtopsep +\IEEEiedtopsep 2pt plus 1pt minus 1pt + + +% This command is executed within each IED list environment +% at the beginning of the list. You can use this to set the +% parameters for some/all your IED list(s) without disturbing +% global parameters that affect things other than lists. +% i.e., renewcommand{\IEEEiedlistdecl}{\setlength{\labelsep}{5em}} +% will alter the \labelsep for the next list(s) until +% \IEEEiedlistdecl is redefined. +\def\IEEEiedlistdecl{\relax} + +% This command provides an easy way to set \leftmargin based +% on the \labelwidth, \labelsep and the argument \IEEElabelindent +% Usage: \IEEEcalcleftmargin{width-to-indent-the-label} +% output is in the \leftmargin variable, i.e., effectively: +% \leftmargin = argument + \labelwidth + \labelsep +% Note controlled spacing here, shield end of lines with % +\def\IEEEcalcleftmargin#1{\setlength{\leftmargin}{#1}% +\addtolength{\leftmargin}{\labelwidth}% +\addtolength{\leftmargin}{\labelsep}} + +% This command provides an easy way to set \labelwidth to the +% width of the given text. It is the same as +% \settowidth{\labelwidth}{label-text} +% and useful as a shorter alternative. +% Typically used to set \labelwidth to be the width +% of the longest label in the list +\def\IEEEsetlabelwidth#1{\settowidth{\labelwidth}{#1}} + +% When this command is executed, IED lists will use the +% IEEEiedmathlabelsep label separation rather than the normal +% spacing. To have an effect, this command must be executed via +% the \IEEEiedlistdecl or within the option of the IED list +% environments. +\def\IEEEusemathlabelsep{\setlength{\labelsep}{\IEEEiedmathlabelsep}} + +% A flag which controls whether the IED lists automatically +% calculate \leftmargin from \IEEElabelindent, \labelwidth and \labelsep +% Useful if you want to specify your own \leftmargin +% This flag must be set (\IEEEnocalcleftmargintrue or \IEEEnocalcleftmarginfalse) +% via the \IEEEiedlistdecl or within the option of the IED list +% environments to have an effect. +\newif\ifIEEEnocalcleftmargin +\IEEEnocalcleftmarginfalse + +% A flag which controls whether \IEEElabelindent is multiplied by +% the \IEEElabelindentfactor for each list level. +% This flag must be set via the \IEEEiedlistdecl or within the option +% of the IED list environments to have an effect. +\newif\ifIEEEnolabelindentfactor +\IEEEnolabelindentfactorfalse + + +% internal variable to indicate type of IED label +% justification +% 0 - left; 1 - center; 2 - right +\def\@IEEEiedjustify{0} + + +% commands to allow the user to control IED +% label justifications. Use these commands within +% the IED environment option or in the \IEEEiedlistdecl +% Note that changing the normal list justifications +% is nonstandard and IEEE may not like it if you do so! +% I include these commands as they may be helpful to +% those who are using these enhanced list controls for +% other non-IEEE related LaTeX work. +% itemize and enumerate automatically default to right +% justification, description defaults to left. +\def\IEEEiedlabeljustifyl{\def\@IEEEiedjustify{0}}%left +\def\IEEEiedlabeljustifyc{\def\@IEEEiedjustify{1}}%center +\def\IEEEiedlabeljustifyr{\def\@IEEEiedjustify{2}}%right + + + + +% commands to save to and restore from the list parameter copies +% this allows us to set all the list parameters within +% the list_decl and prevent \list (and its \@list) +% from overriding any of our parameters +% V1.6 use \edefs instead of dimen's to conserve dimen registers +% Note controlled spacing here, shield end of lines with % +\def\@IEEEsavelistparams{\edef\@IEEEiedtopsep{\the\topsep}% +\edef\@IEEEiedlabelwidth{\the\labelwidth}% +\edef\@IEEEiedlabelsep{\the\labelsep}% +\edef\@IEEEiedleftmargin{\the\leftmargin}% +\edef\@IEEEiedpartopsep{\the\partopsep}% +\edef\@IEEEiedparsep{\the\parsep}% +\edef\@IEEEieditemsep{\the\itemsep}% +\edef\@IEEEiedrightmargin{\the\rightmargin}% +\edef\@IEEEiedlistparindent{\the\listparindent}% +\edef\@IEEEieditemindent{\the\itemindent}} + +% Note controlled spacing here +\def\@IEEErestorelistparams{\topsep\@IEEEiedtopsep\relax% +\labelwidth\@IEEEiedlabelwidth\relax% +\labelsep\@IEEEiedlabelsep\relax% +\leftmargin\@IEEEiedleftmargin\relax% +\partopsep\@IEEEiedpartopsep\relax% +\parsep\@IEEEiedparsep\relax% +\itemsep\@IEEEieditemsep\relax% +\rightmargin\@IEEEiedrightmargin\relax% +\listparindent\@IEEEiedlistparindent\relax% +\itemindent\@IEEEieditemindent\relax} + + +% v1.6b provide original LaTeX IED list environments +% note that latex.ltx defines \itemize and \enumerate, but not \description +% which must be created by the base classes +% save original LaTeX itemize and enumerate +\let\LaTeXitemize\itemize +\let\endLaTeXitemize\enditemize +\let\LaTeXenumerate\enumerate +\let\endLaTeXenumerate\endenumerate + +% provide original LaTeX description environment from article.cls +\newenvironment{LaTeXdescription} + {\list{}{\labelwidth\z@ \itemindent-\leftmargin + \let\makelabel\descriptionlabel}} + {\endlist} +\newcommand*\descriptionlabel[1]{\hspace\labelsep + \normalfont\bfseries #1} + + +% override LaTeX's default IED lists +\def\itemize{\@IEEEitemize} +\def\enditemize{\@endIEEEitemize} +\def\enumerate{\@IEEEenumerate} +\def\endenumerate{\@endIEEEenumerate} +\def\description{\@IEEEdescription} +\def\enddescription{\@endIEEEdescription} + +% provide the user with aliases - may help those using packages that +% override itemize, enumerate, or description +\def\IEEEitemize{\@IEEEitemize} +\def\endIEEEitemize{\@endIEEEitemize} +\def\IEEEenumerate{\@IEEEenumerate} +\def\endIEEEenumerate{\@endIEEEenumerate} +\def\IEEEdescription{\@IEEEdescription} +\def\endIEEEdescription{\@endIEEEdescription} + + +% V1.6 we want to keep the IEEEtran IED list definitions as our own internal +% commands so they are protected against redefinition +\def\@IEEEitemize{\@ifnextchar[{\@@IEEEitemize}{\@@IEEEitemize[\relax]}} +\def\@IEEEenumerate{\@ifnextchar[{\@@IEEEenumerate}{\@@IEEEenumerate[\relax]}} +\def\@IEEEdescription{\@ifnextchar[{\@@IEEEdescription}{\@@IEEEdescription[\relax]}} +\def\@endIEEEitemize{\endlist} +\def\@endIEEEenumerate{\endlist} +\def\@endIEEEdescription{\endlist} + + +% DO NOT ALLOW BLANK LINES TO BE IN THESE IED ENVIRONMENTS +% AS THIS WILL FORCE NEW PARAGRAPHS AFTER THE IED LISTS +% IEEEtran itemized list MDS 1/2001 +% Note controlled spacing here, shield end of lines with % +\def\@@IEEEitemize[#1]{% + \ifnum\@itemdepth>3\relax\@toodeep\else% + \ifnum\@listdepth>5\relax\@toodeep\else% + \advance\@itemdepth\@ne% + \edef\@itemitem{labelitem\romannumeral\the\@itemdepth}% + % get the labelindentfactor for this level + \advance\@listdepth\@ne% we need to know what the level WILL be + \edef\IEEElabelindentfactor{\csname IEEElabelindentfactor\romannumeral\the\@listdepth\endcsname}% + \advance\@listdepth-\@ne% undo our increment + \def\@IEEEiedjustify{2}% right justified labels are default + % set other defaults + \IEEEnocalcleftmarginfalse% + \IEEEnolabelindentfactorfalse% + \topsep\IEEEiedtopsep% + \IEEElabelindent\IEEEilabelindent% + \labelsep\IEEEiednormlabelsep% + \partopsep 0ex% + \parsep 0ex% + \itemsep \parskip% + \rightmargin 0em% + \listparindent 0em% + \itemindent 0em% + % calculate the label width + % the user can override this later if + % they specified a \labelwidth + \settowidth{\labelwidth}{\csname labelitem\romannumeral\the\@itemdepth\endcsname}% + \@IEEEsavelistparams% save our list parameters + \list{\csname\@itemitem\endcsname}{% + \@IEEErestorelistparams% override any list{} changes + % to our globals + \let\makelabel\@IEEEiedmakelabel% v1.6b setup \makelabel + \IEEEiedlistdecl% let user alter parameters + #1\relax% + % If the user has requested not to use the + % labelindent factor, don't revise \labelindent + \ifIEEEnolabelindentfactor\relax% + \else\IEEElabelindent=\IEEElabelindentfactor\labelindent% + \fi% + % Unless the user has requested otherwise, + % calculate our left margin based + % on \IEEElabelindent, \labelwidth and + % \labelsep + \ifIEEEnocalcleftmargin\relax% + \else\IEEEcalcleftmargin{\IEEElabelindent}% + \fi}\fi\fi}% + + +% DO NOT ALLOW BLANK LINES TO BE IN THESE IED ENVIRONMENTS +% AS THIS WILL FORCE NEW PARAGRAPHS AFTER THE IED LISTS +% IEEEtran enumerate list MDS 1/2001 +% Note controlled spacing here, shield end of lines with % +\def\@@IEEEenumerate[#1]{% + \ifnum\@enumdepth>3\relax\@toodeep\else% + \ifnum\@listdepth>5\relax\@toodeep\else% + \advance\@enumdepth\@ne% + \edef\@enumctr{enum\romannumeral\the\@enumdepth}% + % get the labelindentfactor for this level + \advance\@listdepth\@ne% we need to know what the level WILL be + \edef\IEEElabelindentfactor{\csname IEEElabelindentfactor\romannumeral\the\@listdepth\endcsname}% + \advance\@listdepth-\@ne% undo our increment + \def\@IEEEiedjustify{2}% right justified labels are default + % set other defaults + \IEEEnocalcleftmarginfalse% + \IEEEnolabelindentfactorfalse% + \topsep\IEEEiedtopsep% + \IEEElabelindent\IEEEelabelindent% + \labelsep\IEEEiednormlabelsep% + \partopsep 0ex% + \parsep 0ex% + \itemsep 0ex% + \rightmargin 0em% + \listparindent 0em% + \itemindent 0em% + % calculate the label width + % We'll set it to the width suitable for all labels using + % normalfont 1) to 9) + % The user can override this later + \settowidth{\labelwidth}{9)}% + \@IEEEsavelistparams% save our list parameters + \list{\csname label\@enumctr\endcsname}{\usecounter{\@enumctr}% + \@IEEErestorelistparams% override any list{} changes + % to our globals + \let\makelabel\@IEEEiedmakelabel% v1.6b setup \makelabel + \IEEEiedlistdecl% let user alter parameters + #1\relax% + % If the user has requested not to use the + % IEEElabelindent factor, don't revise \IEEElabelindent + \ifIEEEnolabelindentfactor\relax% + \else\IEEElabelindent=\IEEElabelindentfactor\IEEElabelindent% + \fi% + % Unless the user has requested otherwise, + % calculate our left margin based + % on \IEEElabelindent, \labelwidth and + % \labelsep + \ifIEEEnocalcleftmargin\relax% + \else\IEEEcalcleftmargin{\IEEElabelindent}% + \fi}\fi\fi}% + + +% DO NOT ALLOW BLANK LINES TO BE IN THESE IED ENVIRONMENTS +% AS THIS WILL FORCE NEW PARAGRAPHS AFTER THE IED LISTS +% IEEEtran description list MDS 1/2001 +% Note controlled spacing here, shield end of lines with % +\def\@@IEEEdescription[#1]{% + \ifnum\@listdepth>5\relax\@toodeep\else% + % get the labelindentfactor for this level + \advance\@listdepth\@ne% we need to know what the level WILL be + \edef\IEEElabelindentfactor{\csname IEEElabelindentfactor\romannumeral\the\@listdepth\endcsname}% + \advance\@listdepth-\@ne% undo our increment + \def\@IEEEiedjustify{0}% left justified labels are default + % set other defaults + \IEEEnocalcleftmarginfalse% + \IEEEnolabelindentfactorfalse% + \topsep\IEEEiedtopsep% + \IEEElabelindent\IEEEdlabelindent% + % assume normal labelsep + \labelsep\IEEEiednormlabelsep% + \partopsep 0ex% + \parsep 0ex% + \itemsep 0ex% + \rightmargin 0em% + \listparindent 0em% + \itemindent 0em% + % Bogus label width in case the user forgets + % to set it. + % TIP: If you want to see what a variable's width is you + % can use the TeX command \showthe\width-variable to + % display it on the screen during compilation + % (This might be helpful to know when you need to find out + % which label is the widest) + \settowidth{\labelwidth}{Hello}% + \@IEEEsavelistparams% save our list parameters + \list{}{\@IEEErestorelistparams% override any list{} changes + % to our globals + \let\makelabel\@IEEEiedmakelabel% v1.6b setup \makelabel + \IEEEiedlistdecl% let user alter parameters + #1\relax% + % If the user has requested not to use the + % labelindent factor, don't revise \IEEElabelindent + \ifIEEEnolabelindentfactor\relax% + \else\IEEElabelindent=\IEEElabelindentfactor\IEEElabelindent% + \fi% + % Unless the user has requested otherwise, + % calculate our left margin based + % on \IEEElabelindent, \labelwidth and + % \labelsep + \ifIEEEnocalcleftmargin\relax% + \else\IEEEcalcleftmargin{\IEEElabelindent}\relax% + \fi}\fi} + +% v1.6b we use one makelabel that does justification as needed. +\def\@IEEEiedmakelabel#1{\relax\if\@IEEEiedjustify 0\relax +\makebox[\labelwidth][l]{\normalfont #1}\else +\if\@IEEEiedjustify 1\relax +\makebox[\labelwidth][c]{\normalfont #1}\else +\makebox[\labelwidth][r]{\normalfont #1}\fi\fi} + + +% VERSE and QUOTE +% V1.7 define environments with newenvironment +\newenvironment{verse}{\let\\=\@centercr + \list{}{\itemsep\z@ \itemindent -1.5em \listparindent \itemindent + \rightmargin\leftmargin\advance\leftmargin 1.5em}\item\relax} + {\endlist} +\newenvironment{quotation}{\list{}{\listparindent 1.5em \itemindent\listparindent + \rightmargin\leftmargin \parsep 0pt plus 1pt}\item\relax} + {\endlist} +\newenvironment{quote}{\list{}{\rightmargin\leftmargin}\item\relax} + {\endlist} + + +% \titlepage +% provided only for backward compatibility. \maketitle is the correct +% way to create the title page. +\newif\if@restonecol +\def\titlepage{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn + \else \newpage \fi \thispagestyle{empty}\c@page\z@} +\def\endtitlepage{\if@restonecol\twocolumn \else \newpage \fi} + +% standard values from article.cls +\arraycolsep 5pt +\arrayrulewidth .4pt +\doublerulesep 2pt + +\tabcolsep 6pt +\tabbingsep 0.5em + + +%% FOOTNOTES +% +%\skip\footins 10pt plus 4pt minus 2pt +% V1.6 respond to changes in font size +% space added above the footnotes (if present) +\skip\footins 0.9\baselineskip plus 0.4\baselineskip minus 0.2\baselineskip + +% V1.6, we need to make \footnotesep responsive to changes +% in \baselineskip or strange spacings will result when in +% draft mode. Here is a little LaTeX secret - \footnotesep +% determines the height of an invisible strut that is placed +% *above* the baseline of footnotes after the first. Since +% LaTeX considers the space for characters to be 0.7/baselineskip +% above the baseline and 0.3/baselineskip below it, we need to +% use 0.7/baselineskip as a \footnotesep to maintain equal spacing +% between all the lines of the footnotes. IEEE often uses a tad +% more, so use 0.8\baselineskip. This slightly larger value also helps +% the text to clear the footnote marks. Note that \thanks in IEEEtran +% uses its own value of \footnotesep which is set in \maketitle. +{\footnotesize +\global\footnotesep 0.8\baselineskip} + +\def\unnumberedfootnote{\gdef\@thefnmark{\quad}\@footnotetext} + +\skip\@mpfootins 0.3\baselineskip +\fboxsep = 3pt +\fboxrule = .4pt +% V1.6 use 1em, then use LaTeX2e's \@makefnmark +% Note that IEEE normally *left* aligns the footnote marks, so we don't need +% box resizing tricks here. +%\long\def\@makefnmark{\scriptsize\normalfont\@thefnmark} +\long\def\@makefntext#1{\parindent 1em\indent\hbox{\@makefnmark}#1}% V1.6 use 1em +\long\def\@maketablefntext#1{\raggedleft\leavevmode\hbox{\@makefnmark}#1} +% V1.7 compsoc does not use superscipts for footnote marks +\ifCLASSOPTIONcompsoc +\def\@IEEEcompsocmakefnmark{\hbox{\normalfont\@thefnmark.\ }} +\long\def\@makefntext#1{\parindent 1em\indent\hbox{\@IEEEcompsocmakefnmark}#1} +\fi + +% IEEE does not use footnote rules. Or do they? +\def\footnoterule{\vskip-2pt \hrule height 0.6pt depth \z@ \vskip1.6pt\relax} +\toks@\expandafter{\@setminipage\let\footnoterule\relax\footnotesep\z@} +\edef\@setminipage{\the\toks@} + +% V1.7 for compsoc, IEEE uses a footnote rule only for \thanks. We devise a "one-shot" +% system to implement this. +\newif\if@IEEEenableoneshotfootnoterule +\@IEEEenableoneshotfootnoterulefalse +\ifCLASSOPTIONcompsoc +\def\footnoterule{\relax\if@IEEEenableoneshotfootnoterule +\kern-5pt +\hbox to \columnwidth{\hfill\vrule width 0.5\columnwidth height 0.4pt\hfill} +\kern4.6pt +\global\@IEEEenableoneshotfootnoterulefalse +\else +\relax +\fi} +\fi + +% V1.6 do not allow LaTeX to break a footnote across multiple pages +\interfootnotelinepenalty=10000 + +% V1.6 discourage breaks within equations +% Note that amsmath normally sets this to 10000, +% but LaTeX2e normally uses 100. +\interdisplaylinepenalty=2500 + +% default allows section depth up to /paragraph +\setcounter{secnumdepth}{4} + +% technotes do not allow /paragraph +\ifCLASSOPTIONtechnote + \setcounter{secnumdepth}{3} +\fi +% neither do compsoc conferences +\@IEEEcompsocconfonly{\setcounter{secnumdepth}{3}} + + +\newcounter{section} +\newcounter{subsection}[section] +\newcounter{subsubsection}[subsection] +\newcounter{paragraph}[subsubsection] + +% used only by IEEEtran's IEEEeqnarray as other packages may +% have their own, different, implementations +\newcounter{IEEEsubequation}[equation] + +% as shown when called by user from \ref, \label and in table of contents +\def\theequation{\arabic{equation}} % 1 +\def\theIEEEsubequation{\theequation\alph{IEEEsubequation}} % 1a (used only by IEEEtran's IEEEeqnarray) +\ifCLASSOPTIONcompsoc +% compsoc is all arabic +\def\thesection{\arabic{section}} +\def\thesubsection{\thesection.\arabic{subsection}} +\def\thesubsubsection{\thesubsection.\arabic{subsubsection}} +\def\theparagraph{\thesubsubsection.\arabic{paragraph}} +\else +\def\thesection{\Roman{section}} % I +% V1.7, \mbox prevents breaks around - +\def\thesubsection{\mbox{\thesection-\Alph{subsection}}} % I-A +% V1.7 use I-A1 format used by IEEE rather than I-A.1 +\def\thesubsubsection{\thesubsection\arabic{subsubsection}} % I-A1 +\def\theparagraph{\thesubsubsection\alph{paragraph}} % I-A1a +\fi + +% From Heiko Oberdiek. Because of the \mbox in \thesubsection, we need to +% tell hyperref to disable the \mbox command when making PDF bookmarks. +% This done already with hyperref.sty version 6.74o and later, but +% it will not hurt to do it here again for users of older versions. +\@ifundefined{pdfstringdefPreHook}{\let\pdfstringdefPreHook\@empty}{}% +\g@addto@macro\pdfstringdefPreHook{\let\mbox\relax} + + +% Main text forms (how shown in main text headings) +% V1.6, using \thesection in \thesectiondis allows changes +% in the former to automatically appear in the latter +\ifCLASSOPTIONcompsoc + \ifCLASSOPTIONconference% compsoc conference + \def\thesectiondis{\thesection.} + \def\thesubsectiondis{\thesectiondis\arabic{subsection}.} + \def\thesubsubsectiondis{\thesubsectiondis\arabic{subsubsection}.} + \def\theparagraphdis{\thesubsubsectiondis\arabic{paragraph}.} + \else% compsoc not conferencs + \def\thesectiondis{\thesection} + \def\thesubsectiondis{\thesectiondis.\arabic{subsection}} + \def\thesubsubsectiondis{\thesubsectiondis.\arabic{subsubsection}} + \def\theparagraphdis{\thesubsubsectiondis.\arabic{paragraph}} + \fi +\else% not compsoc + \def\thesectiondis{\thesection.} % I. + \def\thesubsectiondis{\Alph{subsection}.} % B. + \def\thesubsubsectiondis{\arabic{subsubsection})} % 3) + \def\theparagraphdis{\alph{paragraph})} % d) +\fi + +% just like LaTeX2e's \@eqnnum +\def\theequationdis{{\normalfont \normalcolor (\theequation)}}% (1) +% IEEEsubequation used only by IEEEtran's IEEEeqnarray +\def\theIEEEsubequationdis{{\normalfont \normalcolor (\theIEEEsubequation)}}% (1a) +% redirect LaTeX2e's equation number display and all that depend on +% it, through IEEEtran's \theequationdis +\def\@eqnnum{\theequationdis} + + + +% V1.7 provide string macros as article.cls does +\def\contentsname{Contents} +\def\listfigurename{List of Figures} +\def\listtablename{List of Tables} +\def\refname{References} +\def\indexname{Index} +\def\figurename{Fig.} +\def\tablename{TABLE} +\@IEEEcompsocconfonly{\def\figurename{Figure}\def\tablename{Table}} +\def\partname{Part} +\def\appendixname{Appendix} +\def\abstractname{Abstract} +% IEEE specific names +\def\IEEEkeywordsname{Keywords} +\def\IEEEproofname{Proof} + + +% LIST OF FIGURES AND TABLES AND TABLE OF CONTENTS +% +\def\@pnumwidth{1.55em} +\def\@tocrmarg{2.55em} +\def\@dotsep{4.5} +\setcounter{tocdepth}{3} + +% adjusted some spacings here so that section numbers will not easily +% collide with the section titles. +% VIII; VIII-A; and VIII-A.1 are usually the worst offenders. +% MDS 1/2001 +\def\tableofcontents{\section*{\contentsname}\@starttoc{toc}} +\def\l@section#1#2{\addpenalty{\@secpenalty}\addvspace{1.0em plus 1pt}% + \@tempdima 2.75em \begingroup \parindent \z@ \rightskip \@pnumwidth% + \parfillskip-\@pnumwidth {\bfseries\leavevmode #1}\hfil\hbox to\@pnumwidth{\hss #2}\par% + \endgroup} +% argument format #1:level, #2:labelindent,#3:labelsep +\def\l@subsection{\@dottedtocline{2}{2.75em}{3.75em}} +\def\l@subsubsection{\@dottedtocline{3}{6.5em}{4.5em}} +% must provide \l@ defs for ALL sublevels EVEN if tocdepth +% is such as they will not appear in the table of contents +% these defs are how TOC knows what level these things are! +\def\l@paragraph{\@dottedtocline{4}{6.5em}{5.5em}} +\def\l@subparagraph{\@dottedtocline{5}{6.5em}{6.5em}} +\def\listoffigures{\section*{\listfigurename}\@starttoc{lof}} +\def\l@figure{\@dottedtocline{1}{0em}{2.75em}} +\def\listoftables{\section*{\listtablename}\@starttoc{lot}} +\let\l@table\l@figure + + +%% Definitions for floats +%% +%% Normal Floats +\floatsep 1\baselineskip plus 0.2\baselineskip minus 0.2\baselineskip +\textfloatsep 1.7\baselineskip plus 0.2\baselineskip minus 0.4\baselineskip +\@fptop 0pt plus 1fil +\@fpsep 0.75\baselineskip plus 2fil +\@fpbot 0pt plus 1fil +\def\topfraction{0.9} +\def\bottomfraction{0.4} +\def\floatpagefraction{0.8} +% V1.7, let top floats approach 90% of page +\def\textfraction{0.1} + +%% Double Column Floats +\dblfloatsep 1\baselineskip plus 0.2\baselineskip minus 0.2\baselineskip + +\dbltextfloatsep 1.7\baselineskip plus 0.2\baselineskip minus 0.4\baselineskip +% Note that it would be nice if the rubber here actually worked in LaTeX2e. +% There is a long standing limitation in LaTeX, first discovered (to the best +% of my knowledge) by Alan Jeffrey in 1992. LaTeX ignores the stretchable +% portion of \dbltextfloatsep, and as a result, double column figures can and +% do result in an non-integer number of lines in the main text columns with +% underfull vbox errors as a consequence. A post to comp.text.tex +% by Donald Arseneau confirms that this had not yet been fixed in 1998. +% IEEEtran V1.6 will fix this problem for you in the titles, but it doesn't +% protect you from other double floats. Happy vspace'ing. + +\@dblfptop 0pt plus 1fil +\@dblfpsep 0.75\baselineskip plus 2fil +\@dblfpbot 0pt plus 1fil +\def\dbltopfraction{0.8} +\def\dblfloatpagefraction{0.8} +\setcounter{dbltopnumber}{4} + +\intextsep 1\baselineskip plus 0.2\baselineskip minus 0.2\baselineskip +\setcounter{topnumber}{2} +\setcounter{bottomnumber}{2} +\setcounter{totalnumber}{4} + + + +% article class provides these, we should too. +\newlength\abovecaptionskip +\newlength\belowcaptionskip +% but only \abovecaptionskip is used above figure captions and *below* table +% captions +\setlength\abovecaptionskip{0.65\baselineskip} +\setlength\belowcaptionskip{0.75\baselineskip} +% V1.6 create hooks in case the caption spacing ever needs to be +% overridden by a user +\def\@IEEEfigurecaptionsepspace{\vskip\abovecaptionskip\relax}% +\def\@IEEEtablecaptionsepspace{\vskip\belowcaptionskip\relax}% + + +% 1.6b revise caption system so that \@makecaption uses two arguments +% as with LaTeX2e. Otherwise, there will be problems when using hyperref. +\def\@IEEEtablestring{table} + +\ifCLASSOPTIONcompsoc +% V1.7 compsoc \@makecaption +\ifCLASSOPTIONconference% compsoc conference +\long\def\@makecaption#1#2{% +% test if is a for a figure or table +\ifx\@captype\@IEEEtablestring% +% if a table, do table caption +\normalsize\begin{center}{\normalfont\sffamily\normalsize {#1.}~ #2}\end{center}% +\@IEEEtablecaptionsepspace +% if not a table, format it as a figure +\else +\@IEEEfigurecaptionsepspace +\setbox\@tempboxa\hbox{\normalfont\sffamily\normalsize {#1.}~ #2}% +\ifdim \wd\@tempboxa >\hsize% +% if caption is longer than a line, let it wrap around +\setbox\@tempboxa\hbox{\normalfont\sffamily\normalsize {#1.}~ }% +\parbox[t]{\hsize}{\normalfont\sffamily\normalsize \noindent\unhbox\@tempboxa#2}% +% if caption is shorter than a line, center +\else% +\hbox to\hsize{\normalfont\sffamily\normalsize\hfil\box\@tempboxa\hfil}% +\fi\fi} +\else% nonconference compsoc +\long\def\@makecaption#1#2{% +% test if is a for a figure or table +\ifx\@captype\@IEEEtablestring% +% if a table, do table caption +\normalsize\begin{center}{\normalfont\sffamily\normalsize #1}\\{\normalfont\sffamily\normalsize #2}\end{center}% +\@IEEEtablecaptionsepspace +% if not a table, format it as a figure +\else +\@IEEEfigurecaptionsepspace +\setbox\@tempboxa\hbox{\normalfont\sffamily\normalsize {#1.}~ #2}% +\ifdim \wd\@tempboxa >\hsize% +% if caption is longer than a line, let it wrap around +\setbox\@tempboxa\hbox{\normalfont\sffamily\normalsize {#1.}~ }% +\parbox[t]{\hsize}{\normalfont\sffamily\normalsize \noindent\unhbox\@tempboxa#2}% +% if caption is shorter than a line, left justify +\else% +\hbox to\hsize{\normalfont\sffamily\normalsize\box\@tempboxa\hfil}% +\fi\fi} +\fi + +\else% traditional noncompsoc \@makecaption +\long\def\@makecaption#1#2{% +% test if is a for a figure or table +\ifx\@captype\@IEEEtablestring% +% if a table, do table caption +\footnotesize{\centering\normalfont\footnotesize#1.\qquad\scshape #2\par}% +\@IEEEtablecaptionsepspace +% if not a table, format it as a figure +\else +\@IEEEfigurecaptionsepspace +% 3/2001 use footnotesize, not small; use two nonbreaking spaces, not one +\setbox\@tempboxa\hbox{\normalfont\footnotesize {#1.}~~ #2}% +\ifdim \wd\@tempboxa >\hsize% +% if caption is longer than a line, let it wrap around +\setbox\@tempboxa\hbox{\normalfont\footnotesize {#1.}~~ }% +\parbox[t]{\hsize}{\normalfont\footnotesize\noindent\unhbox\@tempboxa#2}% +% if caption is shorter than a line, center if conference, left justify otherwise +\else% +\ifCLASSOPTIONconference \hbox to\hsize{\normalfont\footnotesize\box\@tempboxa\hfil}% +\else \hbox to\hsize{\normalfont\footnotesize\box\@tempboxa\hfil}% +\fi\fi\fi} +\fi + + + +% V1.7 disable captions class option, do so in a way that retains operation of \label +% within \caption +\ifCLASSOPTIONcaptionsoff +\long\def\@makecaption#1#2{\vspace*{2em}\footnotesize\begin{center}{\footnotesize #1}\end{center}% +\let\@IEEEtemporiglabeldefsave\label +\let\@IEEEtemplabelargsave\relax +\def\label##1{\gdef\@IEEEtemplabelargsave{##1}}% +\setbox\@tempboxa\hbox{#2}% +\let\label\@IEEEtemporiglabeldefsave +\ifx\@IEEEtemplabelargsave\relax\else\label{\@IEEEtemplabelargsave}\fi} +\fi + + +% V1.7 define end environments with \def not \let so as to work OK with +% preview-latex +\newcounter{figure} +\def\thefigure{\@arabic\c@figure} +\def\fps@figure{tbp} +\def\ftype@figure{1} +\def\ext@figure{lof} +\def\fnum@figure{\figurename~\thefigure} +\def\figure{\@float{figure}} +\def\endfigure{\end@float} +\@namedef{figure*}{\@dblfloat{figure}} +\@namedef{endfigure*}{\end@dblfloat} +\newcounter{table} +\ifCLASSOPTIONcompsoc +\def\thetable{\arabic{table}} +\else +\def\thetable{\@Roman\c@table} +\fi +\def\fps@table{tbp} +\def\ftype@table{2} +\def\ext@table{lot} +\def\fnum@table{\tablename~\thetable} +% V1.6 IEEE uses 8pt text for tables +% to default to footnotesize, we hack into LaTeX2e's \@floatboxreset and pray +\def\table{\def\@floatboxreset{\reset@font\scriptsize\@setminipage}% + \let\@makefntext\@maketablefntext + \@float{table}} +\def\endtable{\end@float} +% v1.6b double column tables need to default to footnotesize as well. +\@namedef{table*}{\def\@floatboxreset{\reset@font\scriptsize\@setminipage}\@dblfloat{table}} +\@namedef{endtable*}{\end@dblfloat} + + + + +%% +%% START OF IEEEeqnarry DEFINITIONS +%% +%% Inspired by the concepts, examples, and previous works of LaTeX +%% coders and developers such as Donald Arseneau, Fred Bartlett, +%% David Carlisle, Tony Liu, Frank Mittelbach, Piet van Oostrum, +%% Roland Winkler and Mark Wooding. +%% I don't make the claim that my work here is even near their calibre. ;) + + +% hook to allow easy changeover to IEEEtran.cls/tools.sty error reporting +\def\@IEEEclspkgerror{\ClassError{IEEEtran}} + +\newif\if@IEEEeqnarraystarform% flag to indicate if the environment was called as the star form +\@IEEEeqnarraystarformfalse + +\newif\if@advanceIEEEeqncolcnt% tracks if the environment should advance the col counter +% allows a way to make an \IEEEeqnarraybox that can be used within an \IEEEeqnarray +% used by IEEEeqnarraymulticol so that it can work properly in both +\@advanceIEEEeqncolcnttrue + +\newcount\@IEEEeqnnumcols % tracks how many IEEEeqnarray cols are defined +\newcount\@IEEEeqncolcnt % tracks how many IEEEeqnarray cols the user actually used + + +% The default math style used by the columns +\def\IEEEeqnarraymathstyle{\displaystyle} +% The default text style used by the columns +% default to using the current font +\def\IEEEeqnarraytextstyle{\relax} + +% like the iedlistdecl but for \IEEEeqnarray +\def\IEEEeqnarraydecl{\relax} +\def\IEEEeqnarrayboxdecl{\relax} + +% \yesnumber is the opposite of \nonumber +% a novel concept with the same def as the equationarray package +% However, we give IEEE versions too since some LaTeX packages such as +% the MDWtools mathenv.sty redefine \nonumber to something else. +\providecommand{\yesnumber}{\global\@eqnswtrue} +\def\IEEEyesnumber{\global\@eqnswtrue} +\def\IEEEnonumber{\global\@eqnswfalse} + + +\def\IEEEyessubnumber{\global\@IEEEissubequationtrue\global\@eqnswtrue% +\if@IEEEeqnarrayISinner% only do something inside an IEEEeqnarray +\if@IEEElastlinewassubequation\addtocounter{equation}{-1}\else\setcounter{IEEEsubequation}{1}\fi% +\def\@currentlabel{\p@IEEEsubequation\theIEEEsubequation}\fi} + +% flag to indicate that an equation is a sub equation +\newif\if@IEEEissubequation% +\@IEEEissubequationfalse + +% allows users to "push away" equations that get too close to the equation numbers +\def\IEEEeqnarraynumspace{\hphantom{\if@IEEEissubequation\theIEEEsubequationdis\else\theequationdis\fi}} + +% provides a way to span multiple columns within IEEEeqnarray environments +% will consider \if@advanceIEEEeqncolcnt before globally advancing the +% column counter - so as to work within \IEEEeqnarraybox +% usage: \IEEEeqnarraymulticol{number cols. to span}{col type}{cell text} +\long\def\IEEEeqnarraymulticol#1#2#3{\multispan{#1}% +% check if column is defined +\relax\expandafter\ifx\csname @IEEEeqnarraycolDEF#2\endcsname\@IEEEeqnarraycolisdefined% +\csname @IEEEeqnarraycolPRE#2\endcsname#3\relax\relax\relax\relax\relax% +\relax\relax\relax\relax\relax\csname @IEEEeqnarraycolPOST#2\endcsname% +\else% if not, error and use default type +\@IEEEclspkgerror{Invalid column type "#2" in \string\IEEEeqnarraymulticol.\MessageBreak +Using a default centering column instead}% +{You must define IEEEeqnarray column types before use.}% +\csname @IEEEeqnarraycolPRE@IEEEdefault\endcsname#3\relax\relax\relax\relax\relax% +\relax\relax\relax\relax\relax\csname @IEEEeqnarraycolPOST@IEEEdefault\endcsname% +\fi% +% advance column counter only if the IEEEeqnarray environment wants it +\if@advanceIEEEeqncolcnt\global\advance\@IEEEeqncolcnt by #1\relax\fi} + +% like \omit, but maintains track of the column counter for \IEEEeqnarray +\def\IEEEeqnarrayomit{\omit\if@advanceIEEEeqncolcnt\global\advance\@IEEEeqncolcnt by 1\relax\fi} + + +% provides a way to define a letter referenced column type +% usage: \IEEEeqnarraydefcol{col. type letter/name}{pre insertion text}{post insertion text} +\def\IEEEeqnarraydefcol#1#2#3{\expandafter\def\csname @IEEEeqnarraycolPRE#1\endcsname{#2}% +\expandafter\def\csname @IEEEeqnarraycolPOST#1\endcsname{#3}% +\expandafter\def\csname @IEEEeqnarraycolDEF#1\endcsname{1}} + + +% provides a way to define a numerically referenced inter-column glue types +% usage: \IEEEeqnarraydefcolsep{col. glue number}{glue definition} +\def\IEEEeqnarraydefcolsep#1#2{\expandafter\def\csname @IEEEeqnarraycolSEP\romannumeral #1\endcsname{#2}% +\expandafter\def\csname @IEEEeqnarraycolSEPDEF\romannumeral #1\endcsname{1}} + + +\def\@IEEEeqnarraycolisdefined{1}% just a macro for 1, used for checking undefined column types + + +% expands and appends the given argument to the \@IEEEtrantmptoksA token list +% used to build up the \halign preamble +\def\@IEEEappendtoksA#1{\edef\@@IEEEappendtoksA{\@IEEEtrantmptoksA={\the\@IEEEtrantmptoksA #1}}% +\@@IEEEappendtoksA} + +% also appends to \@IEEEtrantmptoksA, but does not expand the argument +% uses \toks8 as a scratchpad register +\def\@IEEEappendNOEXPANDtoksA#1{\toks8={#1}% +\edef\@@IEEEappendNOEXPANDtoksA{\@IEEEtrantmptoksA={\the\@IEEEtrantmptoksA\the\toks8}}% +\@@IEEEappendNOEXPANDtoksA} + +% define some common column types for the user +% math +\IEEEeqnarraydefcol{l}{$\IEEEeqnarraymathstyle}{$\hfil} +\IEEEeqnarraydefcol{c}{\hfil$\IEEEeqnarraymathstyle}{$\hfil} +\IEEEeqnarraydefcol{r}{\hfil$\IEEEeqnarraymathstyle}{$} +\IEEEeqnarraydefcol{L}{$\IEEEeqnarraymathstyle{}}{{}$\hfil} +\IEEEeqnarraydefcol{C}{\hfil$\IEEEeqnarraymathstyle{}}{{}$\hfil} +\IEEEeqnarraydefcol{R}{\hfil$\IEEEeqnarraymathstyle{}}{{}$} +% text +\IEEEeqnarraydefcol{s}{\IEEEeqnarraytextstyle}{\hfil} +\IEEEeqnarraydefcol{t}{\hfil\IEEEeqnarraytextstyle}{\hfil} +\IEEEeqnarraydefcol{u}{\hfil\IEEEeqnarraytextstyle}{} + +% vertical rules +\IEEEeqnarraydefcol{v}{}{\vrule width\arrayrulewidth} +\IEEEeqnarraydefcol{vv}{\vrule width\arrayrulewidth\hfil}{\hfil\vrule width\arrayrulewidth} +\IEEEeqnarraydefcol{V}{}{\vrule width\arrayrulewidth\hskip\doublerulesep\vrule width\arrayrulewidth} +\IEEEeqnarraydefcol{VV}{\vrule width\arrayrulewidth\hskip\doublerulesep\vrule width\arrayrulewidth\hfil}% +{\hfil\vrule width\arrayrulewidth\hskip\doublerulesep\vrule width\arrayrulewidth} + +% horizontal rules +\IEEEeqnarraydefcol{h}{}{\leaders\hrule height\arrayrulewidth\hfil} +\IEEEeqnarraydefcol{H}{}{\leaders\vbox{\hrule width\arrayrulewidth\vskip\doublerulesep\hrule width\arrayrulewidth}\hfil} + +% plain +\IEEEeqnarraydefcol{x}{}{} +\IEEEeqnarraydefcol{X}{$}{$} + +% the default column type to use in the event a column type is not defined +\IEEEeqnarraydefcol{@IEEEdefault}{\hfil$\IEEEeqnarraymathstyle}{$\hfil} + + +% a zero tabskip (used for "-" col types) +\def\@IEEEeqnarraycolSEPzero{0pt plus 0pt minus 0pt} +% a centering tabskip (used for "+" col types) +\def\@IEEEeqnarraycolSEPcenter{1000pt plus 0pt minus 1000pt} + +% top level default tabskip glues for the start, end, and inter-column +% may be reset within environments not always at the top level, e.g., \IEEEeqnarraybox +\edef\@IEEEeqnarraycolSEPdefaultstart{\@IEEEeqnarraycolSEPcenter}% default start glue +\edef\@IEEEeqnarraycolSEPdefaultend{\@IEEEeqnarraycolSEPcenter}% default end glue +\edef\@IEEEeqnarraycolSEPdefaultmid{\@IEEEeqnarraycolSEPzero}% default inter-column glue + + + +% creates a vertical rule that extends from the bottom to the top a a cell +% Provided in case other packages redefine \vline some other way. +% usage: \IEEEeqnarrayvrule[rule thickness] +% If no argument is provided, \arrayrulewidth will be used for the rule thickness. +\newcommand\IEEEeqnarrayvrule[1][\arrayrulewidth]{\vrule\@width#1\relax} + +% creates a blank separator row +% usage: \IEEEeqnarrayseprow[separation length][font size commands] +% default is \IEEEeqnarrayseprow[0.25\normalbaselineskip][\relax] +% blank arguments inherit the default values +% uses \skip5 as a scratch register - calls \@IEEEeqnarraystrutsize which uses more scratch registers +\def\IEEEeqnarrayseprow{\relax\@ifnextchar[{\@IEEEeqnarrayseprow}{\@IEEEeqnarrayseprow[0.25\normalbaselineskip]}} +\def\@IEEEeqnarrayseprow[#1]{\relax\@ifnextchar[{\@@IEEEeqnarrayseprow[#1]}{\@@IEEEeqnarrayseprow[#1][\relax]}} +\def\@@IEEEeqnarrayseprow[#1][#2]{\def\@IEEEeqnarrayseprowARGONE{#1}% +\ifx\@IEEEeqnarrayseprowARGONE\@empty% +% get the skip value, based on the font commands +% use skip5 because \IEEEeqnarraystrutsize uses \skip0, \skip2, \skip3 +% assign within a bogus box to confine the font changes +{\setbox0=\hbox{#2\relax\global\skip5=0.25\normalbaselineskip}}% +\else% +{\setbox0=\hbox{#2\relax\global\skip5=#1}}% +\fi% +\@IEEEeqnarrayhoptolastcolumn\IEEEeqnarraystrutsize{\skip5}{0pt}[\relax]\relax} + +% creates a blank separator row, but omits all the column templates +% usage: \IEEEeqnarrayseprowcut[separation length][font size commands] +% default is \IEEEeqnarrayseprowcut[0.25\normalbaselineskip][\relax] +% blank arguments inherit the default values +% uses \skip5 as a scratch register - calls \@IEEEeqnarraystrutsize which uses more scratch registers +\def\IEEEeqnarrayseprowcut{\multispan{\@IEEEeqnnumcols}\relax% span all the cols +% advance column counter only if the IEEEeqnarray environment wants it +\if@advanceIEEEeqncolcnt\global\advance\@IEEEeqncolcnt by \@IEEEeqnnumcols\relax\fi% +\@ifnextchar[{\@IEEEeqnarrayseprowcut}{\@IEEEeqnarrayseprowcut[0.25\normalbaselineskip]}} +\def\@IEEEeqnarrayseprowcut[#1]{\relax\@ifnextchar[{\@@IEEEeqnarrayseprowcut[#1]}{\@@IEEEeqnarrayseprowcut[#1][\relax]}} +\def\@@IEEEeqnarrayseprowcut[#1][#2]{\def\@IEEEeqnarrayseprowARGONE{#1}% +\ifx\@IEEEeqnarrayseprowARGONE\@empty% +% get the skip value, based on the font commands +% use skip5 because \IEEEeqnarraystrutsize uses \skip0, \skip2, \skip3 +% assign within a bogus box to confine the font changes +{\setbox0=\hbox{#2\relax\global\skip5=0.25\normalbaselineskip}}% +\else% +{\setbox0=\hbox{#2\relax\global\skip5=#1}}% +\fi% +\IEEEeqnarraystrutsize{\skip5}{0pt}[\relax]\relax} + + + +% draws a single rule across all the columns optional +% argument determines the rule width, \arrayrulewidth is the default +% updates column counter as needed and turns off struts +% usage: \IEEEeqnarrayrulerow[rule line thickness] +\def\IEEEeqnarrayrulerow{\multispan{\@IEEEeqnnumcols}\relax% span all the cols +% advance column counter only if the IEEEeqnarray environment wants it +\if@advanceIEEEeqncolcnt\global\advance\@IEEEeqncolcnt by \@IEEEeqnnumcols\relax\fi% +\@ifnextchar[{\@IEEEeqnarrayrulerow}{\@IEEEeqnarrayrulerow[\arrayrulewidth]}} +\def\@IEEEeqnarrayrulerow[#1]{\leaders\hrule height#1\hfil\relax% put in our rule +% turn off any struts +\IEEEeqnarraystrutsize{0pt}{0pt}[\relax]\relax} + + +% draws a double rule by using a single rule row, a separator row, and then +% another single rule row +% first optional argument determines the rule thicknesses, \arrayrulewidth is the default +% second optional argument determines the rule spacing, \doublerulesep is the default +% usage: \IEEEeqnarraydblrulerow[rule line thickness][rule spacing] +\def\IEEEeqnarraydblrulerow{\multispan{\@IEEEeqnnumcols}\relax% span all the cols +% advance column counter only if the IEEEeqnarray environment wants it +\if@advanceIEEEeqncolcnt\global\advance\@IEEEeqncolcnt by \@IEEEeqnnumcols\relax\fi% +\@ifnextchar[{\@IEEEeqnarraydblrulerow}{\@IEEEeqnarraydblrulerow[\arrayrulewidth]}} +\def\@IEEEeqnarraydblrulerow[#1]{\relax\@ifnextchar[{\@@IEEEeqnarraydblrulerow[#1]}% +{\@@IEEEeqnarraydblrulerow[#1][\doublerulesep]}} +\def\@@IEEEeqnarraydblrulerow[#1][#2]{\def\@IEEEeqnarraydblrulerowARG{#1}% +% we allow the user to say \IEEEeqnarraydblrulerow[][] +\ifx\@IEEEeqnarraydblrulerowARG\@empty% +\@IEEEeqnarrayrulerow[\arrayrulewidth]% +\else% +\@IEEEeqnarrayrulerow[#1]\relax% +\fi% +\def\@IEEEeqnarraydblrulerowARG{#2}% +\ifx\@IEEEeqnarraydblrulerowARG\@empty% +\\\IEEEeqnarrayseprow[\doublerulesep][\relax]% +\else% +\\\IEEEeqnarrayseprow[#2][\relax]% +\fi% +\\\multispan{\@IEEEeqnnumcols}% +% advance column counter only if the IEEEeqnarray environment wants it +\if@advanceIEEEeqncolcnt\global\advance\@IEEEeqncolcnt by \@IEEEeqnnumcols\relax\fi% +\def\@IEEEeqnarraydblrulerowARG{#1}% +\ifx\@IEEEeqnarraydblrulerowARG\@empty% +\@IEEEeqnarrayrulerow[\arrayrulewidth]% +\else% +\@IEEEeqnarrayrulerow[#1]% +\fi% +} + +% draws a double rule by using a single rule row, a separator (cutting) row, and then +% another single rule row +% first optional argument determines the rule thicknesses, \arrayrulewidth is the default +% second optional argument determines the rule spacing, \doublerulesep is the default +% usage: \IEEEeqnarraydblrulerow[rule line thickness][rule spacing] +\def\IEEEeqnarraydblrulerowcut{\multispan{\@IEEEeqnnumcols}\relax% span all the cols +% advance column counter only if the IEEEeqnarray environment wants it +\if@advanceIEEEeqncolcnt\global\advance\@IEEEeqncolcnt by \@IEEEeqnnumcols\relax\fi% +\@ifnextchar[{\@IEEEeqnarraydblrulerowcut}{\@IEEEeqnarraydblrulerowcut[\arrayrulewidth]}} +\def\@IEEEeqnarraydblrulerowcut[#1]{\relax\@ifnextchar[{\@@IEEEeqnarraydblrulerowcut[#1]}% +{\@@IEEEeqnarraydblrulerowcut[#1][\doublerulesep]}} +\def\@@IEEEeqnarraydblrulerowcut[#1][#2]{\def\@IEEEeqnarraydblrulerowARG{#1}% +% we allow the user to say \IEEEeqnarraydblrulerow[][] +\ifx\@IEEEeqnarraydblrulerowARG\@empty% +\@IEEEeqnarrayrulerow[\arrayrulewidth]% +\else% +\@IEEEeqnarrayrulerow[#1]% +\fi% +\def\@IEEEeqnarraydblrulerowARG{#2}% +\ifx\@IEEEeqnarraydblrulerowARG\@empty% +\\\IEEEeqnarrayseprowcut[\doublerulesep][\relax]% +\else% +\\\IEEEeqnarrayseprowcut[#2][\relax]% +\fi% +\\\multispan{\@IEEEeqnnumcols}% +% advance column counter only if the IEEEeqnarray environment wants it +\if@advanceIEEEeqncolcnt\global\advance\@IEEEeqncolcnt by \@IEEEeqnnumcols\relax\fi% +\def\@IEEEeqnarraydblrulerowARG{#1}% +\ifx\@IEEEeqnarraydblrulerowARG\@empty% +\@IEEEeqnarrayrulerow[\arrayrulewidth]% +\else% +\@IEEEeqnarrayrulerow[#1]% +\fi% +} + + + +% inserts a full row's worth of &'s +% relies on \@IEEEeqnnumcols to provide the correct number of columns +% uses \@IEEEtrantmptoksA, \count0 as scratch registers +\def\@IEEEeqnarrayhoptolastcolumn{\@IEEEtrantmptoksA={}\count0=1\relax% +\loop% add cols if the user did not use them all +\ifnum\count0<\@IEEEeqnnumcols\relax% +\@IEEEappendtoksA{&}% +\advance\count0 by 1\relax% update the col count +\repeat% +\the\@IEEEtrantmptoksA%execute the &'s +} + + + +\newif\if@IEEEeqnarrayISinner % flag to indicate if we are within the lines +\@IEEEeqnarrayISinnerfalse % of an IEEEeqnarray - after the IEEEeqnarraydecl + +\edef\@IEEEeqnarrayTHEstrutheight{0pt} % height and depth of IEEEeqnarray struts +\edef\@IEEEeqnarrayTHEstrutdepth{0pt} + +\edef\@IEEEeqnarrayTHEmasterstrutheight{0pt} % default height and depth of +\edef\@IEEEeqnarrayTHEmasterstrutdepth{0pt} % struts within an IEEEeqnarray + +\edef\@IEEEeqnarrayTHEmasterstrutHSAVE{0pt} % saved master strut height +\edef\@IEEEeqnarrayTHEmasterstrutDSAVE{0pt} % and depth + +\newif\if@IEEEeqnarrayusemasterstrut % flag to indicate that the master strut value +\@IEEEeqnarrayusemasterstruttrue % is to be used + + + +% saves the strut height and depth of the master strut +\def\@IEEEeqnarraymasterstrutsave{\relax% +\expandafter\skip0=\@IEEEeqnarrayTHEmasterstrutheight\relax% +\expandafter\skip2=\@IEEEeqnarrayTHEmasterstrutdepth\relax% +% remove stretchability +\dimen0\skip0\relax% +\dimen2\skip2\relax% +% save values +\edef\@IEEEeqnarrayTHEmasterstrutHSAVE{\the\dimen0}% +\edef\@IEEEeqnarrayTHEmasterstrutDSAVE{\the\dimen2}} + +% restores the strut height and depth of the master strut +\def\@IEEEeqnarraymasterstrutrestore{\relax% +\expandafter\skip0=\@IEEEeqnarrayTHEmasterstrutHSAVE\relax% +\expandafter\skip2=\@IEEEeqnarrayTHEmasterstrutDSAVE\relax% +% remove stretchability +\dimen0\skip0\relax% +\dimen2\skip2\relax% +% restore values +\edef\@IEEEeqnarrayTHEmasterstrutheight{\the\dimen0}% +\edef\@IEEEeqnarrayTHEmasterstrutdepth{\the\dimen2}} + + +% globally restores the strut height and depth to the +% master values and sets the master strut flag to true +\def\@IEEEeqnarraystrutreset{\relax% +\expandafter\skip0=\@IEEEeqnarrayTHEmasterstrutheight\relax% +\expandafter\skip2=\@IEEEeqnarrayTHEmasterstrutdepth\relax% +% remove stretchability +\dimen0\skip0\relax% +\dimen2\skip2\relax% +% restore values +\xdef\@IEEEeqnarrayTHEstrutheight{\the\dimen0}% +\xdef\@IEEEeqnarrayTHEstrutdepth{\the\dimen2}% +\global\@IEEEeqnarrayusemasterstruttrue} + + +% if the master strut is not to be used, make the current +% values of \@IEEEeqnarrayTHEstrutheight, \@IEEEeqnarrayTHEstrutdepth +% and the use master strut flag, global +% this allows user strut commands issued in the last column to be carried +% into the isolation/strut column +\def\@IEEEeqnarrayglobalizestrutstatus{\relax% +\if@IEEEeqnarrayusemasterstrut\else% +\xdef\@IEEEeqnarrayTHEstrutheight{\@IEEEeqnarrayTHEstrutheight}% +\xdef\@IEEEeqnarrayTHEstrutdepth{\@IEEEeqnarrayTHEstrutdepth}% +\global\@IEEEeqnarrayusemasterstrutfalse% +\fi} + + + +% usage: \IEEEeqnarraystrutsize{height}{depth}[font size commands] +% If called outside the lines of an IEEEeqnarray, sets the height +% and depth of both the master and local struts. If called inside +% an IEEEeqnarray line, sets the height and depth of the local strut +% only and sets the flag to indicate the use of the local strut +% values. If the height or depth is left blank, 0.7\normalbaselineskip +% and 0.3\normalbaselineskip will be used, respectively. +% The optional argument can be used to evaluate the lengths under +% a different font size and styles. If none is specified, the current +% font is used. +% uses scratch registers \skip0, \skip2, \skip3, \dimen0, \dimen2 +\def\IEEEeqnarraystrutsize#1#2{\relax\@ifnextchar[{\@IEEEeqnarraystrutsize{#1}{#2}}{\@IEEEeqnarraystrutsize{#1}{#2}[\relax]}} +\def\@IEEEeqnarraystrutsize#1#2[#3]{\def\@IEEEeqnarraystrutsizeARG{#1}% +\ifx\@IEEEeqnarraystrutsizeARG\@empty% +{\setbox0=\hbox{#3\relax\global\skip3=0.7\normalbaselineskip}}% +\skip0=\skip3\relax% +\else% arg one present +{\setbox0=\hbox{#3\relax\global\skip3=#1\relax}}% +\skip0=\skip3\relax% +\fi% if null arg +\def\@IEEEeqnarraystrutsizeARG{#2}% +\ifx\@IEEEeqnarraystrutsizeARG\@empty% +{\setbox0=\hbox{#3\relax\global\skip3=0.3\normalbaselineskip}}% +\skip2=\skip3\relax% +\else% arg two present +{\setbox0=\hbox{#3\relax\global\skip3=#2\relax}}% +\skip2=\skip3\relax% +\fi% if null arg +% remove stretchability, just to be safe +\dimen0\skip0\relax% +\dimen2\skip2\relax% +% dimen0 = height, dimen2 = depth +\if@IEEEeqnarrayISinner% inner does not touch master strut size +\edef\@IEEEeqnarrayTHEstrutheight{\the\dimen0}% +\edef\@IEEEeqnarrayTHEstrutdepth{\the\dimen2}% +\@IEEEeqnarrayusemasterstrutfalse% do not use master +\else% outer, have to set master strut too +\edef\@IEEEeqnarrayTHEmasterstrutheight{\the\dimen0}% +\edef\@IEEEeqnarrayTHEmasterstrutdepth{\the\dimen2}% +\edef\@IEEEeqnarrayTHEstrutheight{\the\dimen0}% +\edef\@IEEEeqnarrayTHEstrutdepth{\the\dimen2}% +\@IEEEeqnarrayusemasterstruttrue% use master strut +\fi} + + +% usage: \IEEEeqnarraystrutsizeadd{added height}{added depth}[font size commands] +% If called outside the lines of an IEEEeqnarray, adds the given height +% and depth to both the master and local struts. +% If called inside an IEEEeqnarray line, adds the given height and depth +% to the local strut only and sets the flag to indicate the use +% of the local strut values. +% In both cases, if a height or depth is left blank, 0pt is used instead. +% The optional argument can be used to evaluate the lengths under +% a different font size and styles. If none is specified, the current +% font is used. +% uses scratch registers \skip0, \skip2, \skip3, \dimen0, \dimen2 +\def\IEEEeqnarraystrutsizeadd#1#2{\relax\@ifnextchar[{\@IEEEeqnarraystrutsizeadd{#1}{#2}}{\@IEEEeqnarraystrutsizeadd{#1}{#2}[\relax]}} +\def\@IEEEeqnarraystrutsizeadd#1#2[#3]{\def\@IEEEeqnarraystrutsizearg{#1}% +\ifx\@IEEEeqnarraystrutsizearg\@empty% +\skip0=0pt\relax% +\else% arg one present +{\setbox0=\hbox{#3\relax\global\skip3=#1}}% +\skip0=\skip3\relax% +\fi% if null arg +\def\@IEEEeqnarraystrutsizearg{#2}% +\ifx\@IEEEeqnarraystrutsizearg\@empty% +\skip2=0pt\relax% +\else% arg two present +{\setbox0=\hbox{#3\relax\global\skip3=#2}}% +\skip2=\skip3\relax% +\fi% if null arg +% remove stretchability, just to be safe +\dimen0\skip0\relax% +\dimen2\skip2\relax% +% dimen0 = height, dimen2 = depth +\if@IEEEeqnarrayISinner% inner does not touch master strut size +% get local strut size +\expandafter\skip0=\@IEEEeqnarrayTHEstrutheight\relax% +\expandafter\skip2=\@IEEEeqnarrayTHEstrutdepth\relax% +% add it to the user supplied values +\advance\dimen0 by \skip0\relax% +\advance\dimen2 by \skip2\relax% +% update the local strut size +\edef\@IEEEeqnarrayTHEstrutheight{\the\dimen0}% +\edef\@IEEEeqnarrayTHEstrutdepth{\the\dimen2}% +\@IEEEeqnarrayusemasterstrutfalse% do not use master +\else% outer, have to set master strut too +% get master strut size +\expandafter\skip0=\@IEEEeqnarrayTHEmasterstrutheight\relax% +\expandafter\skip2=\@IEEEeqnarrayTHEmasterstrutdepth\relax% +% add it to the user supplied values +\advance\dimen0 by \skip0\relax% +\advance\dimen2 by \skip2\relax% +% update the local and master strut sizes +\edef\@IEEEeqnarrayTHEmasterstrutheight{\the\dimen0}% +\edef\@IEEEeqnarrayTHEmasterstrutdepth{\the\dimen2}% +\edef\@IEEEeqnarrayTHEstrutheight{\the\dimen0}% +\edef\@IEEEeqnarrayTHEstrutdepth{\the\dimen2}% +\@IEEEeqnarrayusemasterstruttrue% use master strut +\fi} + + +% allow user a way to see the struts +\newif\ifIEEEvisiblestruts +\IEEEvisiblestrutsfalse + +% inserts an invisible strut using the master or local strut values +% uses scratch registers \skip0, \skip2, \dimen0, \dimen2 +\def\@IEEEeqnarrayinsertstrut{\relax% +\if@IEEEeqnarrayusemasterstrut +% get master strut size +\expandafter\skip0=\@IEEEeqnarrayTHEmasterstrutheight\relax% +\expandafter\skip2=\@IEEEeqnarrayTHEmasterstrutdepth\relax% +\else% +% get local strut size +\expandafter\skip0=\@IEEEeqnarrayTHEstrutheight\relax% +\expandafter\skip2=\@IEEEeqnarrayTHEstrutdepth\relax% +\fi% +% remove stretchability, probably not needed +\dimen0\skip0\relax% +\dimen2\skip2\relax% +% dimen0 = height, dimen2 = depth +% allow user to see struts if desired +\ifIEEEvisiblestruts% +\vrule width0.2pt height\dimen0 depth\dimen2\relax% +\else% +\vrule width0pt height\dimen0 depth\dimen2\relax\fi} + + +% creates an invisible strut, useable even outside \IEEEeqnarray +% if \IEEEvisiblestrutstrue, the strut will be visible and 0.2pt wide. +% usage: \IEEEstrut[height][depth][font size commands] +% default is \IEEEstrut[0.7\normalbaselineskip][0.3\normalbaselineskip][\relax] +% blank arguments inherit the default values +% uses \dimen0, \dimen2, \skip0, \skip2 +\def\IEEEstrut{\relax\@ifnextchar[{\@IEEEstrut}{\@IEEEstrut[0.7\normalbaselineskip]}} +\def\@IEEEstrut[#1]{\relax\@ifnextchar[{\@@IEEEstrut[#1]}{\@@IEEEstrut[#1][0.3\normalbaselineskip]}} +\def\@@IEEEstrut[#1][#2]{\relax\@ifnextchar[{\@@@IEEEstrut[#1][#2]}{\@@@IEEEstrut[#1][#2][\relax]}} +\def\@@@IEEEstrut[#1][#2][#3]{\mbox{#3\relax% +\def\@IEEEstrutARG{#1}% +\ifx\@IEEEstrutARG\@empty% +\skip0=0.7\normalbaselineskip\relax% +\else% +\skip0=#1\relax% +\fi% +\def\@IEEEstrutARG{#2}% +\ifx\@IEEEstrutARG\@empty% +\skip2=0.3\normalbaselineskip\relax% +\else% +\skip2=#2\relax% +\fi% +% remove stretchability, probably not needed +\dimen0\skip0\relax% +\dimen2\skip2\relax% +\ifIEEEvisiblestruts% +\vrule width0.2pt height\dimen0 depth\dimen2\relax% +\else% +\vrule width0.0pt height\dimen0 depth\dimen2\relax\fi}} + + +% enables strut mode by setting a default strut size and then zeroing the +% \baselineskip, \lineskip, \lineskiplimit and \jot +\def\IEEEeqnarraystrutmode{\IEEEeqnarraystrutsize{0.7\normalbaselineskip}{0.3\normalbaselineskip}[\relax]% +\baselineskip=0pt\lineskip=0pt\lineskiplimit=0pt\jot=0pt} + + + +\def\IEEEeqnarray{\@IEEEeqnarraystarformfalse\@IEEEeqnarray} +\def\endIEEEeqnarray{\end@IEEEeqnarray} + +\@namedef{IEEEeqnarray*}{\@IEEEeqnarraystarformtrue\@IEEEeqnarray} +\@namedef{endIEEEeqnarray*}{\end@IEEEeqnarray} + + +% \IEEEeqnarray is an enhanced \eqnarray. +% The star form defaults to not putting equation numbers at the end of each row. +% usage: \IEEEeqnarray[decl]{cols} +\def\@IEEEeqnarray{\relax\@ifnextchar[{\@@IEEEeqnarray}{\@@IEEEeqnarray[\relax]}} +\def\@@IEEEeqnarray[#1]#2{% + % default to showing the equation number or not based on whether or not + % the star form was involked + \if@IEEEeqnarraystarform\global\@eqnswfalse + \else% not the star form + \global\@eqnswtrue + \fi% if star form + \@IEEEissubequationfalse% default to no subequations + \@IEEElastlinewassubequationfalse% assume last line is not a sub equation + \@IEEEeqnarrayISinnerfalse% not yet within the lines of the halign + \@IEEEeqnarraystrutsize{0pt}{0pt}[\relax]% turn off struts by default + \@IEEEeqnarrayusemasterstruttrue% use master strut till user asks otherwise + \IEEEvisiblestrutsfalse% diagnostic mode defaults to off + % no extra space unless the user specifically requests it + \lineskip=0pt\relax + \lineskiplimit=0pt\relax + \baselineskip=\normalbaselineskip\relax% + \jot=\IEEEnormaljot\relax% + \mathsurround\z@\relax% no extra spacing around math + \@advanceIEEEeqncolcnttrue% advance the col counter for each col the user uses, + % used in \IEEEeqnarraymulticol and in the preamble build + \stepcounter{equation}% advance equation counter before first line + \setcounter{IEEEsubequation}{0}% no subequation yet + \def\@currentlabel{\p@equation\theequation}% redefine the ref label + \IEEEeqnarraydecl\relax% allow a way for the user to make global overrides + #1\relax% allow user to override defaults + \let\\\@IEEEeqnarraycr% replace newline with one that can put in eqn. numbers + \global\@IEEEeqncolcnt\z@% col. count = 0 for first line + \@IEEEbuildpreamble #2\end\relax% build the preamble and put it into \@IEEEtrantmptoksA + % put in the column for the equation number + \ifnum\@IEEEeqnnumcols>0\relax\@IEEEappendtoksA{&}\fi% col separator for those after the first + \toks0={##}% + % advance the \@IEEEeqncolcnt for the isolation col, this helps with error checking + \@IEEEappendtoksA{\global\advance\@IEEEeqncolcnt by 1\relax}% + % add the isolation column + \@IEEEappendtoksA{\tabskip\z@skip\bgroup\the\toks0\egroup}% + % advance the \@IEEEeqncolcnt for the equation number col, this helps with error checking + \@IEEEappendtoksA{&\global\advance\@IEEEeqncolcnt by 1\relax}% + % add the equation number col to the preamble + \@IEEEappendtoksA{\tabskip\z@skip\hb@xt@\z@\bgroup\hss\the\toks0\egroup}% + % note \@IEEEeqnnumcols does not count the equation col or isolation col + % set the starting tabskip glue as determined by the preamble build + \tabskip=\@IEEEBPstartglue\relax + % begin the display alignment + \@IEEEeqnarrayISinnertrue% commands are now within the lines + $$\everycr{}\halign to\displaywidth\bgroup + % "exspand" the preamble + \span\the\@IEEEtrantmptoksA\cr} + +% enter isolation/strut column (or the next column if the user did not use +% every column), record the strut status, complete the columns, do the strut if needed, +% restore counters to correct values and exit +\def\end@IEEEeqnarray{\@IEEEeqnarrayglobalizestrutstatus&\@@IEEEeqnarraycr\egroup% +\if@IEEElastlinewassubequation\global\advance\c@IEEEsubequation\m@ne\fi% +\global\advance\c@equation\m@ne% +$$\@ignoretrue} + +% need a way to remember if last line is a subequation +\newif\if@IEEElastlinewassubequation% +\@IEEElastlinewassubequationfalse + +% IEEEeqnarray uses a modifed \\ instead of the plain \cr to +% end rows. This allows for things like \\*[vskip amount] +% This "cr" macros are modified versions those for LaTeX2e's eqnarray +% the {\ifnum0=`} braces must be kept away from the last column to avoid +% altering spacing of its math, so we use & to advance to the next column +% as there is an isolation/strut column after the user's columns +\def\@IEEEeqnarraycr{\@IEEEeqnarrayglobalizestrutstatus&% save strut status and advance to next column + {\ifnum0=`}\fi + \@ifstar{% + \global\@eqpen\@M\@IEEEeqnarrayYCR + }{% + \global\@eqpen\interdisplaylinepenalty \@IEEEeqnarrayYCR + }% +} + +\def\@IEEEeqnarrayYCR{\@testopt\@IEEEeqnarrayXCR\z@skip} + +\def\@IEEEeqnarrayXCR[#1]{% + \ifnum0=`{\fi}% + \@@IEEEeqnarraycr + \noalign{\penalty\@eqpen\vskip\jot\vskip #1\relax}}% + +\def\@@IEEEeqnarraycr{\@IEEEtrantmptoksA={}% clear token register + \advance\@IEEEeqncolcnt by -1\relax% adjust col count because of the isolation column + \ifnum\@IEEEeqncolcnt>\@IEEEeqnnumcols\relax + \@IEEEclspkgerror{Too many columns within the IEEEeqnarray\MessageBreak + environment}% + {Use fewer \string &'s or put more columns in the IEEEeqnarry column\MessageBreak + specifications.}\relax% + \else + \loop% add cols if the user did not use them all + \ifnum\@IEEEeqncolcnt<\@IEEEeqnnumcols\relax + \@IEEEappendtoksA{&}% + \advance\@IEEEeqncolcnt by 1\relax% update the col count + \repeat + % this number of &'s will take us the the isolation column + \fi + % execute the &'s + \the\@IEEEtrantmptoksA% + % handle the strut/isolation column + \@IEEEeqnarrayinsertstrut% do the strut if needed + \@IEEEeqnarraystrutreset% reset the strut system for next line or IEEEeqnarray + &% and enter the equation number column + % is this line needs an equation number, display it and advance the + % (sub)equation counters, record what type this line was + \if@eqnsw% + \if@IEEEissubequation\theIEEEsubequationdis\addtocounter{equation}{1}\stepcounter{IEEEsubequation}% + \global\@IEEElastlinewassubequationtrue% + \else% display a standard equation number, initialize the IEEEsubequation counter + \theequationdis\stepcounter{equation}\setcounter{IEEEsubequation}{0}% + \global\@IEEElastlinewassubequationfalse\fi% + \fi% + % reset the eqnsw flag to indicate default preference of the display of equation numbers + \if@IEEEeqnarraystarform\global\@eqnswfalse\else\global\@eqnswtrue\fi + \global\@IEEEissubequationfalse% reset the subequation flag + % reset the number of columns the user actually used + \global\@IEEEeqncolcnt\z@\relax + % the real end of the line + \cr} + + + + + +% \IEEEeqnarraybox is like \IEEEeqnarray except the box form puts everything +% inside a vtop, vbox, or vcenter box depending on the letter in the second +% optional argument (t,b,c). Vbox is the default. Unlike \IEEEeqnarray, +% equation numbers are not displayed and \IEEEeqnarraybox can be nested. +% \IEEEeqnarrayboxm is for math mode (like \array) and does not put the vbox +% within an hbox. +% \IEEEeqnarrayboxt is for text mode (like \tabular) and puts the vbox within +% a \hbox{$ $} construct. +% \IEEEeqnarraybox will auto detect whether to use \IEEEeqnarrayboxm or +% \IEEEeqnarrayboxt depending on the math mode. +% The third optional argument specifies the width this box is to be set to - +% natural width is the default. +% The * forms do not add \jot line spacing +% usage: \IEEEeqnarraybox[decl][pos][width]{cols} +\def\IEEEeqnarrayboxm{\@IEEEeqnarraystarformfalse\@IEEEeqnarrayboxHBOXSWfalse\@IEEEeqnarraybox} +\def\endIEEEeqnarrayboxm{\end@IEEEeqnarraybox} +\@namedef{IEEEeqnarrayboxm*}{\@IEEEeqnarraystarformtrue\@IEEEeqnarrayboxHBOXSWfalse\@IEEEeqnarraybox} +\@namedef{endIEEEeqnarrayboxm*}{\end@IEEEeqnarraybox} + +\def\IEEEeqnarrayboxt{\@IEEEeqnarraystarformfalse\@IEEEeqnarrayboxHBOXSWtrue\@IEEEeqnarraybox} +\def\endIEEEeqnarrayboxt{\end@IEEEeqnarraybox} +\@namedef{IEEEeqnarrayboxt*}{\@IEEEeqnarraystarformtrue\@IEEEeqnarrayboxHBOXSWtrue\@IEEEeqnarraybox} +\@namedef{endIEEEeqnarrayboxt*}{\end@IEEEeqnarraybox} + +\def\IEEEeqnarraybox{\@IEEEeqnarraystarformfalse\ifmmode\@IEEEeqnarrayboxHBOXSWfalse\else\@IEEEeqnarrayboxHBOXSWtrue\fi% +\@IEEEeqnarraybox} +\def\endIEEEeqnarraybox{\end@IEEEeqnarraybox} + +\@namedef{IEEEeqnarraybox*}{\@IEEEeqnarraystarformtrue\ifmmode\@IEEEeqnarrayboxHBOXSWfalse\else\@IEEEeqnarrayboxHBOXSWtrue\fi% +\@IEEEeqnarraybox} +\@namedef{endIEEEeqnarraybox*}{\end@IEEEeqnarraybox} + +% flag to indicate if the \IEEEeqnarraybox needs to put things into an hbox{$ $} +% for \vcenter in non-math mode +\newif\if@IEEEeqnarrayboxHBOXSW% +\@IEEEeqnarrayboxHBOXSWfalse + +\def\@IEEEeqnarraybox{\relax\@ifnextchar[{\@@IEEEeqnarraybox}{\@@IEEEeqnarraybox[\relax]}} +\def\@@IEEEeqnarraybox[#1]{\relax\@ifnextchar[{\@@@IEEEeqnarraybox[#1]}{\@@@IEEEeqnarraybox[#1][b]}} +\def\@@@IEEEeqnarraybox[#1][#2]{\relax\@ifnextchar[{\@@@@IEEEeqnarraybox[#1][#2]}{\@@@@IEEEeqnarraybox[#1][#2][\relax]}} + +% #1 = decl; #2 = t,b,c; #3 = width, #4 = col specs +\def\@@@@IEEEeqnarraybox[#1][#2][#3]#4{\@IEEEeqnarrayISinnerfalse % not yet within the lines of the halign + \@IEEEeqnarraymasterstrutsave% save current master strut values + \@IEEEeqnarraystrutsize{0pt}{0pt}[\relax]% turn off struts by default + \@IEEEeqnarrayusemasterstruttrue% use master strut till user asks otherwise + \IEEEvisiblestrutsfalse% diagnostic mode defaults to off + % no extra space unless the user specifically requests it + \lineskip=0pt\relax% + \lineskiplimit=0pt\relax% + \baselineskip=\normalbaselineskip\relax% + \jot=\IEEEnormaljot\relax% + \mathsurround\z@\relax% no extra spacing around math + % the default end glues are zero for an \IEEEeqnarraybox + \edef\@IEEEeqnarraycolSEPdefaultstart{\@IEEEeqnarraycolSEPzero}% default start glue + \edef\@IEEEeqnarraycolSEPdefaultend{\@IEEEeqnarraycolSEPzero}% default end glue + \edef\@IEEEeqnarraycolSEPdefaultmid{\@IEEEeqnarraycolSEPzero}% default inter-column glue + \@advanceIEEEeqncolcntfalse% do not advance the col counter for each col the user uses, + % used in \IEEEeqnarraymulticol and in the preamble build + \IEEEeqnarrayboxdecl\relax% allow a way for the user to make global overrides + #1\relax% allow user to override defaults + \let\\\@IEEEeqnarrayboxcr% replace newline with one that allows optional spacing + \@IEEEbuildpreamble #4\end\relax% build the preamble and put it into \@IEEEtrantmptoksA + % add an isolation column to the preamble to stop \\'s {} from getting into the last col + \ifnum\@IEEEeqnnumcols>0\relax\@IEEEappendtoksA{&}\fi% col separator for those after the first + \toks0={##}% + % add the isolation column to the preamble + \@IEEEappendtoksA{\tabskip\z@skip\bgroup\the\toks0\egroup}% + % set the starting tabskip glue as determined by the preamble build + \tabskip=\@IEEEBPstartglue\relax + % begin the alignment + \everycr{}% + % use only the very first token to determine the positioning + % this stops some problems when the user uses more than one letter, + % but is probably not worth the effort + % \noindent is used as a delimiter + \def\@IEEEgrabfirstoken##1##2\noindent{\let\@IEEEgrabbedfirstoken=##1}% + \@IEEEgrabfirstoken#2\relax\relax\noindent + % \@IEEEgrabbedfirstoken has the first token, the rest are discarded + % if we need to put things into and hbox and go into math mode, do so now + \if@IEEEeqnarrayboxHBOXSW \leavevmode \hbox \bgroup $\fi% + % use the appropriate vbox type + \if\@IEEEgrabbedfirstoken t\relax\vtop\else\if\@IEEEgrabbedfirstoken c\relax% + \vcenter\else\vbox\fi\fi\bgroup% + \@IEEEeqnarrayISinnertrue% commands are now within the lines + \ifx#3\relax\halign\else\halign to #3\relax\fi% + \bgroup + % "exspand" the preamble + \span\the\@IEEEtrantmptoksA\cr} + +% carry strut status and enter the isolation/strut column, +% exit from math mode if needed, and exit +\def\end@IEEEeqnarraybox{\@IEEEeqnarrayglobalizestrutstatus% carry strut status +&% enter isolation/strut column +\@IEEEeqnarrayinsertstrut% do strut if needed +\@IEEEeqnarraymasterstrutrestore% restore the previous master strut values +% reset the strut system for next IEEEeqnarray +% (sets local strut values back to previous master strut values) +\@IEEEeqnarraystrutreset% +% ensure last line, exit from halign, close vbox +\crcr\egroup\egroup% +% exit from math mode and close hbox if needed +\if@IEEEeqnarrayboxHBOXSW $\egroup\fi} + + + +% IEEEeqnarraybox uses a modifed \\ instead of the plain \cr to +% end rows. This allows for things like \\[vskip amount] +% This "cr" macros are modified versions those for LaTeX2e's eqnarray +% For IEEEeqnarraybox, \\* is the same as \\ +% the {\ifnum0=`} braces must be kept away from the last column to avoid +% altering spacing of its math, so we use & to advance to the isolation/strut column +% carry strut status into isolation/strut column +\def\@IEEEeqnarrayboxcr{\@IEEEeqnarrayglobalizestrutstatus% carry strut status +&% enter isolation/strut column +\@IEEEeqnarrayinsertstrut% do strut if needed +% reset the strut system for next line or IEEEeqnarray +\@IEEEeqnarraystrutreset% +{\ifnum0=`}\fi% +\@ifstar{\@IEEEeqnarrayboxYCR}{\@IEEEeqnarrayboxYCR}} + +% test and setup the optional argument to \\[] +\def\@IEEEeqnarrayboxYCR{\@testopt\@IEEEeqnarrayboxXCR\z@skip} + +% IEEEeqnarraybox does not automatically increase line spacing by \jot +\def\@IEEEeqnarrayboxXCR[#1]{\ifnum0=`{\fi}% +\cr\noalign{\if@IEEEeqnarraystarform\else\vskip\jot\fi\vskip#1\relax}} + + + +% starts the halign preamble build +\def\@IEEEbuildpreamble{\@IEEEtrantmptoksA={}% clear token register +\let\@IEEEBPcurtype=u%current column type is not yet known +\let\@IEEEBPprevtype=s%the previous column type was the start +\let\@IEEEBPnexttype=u%next column type is not yet known +% ensure these are valid +\def\@IEEEBPcurglue={0pt plus 0pt minus 0pt}% +\def\@IEEEBPcurcolname{@IEEEdefault}% name of current column definition +% currently acquired numerically referenced glue +% use a name that is easier to remember +\let\@IEEEBPcurnum=\@IEEEtrantmpcountA% +\@IEEEBPcurnum=0% +% tracks number of columns in the preamble +\@IEEEeqnnumcols=0% +% record the default end glues +\edef\@IEEEBPstartglue{\@IEEEeqnarraycolSEPdefaultstart}% +\edef\@IEEEBPendglue{\@IEEEeqnarraycolSEPdefaultend}% +% now parse the user's column specifications +\@@IEEEbuildpreamble} + + +% parses and builds the halign preamble +\def\@@IEEEbuildpreamble#1#2{\let\@@nextIEEEbuildpreamble=\@@IEEEbuildpreamble% +% use only the very first token to check the end +% \noindent is used as a delimiter as \end can be present here +\def\@IEEEgrabfirstoken##1##2\noindent{\let\@IEEEgrabbedfirstoken=##1}% +\@IEEEgrabfirstoken#1\relax\relax\noindent +\ifx\@IEEEgrabbedfirstoken\end\let\@@nextIEEEbuildpreamble=\@@IEEEfinishpreamble\else% +% identify current and next token type +\@IEEEgetcoltype{#1}{\@IEEEBPcurtype}{1}% current, error on invalid +\@IEEEgetcoltype{#2}{\@IEEEBPnexttype}{0}% next, no error on invalid next +% if curtype is a glue, get the glue def +\if\@IEEEBPcurtype g\@IEEEgetcurglue{#1}{\@IEEEBPcurglue}\fi% +% if curtype is a column, get the column def and set the current column name +\if\@IEEEBPcurtype c\@IEEEgetcurcol{#1}\fi% +% if curtype is a numeral, acquire the user defined glue +\if\@IEEEBPcurtype n\@IEEEprocessNcol{#1}\fi% +% process the acquired glue +\if\@IEEEBPcurtype g\@IEEEprocessGcol\fi% +% process the acquired col +\if\@IEEEBPcurtype c\@IEEEprocessCcol\fi% +% ready prevtype for next col spec. +\let\@IEEEBPprevtype=\@IEEEBPcurtype% +% be sure and put back the future token(s) as a group +\fi\@@nextIEEEbuildpreamble{#2}} + + +% executed just after preamble build is completed +% warn about zero cols, and if prevtype type = u, put in end tabskip glue +\def\@@IEEEfinishpreamble#1{\ifnum\@IEEEeqnnumcols<1\relax +\@IEEEclspkgerror{No column specifiers declared for IEEEeqnarray}% +{At least one column type must be declared for each IEEEeqnarray.}% +\fi%num cols less than 1 +%if last type undefined, set default end tabskip glue +\if\@IEEEBPprevtype u\@IEEEappendtoksA{\tabskip=\@IEEEBPendglue}\fi} + + +% Identify and return the column specifier's type code +\def\@IEEEgetcoltype#1#2#3{% +% use only the very first token to determine the type +% \noindent is used as a delimiter as \end can be present here +\def\@IEEEgrabfirstoken##1##2\noindent{\let\@IEEEgrabbedfirstoken=##1}% +\@IEEEgrabfirstoken#1\relax\relax\noindent +% \@IEEEgrabfirstoken has the first token, the rest are discarded +% n = number +% g = glue (any other char in catagory 12) +% c = letter +% e = \end +% u = undefined +% third argument: 0 = no error message, 1 = error on invalid char +\let#2=u\relax% assume invalid until know otherwise +\ifx\@IEEEgrabbedfirstoken\end\let#2=e\else +\ifcat\@IEEEgrabbedfirstoken\relax\else% screen out control sequences +\if0\@IEEEgrabbedfirstoken\let#2=n\else +\if1\@IEEEgrabbedfirstoken\let#2=n\else +\if2\@IEEEgrabbedfirstoken\let#2=n\else +\if3\@IEEEgrabbedfirstoken\let#2=n\else +\if4\@IEEEgrabbedfirstoken\let#2=n\else +\if5\@IEEEgrabbedfirstoken\let#2=n\else +\if6\@IEEEgrabbedfirstoken\let#2=n\else +\if7\@IEEEgrabbedfirstoken\let#2=n\else +\if8\@IEEEgrabbedfirstoken\let#2=n\else +\if9\@IEEEgrabbedfirstoken\let#2=n\else +\ifcat,\@IEEEgrabbedfirstoken\let#2=g\relax +\else\ifcat a\@IEEEgrabbedfirstoken\let#2=c\relax\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi +\if#2u\relax +\if0\noexpand#3\relax\else\@IEEEclspkgerror{Invalid character in column specifications}% +{Only letters, numerals and certain other symbols are allowed \MessageBreak +as IEEEeqnarray column specifiers.}\fi\fi} + + +% identify the current letter referenced column +% if invalid, use a default column +\def\@IEEEgetcurcol#1{\expandafter\ifx\csname @IEEEeqnarraycolDEF#1\endcsname\@IEEEeqnarraycolisdefined% +\def\@IEEEBPcurcolname{#1}\else% invalid column name +\@IEEEclspkgerror{Invalid column type "#1" in column specifications.\MessageBreak +Using a default centering column instead}% +{You must define IEEEeqnarray column types before use.}% +\def\@IEEEBPcurcolname{@IEEEdefault}\fi} + + +% identify and return the predefined (punctuation) glue value +\def\@IEEEgetcurglue#1#2{% +% ! = \! (neg small) -0.16667em (-3/18 em) +% , = \, (small) 0.16667em ( 3/18 em) +% : = \: (med) 0.22222em ( 4/18 em) +% ; = \; (large) 0.27778em ( 5/18 em) +% ' = \quad 1em +% " = \qquad 2em +% . = 0.5\arraycolsep +% / = \arraycolsep +% ? = 2\arraycolsep +% * = 1fil +% + = \@IEEEeqnarraycolSEPcenter +% - = \@IEEEeqnarraycolSEPzero +% Note that all em values are referenced to the math font (textfont2) fontdimen6 +% value for 1em. +% +% use only the very first token to determine the type +% this prevents errant tokens from getting in the main text +% \noindent is used as a delimiter here +\def\@IEEEgrabfirstoken##1##2\noindent{\let\@IEEEgrabbedfirstoken=##1}% +\@IEEEgrabfirstoken#1\relax\relax\noindent +% get the math font 1em value +% LaTeX2e's NFSS2 does not preload the fonts, but \IEEEeqnarray needs +% to gain access to the math (\textfont2) font's spacing parameters. +% So we create a bogus box here that uses the math font to ensure +% that \textfont2 is loaded and ready. If this is not done, +% the \textfont2 stuff here may not work. +% Thanks to Bernd Raichle for his 1997 post on this topic. +{\setbox0=\hbox{$\displaystyle\relax$}}% +% fontdimen6 has the width of 1em (a quad). +\@IEEEtrantmpdimenA=\fontdimen6\textfont2\relax% +% identify the glue value based on the first token +% we discard anything after the first +\if!\@IEEEgrabbedfirstoken\@IEEEtrantmpdimenA=-0.16667\@IEEEtrantmpdimenA\edef#2{\the\@IEEEtrantmpdimenA}\else +\if,\@IEEEgrabbedfirstoken\@IEEEtrantmpdimenA=0.16667\@IEEEtrantmpdimenA\edef#2{\the\@IEEEtrantmpdimenA}\else +\if:\@IEEEgrabbedfirstoken\@IEEEtrantmpdimenA=0.22222\@IEEEtrantmpdimenA\edef#2{\the\@IEEEtrantmpdimenA}\else +\if;\@IEEEgrabbedfirstoken\@IEEEtrantmpdimenA=0.27778\@IEEEtrantmpdimenA\edef#2{\the\@IEEEtrantmpdimenA}\else +\if'\@IEEEgrabbedfirstoken\@IEEEtrantmpdimenA=1\@IEEEtrantmpdimenA\edef#2{\the\@IEEEtrantmpdimenA}\else +\if"\@IEEEgrabbedfirstoken\@IEEEtrantmpdimenA=2\@IEEEtrantmpdimenA\edef#2{\the\@IEEEtrantmpdimenA}\else +\if.\@IEEEgrabbedfirstoken\@IEEEtrantmpdimenA=0.5\arraycolsep\edef#2{\the\@IEEEtrantmpdimenA}\else +\if/\@IEEEgrabbedfirstoken\edef#2{\the\arraycolsep}\else +\if?\@IEEEgrabbedfirstoken\@IEEEtrantmpdimenA=2\arraycolsep\edef#2{\the\@IEEEtrantmpdimenA}\else +\if *\@IEEEgrabbedfirstoken\edef#2{0pt plus 1fil minus 0pt}\else +\if+\@IEEEgrabbedfirstoken\edef#2{\@IEEEeqnarraycolSEPcenter}\else +\if-\@IEEEgrabbedfirstoken\edef#2{\@IEEEeqnarraycolSEPzero}\else +\edef#2{\@IEEEeqnarraycolSEPzero}% +\@IEEEclspkgerror{Invalid predefined inter-column glue type "#1" in\MessageBreak +column specifications. Using a default value of\MessageBreak +0pt instead}% +{Only !,:;'"./?*+ and - are valid predefined glue types in the\MessageBreak +IEEEeqnarray column specifications.}\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi} + + + +% process a numerical digit from the column specification +% and look up the corresponding user defined glue value +% can transform current type from n to g or a as the user defined glue is acquired +\def\@IEEEprocessNcol#1{\if\@IEEEBPprevtype g% +\@IEEEclspkgerror{Back-to-back inter-column glue specifiers in column\MessageBreak +specifications. Ignoring consecutive glue specifiers\MessageBreak +after the first}% +{You cannot have two or more glue types next to each other\MessageBreak +in the IEEEeqnarray column specifications.}% +\let\@IEEEBPcurtype=a% abort this glue, future digits will be discarded +\@IEEEBPcurnum=0\relax% +\else% if we previously aborted a glue +\if\@IEEEBPprevtype a\@IEEEBPcurnum=0\let\@IEEEBPcurtype=a%maintain digit abortion +\else%acquire this number +% save the previous type before the numerical digits started +\if\@IEEEBPprevtype n\else\let\@IEEEBPprevsavedtype=\@IEEEBPprevtype\fi% +\multiply\@IEEEBPcurnum by 10\relax% +\advance\@IEEEBPcurnum by #1\relax% add in number, \relax is needed to stop TeX's number scan +\if\@IEEEBPnexttype n\else%close acquisition +\expandafter\ifx\csname @IEEEeqnarraycolSEPDEF\expandafter\romannumeral\number\@IEEEBPcurnum\endcsname\@IEEEeqnarraycolisdefined% +\edef\@IEEEBPcurglue{\csname @IEEEeqnarraycolSEP\expandafter\romannumeral\number\@IEEEBPcurnum\endcsname}% +\else%user glue not defined +\@IEEEclspkgerror{Invalid user defined inter-column glue type "\number\@IEEEBPcurnum" in\MessageBreak +column specifications. Using a default value of\MessageBreak +0pt instead}% +{You must define all IEEEeqnarray numerical inter-column glue types via\MessageBreak +\string\IEEEeqnarraydefcolsep \space before they are used in column specifications.}% +\edef\@IEEEBPcurglue{\@IEEEeqnarraycolSEPzero}% +\fi% glue defined or not +\let\@IEEEBPcurtype=g% change the type to reflect the acquired glue +\let\@IEEEBPprevtype=\@IEEEBPprevsavedtype% restore the prev type before this number glue +\@IEEEBPcurnum=0\relax%ready for next acquisition +\fi%close acquisition, get glue +\fi%discard or acquire number +\fi%prevtype glue or not +} + + +% process an acquired glue +% add any acquired column/glue pair to the preamble +\def\@IEEEprocessGcol{\if\@IEEEBPprevtype a\let\@IEEEBPcurtype=a%maintain previous glue abortions +\else +% if this is the start glue, save it, but do nothing else +% as this is not used in the preamble, but before +\if\@IEEEBPprevtype s\edef\@IEEEBPstartglue{\@IEEEBPcurglue}% +\else%not the start glue +\if\@IEEEBPprevtype g%ignore if back to back glues +\@IEEEclspkgerror{Back-to-back inter-column glue specifiers in column\MessageBreak +specifications. Ignoring consecutive glue specifiers\MessageBreak +after the first}% +{You cannot have two or more glue types next to each other\MessageBreak +in the IEEEeqnarray column specifications.}% +\let\@IEEEBPcurtype=a% abort this glue +\else% not a back to back glue +\if\@IEEEBPprevtype c\relax% if the previoustype was a col, add column/glue pair to preamble +\ifnum\@IEEEeqnnumcols>0\relax\@IEEEappendtoksA{&}\fi +\toks0={##}% +% make preamble advance col counter if this environment needs this +\if@advanceIEEEeqncolcnt\@IEEEappendtoksA{\global\advance\@IEEEeqncolcnt by 1\relax}\fi +% insert the column defintion into the preamble, being careful not to expand +% the column definition +\@IEEEappendtoksA{\tabskip=\@IEEEBPcurglue}% +\@IEEEappendNOEXPANDtoksA{\begingroup\csname @IEEEeqnarraycolPRE}% +\@IEEEappendtoksA{\@IEEEBPcurcolname}% +\@IEEEappendNOEXPANDtoksA{\endcsname}% +\@IEEEappendtoksA{\the\toks0}% +\@IEEEappendNOEXPANDtoksA{\relax\relax\relax\relax\relax% +\relax\relax\relax\relax\relax\csname @IEEEeqnarraycolPOST}% +\@IEEEappendtoksA{\@IEEEBPcurcolname}% +\@IEEEappendNOEXPANDtoksA{\endcsname\relax\relax\relax\relax\relax% +\relax\relax\relax\relax\relax\endgroup}% +\advance\@IEEEeqnnumcols by 1\relax%one more column in the preamble +\else% error: non-start glue with no pending column +\@IEEEclspkgerror{Inter-column glue specifier without a prior column\MessageBreak +type in the column specifications. Ignoring this glue\MessageBreak +specifier}% +{Except for the first and last positions, glue can be placed only\MessageBreak +between column types.}% +\let\@IEEEBPcurtype=a% abort this glue +\fi% previous was a column +\fi% back-to-back glues +\fi% is start column glue +\fi% prev type not a +} + + +% process an acquired letter referenced column and, if necessary, add it to the preamble +\def\@IEEEprocessCcol{\if\@IEEEBPnexttype g\else +\if\@IEEEBPnexttype n\else +% we have a column followed by something other than a glue (or numeral glue) +% so we must add this column to the preamble now +\ifnum\@IEEEeqnnumcols>0\relax\@IEEEappendtoksA{&}\fi%col separator for those after the first +\if\@IEEEBPnexttype e\@IEEEappendtoksA{\tabskip=\@IEEEBPendglue\relax}\else%put in end glue +\@IEEEappendtoksA{\tabskip=\@IEEEeqnarraycolSEPdefaultmid\relax}\fi% or default mid glue +\toks0={##}% +% make preamble advance col counter if this environment needs this +\if@advanceIEEEeqncolcnt\@IEEEappendtoksA{\global\advance\@IEEEeqncolcnt by 1\relax}\fi +% insert the column definition into the preamble, being careful not to expand +% the column definition +\@IEEEappendNOEXPANDtoksA{\begingroup\csname @IEEEeqnarraycolPRE}% +\@IEEEappendtoksA{\@IEEEBPcurcolname}% +\@IEEEappendNOEXPANDtoksA{\endcsname}% +\@IEEEappendtoksA{\the\toks0}% +\@IEEEappendNOEXPANDtoksA{\relax\relax\relax\relax\relax% +\relax\relax\relax\relax\relax\csname @IEEEeqnarraycolPOST}% +\@IEEEappendtoksA{\@IEEEBPcurcolname}% +\@IEEEappendNOEXPANDtoksA{\endcsname\relax\relax\relax\relax\relax% +\relax\relax\relax\relax\relax\endgroup}% +\advance\@IEEEeqnnumcols by 1\relax%one more column in the preamble +\fi%next type not numeral +\fi%next type not glue +} + + +%% +%% END OF IEEEeqnarry DEFINITIONS +%% + + + + +% set up the running headings, this complex because of all the different +% modes IEEEtran supports +\if@twoside + \ifCLASSOPTIONtechnote + \def\ps@headings{% + \def\@oddhead{\hbox{}\scriptsize\leftmark \hfil \thepage} + \def\@evenhead{\scriptsize\thepage \hfil \leftmark\hbox{}} + \ifCLASSOPTIONdraftcls + \ifCLASSOPTIONdraftclsnofoot + \def\@oddfoot{}\def\@evenfoot{}% + \else + \def\@oddfoot{\scriptsize\@date\hfil DRAFT} + \def\@evenfoot{\scriptsize DRAFT\hfil\@date} + \fi + \else + \def\@oddfoot{}\def\@evenfoot{} + \fi} + \else % not a technote + \def\ps@headings{% + \ifCLASSOPTIONconference + \def\@oddhead{} + \def\@evenhead{} + \else + \def\@oddhead{\hbox{}\scriptsize\rightmark \hfil \thepage} + \def\@evenhead{\scriptsize\thepage \hfil \leftmark\hbox{}} + \fi + \ifCLASSOPTIONdraftcls + \def\@oddhead{\hbox{}\scriptsize\rightmark \hfil \thepage} + \def\@evenhead{\scriptsize\thepage \hfil \leftmark\hbox{}} + \ifCLASSOPTIONdraftclsnofoot + \def\@oddfoot{}\def\@evenfoot{}% + \else + \def\@oddfoot{\scriptsize\@date\hfil DRAFT} + \def\@evenfoot{\scriptsize DRAFT\hfil\@date} + \fi + \else + \def\@oddfoot{}\def\@evenfoot{}% + \fi} + \fi +\else % single side +\def\ps@headings{% + \ifCLASSOPTIONconference + \def\@oddhead{} + \def\@evenhead{} + \else + \def\@oddhead{\hbox{}\scriptsize\leftmark \hfil \thepage} + \def\@evenhead{} + \fi + \ifCLASSOPTIONdraftcls + \def\@oddhead{\hbox{}\scriptsize\leftmark \hfil \thepage} + \def\@evenhead{} + \ifCLASSOPTIONdraftclsnofoot + \def\@oddfoot{} + \else + \def\@oddfoot{\scriptsize \@date \hfil DRAFT} + \fi + \else + \def\@oddfoot{} + \fi + \def\@evenfoot{}} +\fi + + +% title page style +\def\ps@IEEEtitlepagestyle{\def\@oddfoot{}\def\@evenfoot{}% +\ifCLASSOPTIONconference + \def\@oddhead{}% + \def\@evenhead{}% +\else + \def\@oddhead{\hbox{}\scriptsize\leftmark \hfil \thepage}% + \def\@evenhead{\scriptsize\thepage \hfil \leftmark\hbox{}}% +\fi +\ifCLASSOPTIONdraftcls + \def\@oddhead{\hbox{}\scriptsize\leftmark \hfil \thepage}% + \def\@evenhead{\scriptsize\thepage \hfil \leftmark\hbox{}}% + \ifCLASSOPTIONdraftclsnofoot\else + \def\@oddfoot{\scriptsize \@date\hfil DRAFT}% + \def\@evenfoot{\scriptsize DRAFT\hfil \@date}% + \fi +\else + % all non-draft mode footers + \if@IEEEusingpubid + % for title pages that are using a pubid + % do not repeat pubid if using peer review option + \ifCLASSOPTIONpeerreview + \else + \footskip 0pt% + \ifCLASSOPTIONcompsoc + \def\@oddfoot{\hss\normalfont\scriptsize\raisebox{-1.5\@IEEEnormalsizeunitybaselineskip}[0ex][0ex]{\@IEEEpubid}\hss}% + \def\@evenfoot{\hss\normalfont\scriptsize\raisebox{-1.5\@IEEEnormalsizeunitybaselineskip}[0ex][0ex]{\@IEEEpubid}\hss}% + \else + \def\@oddfoot{\hss\normalfont\footnotesize\raisebox{1.5ex}[1.5ex]{\@IEEEpubid}\hss}% + \def\@evenfoot{\hss\normalfont\footnotesize\raisebox{1.5ex}[1.5ex]{\@IEEEpubid}\hss}% + \fi + \fi + \fi +\fi} + + +% peer review cover page style +\def\ps@IEEEpeerreviewcoverpagestyle{% +\def\@oddhead{}\def\@evenhead{}% +\def\@oddfoot{}\def\@evenfoot{}% +\ifCLASSOPTIONdraftcls + \ifCLASSOPTIONdraftclsnofoot\else + \def\@oddfoot{\scriptsize \@date\hfil DRAFT}% + \def\@evenfoot{\scriptsize DRAFT\hfil \@date}% + \fi +\else + % non-draft mode footers + \if@IEEEusingpubid + \footskip 0pt% + \ifCLASSOPTIONcompsoc + \def\@oddfoot{\hss\normalfont\scriptsize\raisebox{-1.5\@IEEEnormalsizeunitybaselineskip}[0ex][0ex]{\@IEEEpubid}\hss}% + \def\@evenfoot{\hss\normalfont\scriptsize\raisebox{-1.5\@IEEEnormalsizeunitybaselineskip}[0ex][0ex]{\@IEEEpubid}\hss}% + \else + \def\@oddfoot{\hss\normalfont\footnotesize\raisebox{1.5ex}[1.5ex]{\@IEEEpubid}\hss}% + \def\@evenfoot{\hss\normalfont\footnotesize\raisebox{1.5ex}[1.5ex]{\@IEEEpubid}\hss}% + \fi + \fi +\fi} + + +% start with empty headings +\def\rightmark{}\def\leftmark{} + + +%% Defines the command for putting the header. \footernote{TEXT} is the same +%% as \markboth{TEXT}{TEXT}. +%% Note that all the text is forced into uppercase, if you have some text +%% that needs to be in lower case, for instance et. al., then either manually +%% set \leftmark and \rightmark or use \MakeLowercase{et. al.} within the +%% arguments to \markboth. +\def\markboth#1#2{\def\leftmark{\@IEEEcompsoconly{\sffamily}\MakeUppercase{#1}}% +\def\rightmark{\@IEEEcompsoconly{\sffamily}\MakeUppercase{#2}}} +\def\footernote#1{\markboth{#1}{#1}} + +\def\today{\ifcase\month\or + January\or February\or March\or April\or May\or June\or + July\or August\or September\or October\or November\or December\fi + \space\number\day, \number\year} + + + + +%% CITATION AND BIBLIOGRAPHY COMMANDS +%% +%% V1.6 no longer supports the older, nonstandard \shortcite and \citename setup stuff +% +% +% Modify Latex2e \@citex to separate citations with "], [" +\def\@citex[#1]#2{% + \let\@citea\@empty + \@cite{\@for\@citeb:=#2\do + {\@citea\def\@citea{], [}% + \edef\@citeb{\expandafter\@firstofone\@citeb\@empty}% + \if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi + \@ifundefined{b@\@citeb}{\mbox{\reset@font\bfseries ?}% + \G@refundefinedtrue + \@latex@warning + {Citation `\@citeb' on page \thepage \space undefined}}% + {\hbox{\csname b@\@citeb\endcsname}}}}{#1}} + +% V1.6 we create hooks for the optional use of Donald Arseneau's +% cite.sty package. cite.sty is "smart" and will notice that the +% following format controls are already defined and will not +% redefine them. The result will be the proper sorting of the +% citation numbers and auto detection of 3 or more entry "ranges" - +% all in IEEE style: [1], [2], [5]--[7], [12] +% This also allows for an optional note, i.e., \cite[mynote]{..}. +% If the \cite with note has more than one reference, the note will +% be applied to the last of the listed references. It is generally +% desired that if a note is given, only one reference is listed in +% that \cite. +% Thanks to Mr. Arseneau for providing the required format arguments +% to produce the IEEE style. +\def\citepunct{], [} +\def\citedash{]--[} + +% V1.7 default to using same font for urls made by url.sty +\AtBeginDocument{\csname url@samestyle\endcsname} + +% V1.6 class files should always provide these +\def\newblock{\hskip .11em\@plus.33em\@minus.07em} +\let\@openbib@code\@empty + + +% Provide support for the control entries of IEEEtran.bst V1.00 and later. +% V1.7 optional argument allows for a different aux file to be specified in +% order to handle multiple bibliographies. For example, with multibib.sty: +% \newcites{sec}{Secondary Literature} +% \bstctlcite[@auxoutsec]{BSTcontrolhak} +\def\bstctlcite{\@ifnextchar[{\@bstctlcite}{\@bstctlcite[@auxout]}} +\def\@bstctlcite[#1]#2{\@bsphack + \@for\@citeb:=#2\do{% + \edef\@citeb{\expandafter\@firstofone\@citeb}% + \if@filesw\immediate\write\csname #1\endcsname{\string\citation{\@citeb}}\fi}% + \@esphack} + +% V1.6 provide a way for a user to execute a command just before +% a given reference number - used to insert a \newpage to balance +% the columns on the last page +\edef\@IEEEtriggerrefnum{0} % the default of zero means that + % the command is not executed +\def\@IEEEtriggercmd{\newpage} + +% allow the user to alter the triggered command +\long\def\IEEEtriggercmd#1{\long\def\@IEEEtriggercmd{#1}} + +% allow user a way to specify the reference number just before the +% command is executed +\def\IEEEtriggeratref#1{\@IEEEtrantmpcountA=#1% +\edef\@IEEEtriggerrefnum{\the\@IEEEtrantmpcountA}}% + +% trigger command at the given reference +\def\@IEEEbibitemprefix{\@IEEEtrantmpcountA=\@IEEEtriggerrefnum\relax% +\advance\@IEEEtrantmpcountA by -1\relax% +\ifnum\c@enumiv=\@IEEEtrantmpcountA\relax\@IEEEtriggercmd\relax\fi} + + +\def\@biblabel#1{[#1]} + +% compsoc journals left align the reference numbers +\@IEEEcompsocnotconfonly{\def\@biblabel#1{[#1]\hfill}} + +% controls bib item spacing +\def\IEEEbibitemsep{2.5pt plus .5pt} + +\@IEEEcompsocconfonly{\def\IEEEbibitemsep{1\baselineskip plus 0.25\baselineskip minus 0.25\baselineskip}} + + +\def\thebibliography#1{\section*{\refname}% + \addcontentsline{toc}{section}{\refname}% + % V1.6 add some rubber space here and provide a command trigger + \footnotesize\@IEEEcompsocconfonly{\small}\vskip 0.3\baselineskip plus 0.1\baselineskip minus 0.1\baselineskip% + \list{\@biblabel{\@arabic\c@enumiv}}% + {\settowidth\labelwidth{\@biblabel{#1}}% + \leftmargin\labelwidth + \labelsep 1em + \advance\leftmargin\labelsep\relax + \itemsep \IEEEbibitemsep\relax + \usecounter{enumiv}% + \let\p@enumiv\@empty + \renewcommand\theenumiv{\@arabic\c@enumiv}}% + \let\@IEEElatexbibitem\bibitem% + \def\bibitem{\@IEEEbibitemprefix\@IEEElatexbibitem}% +\def\newblock{\hskip .11em plus .33em minus .07em}% +% originally: +% \sloppy\clubpenalty4000\widowpenalty4000% +% by adding the \interlinepenalty here, we make it more +% difficult, but not impossible, for LaTeX to break within a reference. +% IEEE almost never breaks a reference (but they do it more often with +% technotes). You may get an underfull vbox warning around the bibliography, +% but the final result will be much more like what IEEE will publish. +% MDS 11/2000 +\ifCLASSOPTIONtechnote\sloppy\clubpenalty4000\widowpenalty4000\interlinepenalty100% +\else\sloppy\clubpenalty4000\widowpenalty4000\interlinepenalty500\fi% + \sfcode`\.=1000\relax} +\let\endthebibliography=\endlist + + + + +% TITLE PAGE COMMANDS +% +% +% \IEEEmembership is used to produce the sublargesize italic font used to indicate author +% IEEE membership. compsoc uses a large size sans slant font +\def\IEEEmembership#1{{\@IEEEnotcompsoconly{\sublargesize}\normalfont\@IEEEcompsoconly{\sffamily}\textit{#1}}} + + +% \IEEEauthorrefmark{} produces a footnote type symbol to indicate author affiliation. +% When given an argument of 1 to 9, \IEEEauthorrefmark{} follows the standard LaTeX footnote +% symbol sequence convention. However, for arguments 10 and above, \IEEEauthorrefmark{} +% reverts to using lower case roman numerals, so it cannot overflow. Do note that you +% cannot use \footnotemark[] in place of \IEEEauthorrefmark{} within \author as the footnote +% symbols will have been turned off to prevent \thanks from creating footnote marks. +% \IEEEauthorrefmark{} produces a symbol that appears to LaTeX as having zero vertical +% height - this allows for a more compact line packing, but the user must ensure that +% the interline spacing is large enough to prevent \IEEEauthorrefmark{} from colliding +% with the text above. +% V1.7 make this a robust command +\DeclareRobustCommand*{\IEEEauthorrefmark}[1]{\raisebox{0pt}[0pt][0pt]{\textsuperscript{\footnotesize\ensuremath{\ifcase#1\or *\or \dagger\or \ddagger\or% + \mathsection\or \mathparagraph\or \|\or **\or \dagger\dagger% + \or \ddagger\ddagger \else\textsuperscript{\expandafter\romannumeral#1}\fi}}}} + + +% FONT CONTROLS AND SPACINGS FOR CONFERENCE MODE AUTHOR NAME AND AFFILIATION BLOCKS +% +% The default font styles for the author name and affiliation blocks (confmode) +\def\@IEEEauthorblockNstyle{\normalfont\@IEEEcompsocnotconfonly{\sffamily}\sublargesize\@IEEEcompsocconfonly{\large}} +\def\@IEEEauthorblockAstyle{\normalfont\@IEEEcompsocnotconfonly{\sffamily}\@IEEEcompsocconfonly{\itshape}\normalsize\@IEEEcompsocconfonly{\large}} +% The default if the user does not use an author block +\def\@IEEEauthordefaulttextstyle{\normalfont\@IEEEcompsocnotconfonly{\sffamily}\sublargesize} + +% spacing from title (or special paper notice) to author name blocks (confmode) +% can be negative +\def\@IEEEauthorblockconfadjspace{-0.25em} +% compsoc conferences need more space here +\@IEEEcompsocconfonly{\def\@IEEEauthorblockconfadjspace{0.75\@IEEEnormalsizeunitybaselineskip}} +\ifCLASSOPTIONconference\def\@IEEEauthorblockconfadjspace{20pt}\fi + +% spacing between name and affiliation blocks (confmode) +% This can be negative. +% IEEE doesn't want any added spacing here, but I will leave these +% controls in place in case they ever change their mind. +% Personally, I like 0.75ex. +%\def\@IEEEauthorblockNtopspace{0.75ex} +%\def\@IEEEauthorblockAtopspace{0.75ex} +\def\@IEEEauthorblockNtopspace{0.0ex} +\def\@IEEEauthorblockAtopspace{0.0ex} +% baseline spacing within name and affiliation blocks (confmode) +% must be positive, spacings below certain values will make +% the position of line of text sensitive to the contents of the +% line above it i.e., whether or not the prior line has descenders, +% subscripts, etc. For this reason it is a good idea to keep +% these above 2.6ex +\def\@IEEEauthorblockNinterlinespace{2.6ex} +\def\@IEEEauthorblockAinterlinespace{2.75ex} + +% This tracks the required strut size. +% See the \@IEEEauthorhalign command for the actual default value used. +\def\@IEEEauthorblockXinterlinespace{2.7ex} + +% variables to retain font size and style across groups +% values given here have no effect as they will be overwritten later +\gdef\@IEEESAVESTATEfontsize{10} +\gdef\@IEEESAVESTATEfontbaselineskip{12} +\gdef\@IEEESAVESTATEfontencoding{OT1} +\gdef\@IEEESAVESTATEfontfamily{ptm} +\gdef\@IEEESAVESTATEfontseries{m} +\gdef\@IEEESAVESTATEfontshape{n} + +% saves the current font attributes +\def\@IEEEcurfontSAVE{\global\let\@IEEESAVESTATEfontsize\f@size% +\global\let\@IEEESAVESTATEfontbaselineskip\f@baselineskip% +\global\let\@IEEESAVESTATEfontencoding\f@encoding% +\global\let\@IEEESAVESTATEfontfamily\f@family% +\global\let\@IEEESAVESTATEfontseries\f@series% +\global\let\@IEEESAVESTATEfontshape\f@shape} + +% restores the saved font attributes +\def\@IEEEcurfontRESTORE{\fontsize{\@IEEESAVESTATEfontsize}{\@IEEESAVESTATEfontbaselineskip}% +\fontencoding{\@IEEESAVESTATEfontencoding}% +\fontfamily{\@IEEESAVESTATEfontfamily}% +\fontseries{\@IEEESAVESTATEfontseries}% +\fontshape{\@IEEESAVESTATEfontshape}% +\selectfont} + + +% variable to indicate if the current block is the first block in the column +\newif\if@IEEEprevauthorblockincol \@IEEEprevauthorblockincolfalse + + +% the command places a strut with height and depth = \@IEEEauthorblockXinterlinespace +% we use this technique to have complete manual control over the spacing of the lines +% within the halign environment. +% We set the below baseline portion at 30%, the above +% baseline portion at 70% of the total length. +% Responds to changes in the document's \baselinestretch +\def\@IEEEauthorstrutrule{\@IEEEtrantmpdimenA\@IEEEauthorblockXinterlinespace% +\@IEEEtrantmpdimenA=\baselinestretch\@IEEEtrantmpdimenA% +\rule[-0.3\@IEEEtrantmpdimenA]{0pt}{\@IEEEtrantmpdimenA}} + + +% blocks to hold the authors' names and affilations. +% Makes formatting easy for conferences +% +% use real definitions in conference mode +% name block +\def\IEEEauthorblockN#1{\relax\@IEEEauthorblockNstyle% set the default text style +\gdef\@IEEEauthorblockXinterlinespace{0pt}% disable strut for spacer row +% the \expandafter hides the \cr in conditional tex, see the array.sty docs +% for details, probably not needed here as the \cr is in a macro +% do a spacer row if needed +\if@IEEEprevauthorblockincol\expandafter\@IEEEauthorblockNtopspaceline\fi +\global\@IEEEprevauthorblockincoltrue% we now have a block in this column +%restore the correct strut value +\gdef\@IEEEauthorblockXinterlinespace{\@IEEEauthorblockNinterlinespace}% +% input the author names +#1% +% end the row if the user did not already +\crcr} +% spacer row for names +\def\@IEEEauthorblockNtopspaceline{\cr\noalign{\vskip\@IEEEauthorblockNtopspace}} +% +% affiliation block +\def\IEEEauthorblockA#1{\relax\@IEEEauthorblockAstyle% set the default text style +\gdef\@IEEEauthorblockXinterlinespace{0pt}%disable strut for spacer row +% the \expandafter hides the \cr in conditional tex, see the array.sty docs +% for details, probably not needed here as the \cr is in a macro +% do a spacer row if needed +\if@IEEEprevauthorblockincol\expandafter\@IEEEauthorblockAtopspaceline\fi +\global\@IEEEprevauthorblockincoltrue% we now have a block in this column +%restore the correct strut value +\gdef\@IEEEauthorblockXinterlinespace{\@IEEEauthorblockAinterlinespace}% +% input the author affiliations +#1% +% end the row if the user did not already +\crcr} +% spacer row for affiliations +\def\@IEEEauthorblockAtopspaceline{\cr\noalign{\vskip\@IEEEauthorblockAtopspace}} + + +% allow papers to compile even if author blocks are used in modes other +% than conference or peerreviewca. For such cases, we provide dummy blocks. +\ifCLASSOPTIONconference +\else + \ifCLASSOPTIONpeerreviewca\else + % not conference or peerreviewca mode + \def\IEEEauthorblockN#1{#1}% + \def\IEEEauthorblockA#1{#1}% + \fi +\fi + + + +% we provide our own halign so as not to have to depend on tabular +\def\@IEEEauthorhalign{\@IEEEauthordefaulttextstyle% default text style + \lineskip=0pt\relax% disable line spacing + \lineskiplimit=0pt\relax% + \baselineskip=0pt\relax% + \@IEEEcurfontSAVE% save the current font + \mathsurround\z@\relax% no extra spacing around math + \let\\\@IEEEauthorhaligncr% replace newline with halign friendly one + \tabskip=0pt\relax% no column spacing + \everycr{}% ensure no problems here + \@IEEEprevauthorblockincolfalse% no author blocks yet + \def\@IEEEauthorblockXinterlinespace{2.7ex}% default interline space + \vtop\bgroup%vtop box + \halign\bgroup&\relax\hfil\@IEEEcurfontRESTORE\relax ##\relax + \hfil\@IEEEcurfontSAVE\@IEEEauthorstrutrule\cr} + +% ensure last line, exit from halign, close vbox +\def\end@IEEEauthorhalign{\crcr\egroup\egroup} + +% handle bogus star form +\def\@IEEEauthorhaligncr{{\ifnum0=`}\fi\@ifstar{\@@IEEEauthorhaligncr}{\@@IEEEauthorhaligncr}} + +% test and setup the optional argument to \\[] +\def\@@IEEEauthorhaligncr{\@testopt\@@@IEEEauthorhaligncr\z@skip} + +% end the line and do the optional spacer +\def\@@@IEEEauthorhaligncr[#1]{\ifnum0=`{\fi}\cr\noalign{\vskip#1\relax}} + + + +% flag to prevent multiple \and warning messages +\newif\if@IEEEWARNand +\@IEEEWARNandtrue + +% if in conference or peerreviewca modes, we support the use of \and as \author is a +% tabular environment, otherwise we warn the user that \and is invalid +% outside of conference or peerreviewca modes. +\def\and{\relax} % provide a bogus \and that we will then override + +\renewcommand{\and}[1][\relax]{\if@IEEEWARNand\typeout{** WARNING: \noexpand\and is valid only + when in conference or peerreviewca}\typeout{modes (line \the\inputlineno).}\fi\global\@IEEEWARNandfalse} + +\ifCLASSOPTIONconference% +\renewcommand{\and}[1][\hfill]{\end{@IEEEauthorhalign}#1\begin{@IEEEauthorhalign}}% +\fi +\ifCLASSOPTIONpeerreviewca +\renewcommand{\and}[1][\hfill]{\end{@IEEEauthorhalign}#1\begin{@IEEEauthorhalign}}% +\fi + + +% page clearing command +% based on LaTeX2e's \cleardoublepage, but allows different page styles +% for the inserted blank pages +\def\@IEEEcleardoublepage#1{\clearpage\if@twoside\ifodd\c@page\else +\hbox{}\thispagestyle{#1}\newpage\if@twocolumn\hbox{}\thispagestyle{#1}\newpage\fi\fi\fi} + + +% user command to invoke the title page +\def\maketitle{\par% + \begingroup% + \normalfont% + \def\thefootnote{}% the \thanks{} mark type is empty + \def\footnotemark{}% and kill space from \thanks within author + \let\@makefnmark\relax% V1.7, must *really* kill footnotemark to remove all \textsuperscript spacing as well. + \footnotesize% equal spacing between thanks lines + \footnotesep 0.7\baselineskip%see global setting of \footnotesep for more info + % V1.7 disable \thanks note indention for compsoc + \@IEEEcompsoconly{\long\def\@makefntext##1{\parindent 1em\noindent\hbox{\@makefnmark}##1}}% + \normalsize% + \ifCLASSOPTIONpeerreview + \newpage\global\@topnum\z@ \@maketitle\@IEEEstatictitlevskip\@IEEEaftertitletext% + \thispagestyle{IEEEpeerreviewcoverpagestyle}\@thanks% + \else + \if@twocolumn% + \ifCLASSOPTIONtechnote% + \newpage\global\@topnum\z@ \@maketitle\@IEEEstatictitlevskip\@IEEEaftertitletext% + \else + \twocolumn[\@maketitle\@IEEEstatictitlevskip\@IEEEaftertitletext]% + \fi + \else + \newpage\global\@topnum\z@ \@maketitle\@IEEEstatictitlevskip\@IEEEaftertitletext% + \fi + \thispagestyle{IEEEtitlepagestyle}\@thanks% + \fi + % pullup page for pubid if used. + \if@IEEEusingpubid + \enlargethispage{-\@IEEEpubidpullup}% + \fi + \endgroup + \setcounter{footnote}{0}\let\maketitle\relax\let\@maketitle\relax + \gdef\@thanks{}% + % v1.6b do not clear these as we will need the title again for peer review papers + % \gdef\@author{}\gdef\@title{}% + \let\thanks\relax} + + + +% V1.7 parbox to format \@IEEEcompsoctitleabstractindextext +\long\def\@IEEEcompsoctitleabstractindextextbox#1{\parbox{0.915\textwidth}{#1}} + +% formats the Title, authors names, affiliations and special paper notice +% THIS IS A CONTROLLED SPACING COMMAND! Do not allow blank lines or unintentional +% spaces to enter the definition - use % at the end of each line +\def\@maketitle{\newpage +\begingroup\centering +\ifCLASSOPTIONtechnote% technotes + {\bfseries\large\@IEEEcompsoconly{\sffamily}\@title\par}\vskip 1.3em{\lineskip .5em\@IEEEcompsoconly{\sffamily}\@author + \@IEEEspecialpapernotice\par{\@IEEEcompsoconly{\vskip 1.5em\relax + \@IEEEcompsoctitleabstractindextextbox{\@IEEEcompsoctitleabstractindextext}\par + \hfill\@IEEEcompsocdiamondline\hfill\hbox{}\par}}}\relax +\else% not a technote + \vskip0.2em{\Huge\@IEEEcompsoconly{\sffamily}\@IEEEcompsocconfonly{\normalfont\normalsize\vskip 2\@IEEEnormalsizeunitybaselineskip + \bfseries\Large}\@title\par}\vskip1.0em\par% + % V1.6 handle \author differently if in conference mode + \ifCLASSOPTIONconference% + {\@IEEEspecialpapernotice\mbox{}\vskip\@IEEEauthorblockconfadjspace% + \mbox{}\hfill\begin{@IEEEauthorhalign}\@author\end{@IEEEauthorhalign}\hfill\mbox{}\par}\relax + \else% peerreviewca, peerreview or journal + \ifCLASSOPTIONpeerreviewca + % peerreviewca handles author names just like conference mode + {\@IEEEcompsoconly{\sffamily}\@IEEEspecialpapernotice\mbox{}\vskip\@IEEEauthorblockconfadjspace% + \mbox{}\hfill\begin{@IEEEauthorhalign}\@author\end{@IEEEauthorhalign}\hfill\mbox{}\par + {\@IEEEcompsoconly{\vskip 1.5em\relax + \@IEEEcompsoctitleabstractindextextbox{\@IEEEcompsoctitleabstractindextext}\par\hfill + \@IEEEcompsocdiamondline\hfill\hbox{}\par}}}\relax + \else% journal or peerreview + {\lineskip.5em\@IEEEcompsoconly{\sffamily}\sublargesize\@author\@IEEEspecialpapernotice\par + {\@IEEEcompsoconly{\vskip 1.5em\relax + \@IEEEcompsoctitleabstractindextextbox{\@IEEEcompsoctitleabstractindextext}\par\hfill + \@IEEEcompsocdiamondline\hfill\hbox{}\par}}}\relax + \fi + \fi +\fi\par\endgroup} + + + +% V1.7 Computer Society "diamond line" which follows index terms for nonconference papers +\def\@IEEEcompsocdiamondline{\vrule depth 0pt height 0.5pt width 4cm\hspace{7.5pt}% +\raisebox{-3.5pt}{\fontfamily{pzd}\fontencoding{U}\fontseries{m}\fontshape{n}\fontsize{11}{12}\selectfont\char70}% +\hspace{7.5pt}\vrule depth 0pt height 0.5pt width 4cm\relax} + +% V1.7 standard LateX2e \thanks, but with \itshape under compsoc. Also make it a \long\def +% We also need to trigger the one-shot footnote rule +\def\@IEEEtriggeroneshotfootnoterule{\global\@IEEEenableoneshotfootnoteruletrue} + + +\long\def\thanks#1{\footnotemark + \protected@xdef\@thanks{\@thanks + \protect\footnotetext[\the\c@footnote]{\@IEEEcompsoconly{\itshape + \protect\@IEEEtriggeroneshotfootnoterule\relax}\ignorespaces#1}}} +\let\@thanks\@empty + +% V1.7 allow \author to contain \par's. This is needed to allow \thanks to contain \par. +\long\def\author#1{\gdef\@author{#1}} + + +% in addition to setting up IEEEitemize, we need to remove a baselineskip space above and +% below it because \list's \pars introduce blank lines because of the footnote struts. +\def\@IEEEsetupcompsocitemizelist{\def\labelitemi{$\bullet$}% +\setlength{\IEEElabelindent}{0pt}\setlength{\parskip}{0pt}% +\setlength{\partopsep}{0pt}\setlength{\topsep}{0.5\baselineskip}\vspace{-1\baselineskip}\relax} + + +% flag for fake non-compsoc \IEEEcompsocthanksitem - prevents line break on very first item +\newif\if@IEEEbreakcompsocthanksitem \@IEEEbreakcompsocthanksitemfalse + +\ifCLASSOPTIONcompsoc +% V1.7 compsoc bullet item \thanks +% also, we need to redefine this to destroy the argument in \@IEEEdynamictitlevspace +\long\def\IEEEcompsocitemizethanks#1{\relax\@IEEEbreakcompsocthanksitemfalse\footnotemark + \protected@xdef\@thanks{\@thanks + \protect\footnotetext[\the\c@footnote]{\itshape\protect\@IEEEtriggeroneshotfootnoterule + {\let\IEEEiedlistdecl\relax\protect\begin{IEEEitemize}[\protect\@IEEEsetupcompsocitemizelist]\ignorespaces#1\relax + \protect\end{IEEEitemize}}\protect\vspace{-1\baselineskip}}}} +\DeclareRobustCommand*{\IEEEcompsocthanksitem}{\item} +\else +% non-compsoc, allow for dual compilation via rerouting to normal \thanks +\long\def\IEEEcompsocitemizethanks#1{\thanks{#1}} +% redirect to "pseudo-par" \hfil\break\indent after swallowing [] from \IEEEcompsocthanksitem[] +\DeclareRobustCommand{\IEEEcompsocthanksitem}{\@ifnextchar [{\@IEEEthanksswallowoptionalarg}% +{\@IEEEthanksswallowoptionalarg[\relax]}} +% be sure and break only after first item, be sure and ignore spaces after optional argument +\def\@IEEEthanksswallowoptionalarg[#1]{\relax\if@IEEEbreakcompsocthanksitem\hfil\break +\indent\fi\@IEEEbreakcompsocthanksitemtrue\ignorespaces} +\fi + + +% V1.6b define the \IEEEpeerreviewmaketitle as needed +\ifCLASSOPTIONpeerreview +\def\IEEEpeerreviewmaketitle{\@IEEEcleardoublepage{empty}% +\ifCLASSOPTIONtwocolumn +\twocolumn[\@IEEEpeerreviewmaketitle\@IEEEdynamictitlevspace] +\else +\newpage\@IEEEpeerreviewmaketitle\@IEEEstatictitlevskip +\fi +\thispagestyle{IEEEtitlepagestyle}} +\else +% \IEEEpeerreviewmaketitle does nothing if peer review option has not been selected +\def\IEEEpeerreviewmaketitle{\relax} +\fi + +% peerreview formats the repeated title like the title in journal papers. +\def\@IEEEpeerreviewmaketitle{\begin{center}\@IEEEcompsoconly{\sffamily}% +\normalfont\normalsize\vskip0.2em{\Huge\@title\par}\vskip1.0em\par +\end{center}} + + + +% V1.6 +% this is a static rubber spacer between the title/authors and the main text +% used for single column text, or when the title appears in the first column +% of two column text (technotes). +\def\@IEEEstatictitlevskip{{\normalfont\normalsize +% adjust spacing to next text +% v1.6b handle peer review papers +\ifCLASSOPTIONpeerreview +% for peer review papers, the same value is used for both title pages +% regardless of the other paper modes + \vskip 1\baselineskip plus 0.375\baselineskip minus 0.1875\baselineskip +\else + \ifCLASSOPTIONconference% conference + \vskip 0.6\baselineskip + \else% + \ifCLASSOPTIONtechnote% technote + \vskip 1\baselineskip plus 0.375\baselineskip minus 0.1875\baselineskip% + \else% journal uses more space + \vskip 2.5\baselineskip plus 0.75\baselineskip minus 0.375\baselineskip% + \fi + \fi +\fi}} + + +% V1.6 +% This is a dynamically determined rigid spacer between the title/authors +% and the main text. This is used only for single column titles over two +% column text (most common) +% This is bit tricky because we have to ensure that the textheight of the +% main text is an integer multiple of \baselineskip +% otherwise underfull vbox problems may develop in the second column of the +% text on the titlepage +% The possible use of \IEEEpubid must also be taken into account. +\def\@IEEEdynamictitlevspace{{% + % we run within a group so that all the macros can be forgotten when we are done + \long\def\thanks##1{\relax}%don't allow \thanks to run when we evaluate the vbox height + \long\def\IEEEcompsocitemizethanks##1{\relax}%don't allow \IEEEcompsocitemizethanks to run when we evaluate the vbox height + \normalfont\normalsize% we declare more descriptive variable names + \let\@IEEEmaintextheight=\@IEEEtrantmpdimenA%height of the main text columns + \let\@IEEEINTmaintextheight=\@IEEEtrantmpdimenB%height of the main text columns with integer # lines + % set the nominal and minimum values for the title spacer + % the dynamic algorithm will not allow the spacer size to + % become less than \@IEEEMINtitlevspace - instead it will be + % lengthened + % default to journal values + \def\@IEEENORMtitlevspace{2.5\baselineskip}% + \def\@IEEEMINtitlevspace{2\baselineskip}% + % conferences and technotes need tighter spacing + \ifCLASSOPTIONconference%conference + \def\@IEEENORMtitlevspace{1\baselineskip}% + \def\@IEEEMINtitlevspace{0.75\baselineskip}% + \fi + \ifCLASSOPTIONtechnote%technote + \def\@IEEENORMtitlevspace{1\baselineskip}% + \def\@IEEEMINtitlevspace{0.75\baselineskip}% + \fi% + % get the height that the title will take up + \ifCLASSOPTIONpeerreview + \settoheight{\@IEEEmaintextheight}{\vbox{\hsize\textwidth \@IEEEpeerreviewmaketitle}}% + \else + \settoheight{\@IEEEmaintextheight}{\vbox{\hsize\textwidth \@maketitle}}% + \fi + \@IEEEmaintextheight=-\@IEEEmaintextheight% title takes away from maintext, so reverse sign + % add the height of the page textheight + \advance\@IEEEmaintextheight by \textheight% + % correct for title pages using pubid + \ifCLASSOPTIONpeerreview\else + % peerreview papers use the pubid on the cover page only. + % And the cover page uses a static spacer. + \if@IEEEusingpubid\advance\@IEEEmaintextheight by -\@IEEEpubidpullup\fi + \fi% + % subtract off the nominal value of the title bottom spacer + \advance\@IEEEmaintextheight by -\@IEEENORMtitlevspace% + % \topskip takes away some too + \advance\@IEEEmaintextheight by -\topskip% + % calculate the column height of the main text for lines + % now we calculate the main text height as if holding + % an integer number of \normalsize lines after the first + % and discard any excess fractional remainder + % we subtracted the first line, because the first line + % is placed \topskip into the maintext, not \baselineskip like the + % rest of the lines. + \@IEEEINTmaintextheight=\@IEEEmaintextheight% + \divide\@IEEEINTmaintextheight by \baselineskip% + \multiply\@IEEEINTmaintextheight by \baselineskip% + % now we calculate how much the title spacer height will + % have to be reduced from nominal (\@IEEEREDUCEmaintextheight is always + % a positive value) so that the maintext area will contain an integer + % number of normal size lines + % we change variable names here (to avoid confusion) as we no longer + % need \@IEEEINTmaintextheight and can reuse its dimen register + \let\@IEEEREDUCEmaintextheight=\@IEEEINTmaintextheight% + \advance\@IEEEREDUCEmaintextheight by -\@IEEEmaintextheight% + \advance\@IEEEREDUCEmaintextheight by \baselineskip% + % this is the calculated height of the spacer + % we change variable names here (to avoid confusion) as we no longer + % need \@IEEEmaintextheight and can reuse its dimen register + \let\@IEEECOMPENSATElen=\@IEEEmaintextheight% + \@IEEECOMPENSATElen=\@IEEENORMtitlevspace% set the nominal value + % we go with the reduced length if it is smaller than an increase + \ifdim\@IEEEREDUCEmaintextheight < 0.5\baselineskip\relax% + \advance\@IEEECOMPENSATElen by -\@IEEEREDUCEmaintextheight% + % if the resulting spacer is too small back out and go with an increase instead + \ifdim\@IEEECOMPENSATElen<\@IEEEMINtitlevspace\relax% + \advance\@IEEECOMPENSATElen by \baselineskip% + \fi% + \else% + % go with an increase because it is closer to the nominal than a decrease + \advance\@IEEECOMPENSATElen by -\@IEEEREDUCEmaintextheight% + \advance\@IEEECOMPENSATElen by \baselineskip% + \fi% + % set the calculated rigid spacer + \vspace{\@IEEECOMPENSATElen}}} + + + +% V1.6 +% we allow the user access to the last part of the title area +% useful in emergencies such as when a different spacing is needed +% This text is NOT compensated for in the dynamic sizer. +\let\@IEEEaftertitletext=\relax +\long\def\IEEEaftertitletext#1{\def\@IEEEaftertitletext{#1}} + +% V1.7 provide a way for users to enter abstract and keywords +% into the onecolumn title are. This text is compensated for +% in the dynamic sizer. +\let\@IEEEcompsoctitleabstractindextext=\relax +\long\def\IEEEcompsoctitleabstractindextext#1{\def\@IEEEcompsoctitleabstractindextext{#1}} +% V1.7 provide a way for users to get the \@IEEEcompsoctitleabstractindextext if +% not in compsoc journal mode - this way abstract and keywords can be placed +% in their conventional position if not in compsoc mode. +\def\IEEEdisplaynotcompsoctitleabstractindextext{% +\ifCLASSOPTIONcompsoc% display if compsoc conf +\ifCLASSOPTIONconference\@IEEEcompsoctitleabstractindextext\fi +\else% or if not compsoc +\@IEEEcompsoctitleabstractindextext\fi} + + +% command to allow alteration of baselinestretch, but only if the current +% baselineskip is unity. Used to tweak the compsoc abstract and keywords line spacing. +\def\@IEEEtweakunitybaselinestretch#1{{\def\baselinestretch{1}\selectfont +\global\@tempskipa\baselineskip}\ifnum\@tempskipa=\baselineskip% +\def\baselinestretch{#1}\selectfont\fi\relax} + + +% abstract and keywords are in \small, except +% for 9pt docs in which they are in \footnotesize +% Because 9pt docs use an 8pt footnotesize, \small +% becomes a rather awkward 8.5pt +\def\@IEEEabskeysecsize{\small} +\ifx\CLASSOPTIONpt\@IEEEptsizenine + \def\@IEEEabskeysecsize{\footnotesize} +\fi + +% compsoc journals use \footnotesize, compsoc conferences use normalsize +\@IEEEcompsoconly{\def\@IEEEabskeysecsize{\footnotesize}} +\@IEEEcompsocconfonly{\def\@IEEEabskeysecsize{\normalsize}} + + + + +% V1.6 have abstract and keywords strip leading spaces, pars and newlines +% so that spacing is more tightly controlled. +\def\abstract{\normalfont + \if@twocolumn + \par\@IEEEabskeysecsize\bfseries\leavevmode\kern-1pt\textit{\abstractname}---\relax + \else + \begin{center}\vspace{-1.78ex}\@IEEEabskeysecsize\textbf{\abstractname}\end{center}\quotation\@IEEEabskeysecsize + \fi\@IEEEgobbleleadPARNLSP} +% V1.6 IEEE wants only 1 pica from end of abstract to introduction heading when in +% conference mode (the heading already has this much above it) +\def\endabstract{\relax\ifCLASSOPTIONconference\vspace{0ex}\else\vspace{1.34ex}\fi\par\if@twocolumn\else\endquotation\fi + \normalfont\normalsize} + +\def\IEEEkeywords{\normalfont + \if@twocolumn + \@IEEEabskeysecsize\bfseries\leavevmode\kern-1pt\textit{\IEEEkeywordsname}---\relax + \else + \begin{center}\@IEEEabskeysecsize\textbf{\IEEEkeywordsname}\end{center}\quotation\@IEEEabskeysecsize + \fi\itshape\@IEEEgobbleleadPARNLSP} +\def\endIEEEkeywords{\relax\ifCLASSOPTIONtechnote\vspace{1.34ex}\else\vspace{0.5ex}\fi + \par\if@twocolumn\else\endquotation\fi% + \normalfont\normalsize} + +% V1.7 compsoc keywords index terms +\ifCLASSOPTIONcompsoc + \ifCLASSOPTIONconference% compsoc conference +\def\abstract{\normalfont + \begin{center}\@IEEEabskeysecsize\textbf{\large\abstractname}\end{center}\vskip 0.5\baselineskip plus 0.1\baselineskip minus 0.1\baselineskip + \if@twocolumn\else\quotation\fi\itshape\@IEEEabskeysecsize% + \par\@IEEEgobbleleadPARNLSP} +\def\IEEEkeywords{\normalfont\vskip 1.5\baselineskip plus 0.25\baselineskip minus 0.25\baselineskip + \begin{center}\@IEEEabskeysecsize\textbf{\large\IEEEkeywordsname}\end{center}\vskip 0.5\baselineskip plus 0.1\baselineskip minus 0.1\baselineskip + \if@twocolumn\else\quotation\fi\itshape\@IEEEabskeysecsize% + \par\@IEEEgobbleleadPARNLSP} + \else% compsoc not conference +\def\abstract{\normalfont\@IEEEtweakunitybaselinestretch{1.15}\sffamily + \if@twocolumn + \@IEEEabskeysecsize\noindent\textbf{\abstractname}---\relax + \else + \begin{center}\vspace{-1.78ex}\@IEEEabskeysecsize\textbf{\abstractname}\end{center}\quotation\@IEEEabskeysecsize% + \fi\@IEEEgobbleleadPARNLSP} +\def\IEEEkeywords{\normalfont\@IEEEtweakunitybaselinestretch{1.15}\sffamily + \if@twocolumn + \@IEEEabskeysecsize\vskip 0.5\baselineskip plus 0.25\baselineskip minus 0.25\baselineskip\noindent + \textbf{\IEEEkeywordsname}---\relax + \else + \begin{center}\@IEEEabskeysecsize\textbf{\IEEEkeywordsname}\end{center}\quotation\@IEEEabskeysecsize% + \fi\@IEEEgobbleleadPARNLSP} + \fi +\fi + + + +% gobbles all leading \, \\ and \par, upon finding first token that +% is not a \ , \\ or a \par, it ceases and returns that token +% +% used to strip leading \, \\ and \par from the input +% so that such things in the beginning of an environment will not +% affect the formatting of the text +\long\def\@IEEEgobbleleadPARNLSP#1{\let\@IEEEswallowthistoken=0% +\let\@IEEEgobbleleadPARNLSPtoken#1% +\let\@IEEEgobbleleadPARtoken=\par% +\let\@IEEEgobbleleadNLtoken=\\% +\let\@IEEEgobbleleadSPtoken=\ % +\def\@IEEEgobbleleadSPMACRO{\ }% +\ifx\@IEEEgobbleleadPARNLSPtoken\@IEEEgobbleleadPARtoken% +\let\@IEEEswallowthistoken=1% +\fi% +\ifx\@IEEEgobbleleadPARNLSPtoken\@IEEEgobbleleadNLtoken% +\let\@IEEEswallowthistoken=1% +\fi% +\ifx\@IEEEgobbleleadPARNLSPtoken\@IEEEgobbleleadSPtoken% +\let\@IEEEswallowthistoken=1% +\fi% +% a control space will come in as a macro +% when it is the last one on a line +\ifx\@IEEEgobbleleadPARNLSPtoken\@IEEEgobbleleadSPMACRO% +\let\@IEEEswallowthistoken=1% +\fi% +% if we have to swallow this token, do so and taste the next one +% else spit it out and stop gobbling +\ifx\@IEEEswallowthistoken 1\let\@IEEEnextgobbleleadPARNLSP=\@IEEEgobbleleadPARNLSP\else% +\let\@IEEEnextgobbleleadPARNLSP=#1\fi% +\@IEEEnextgobbleleadPARNLSP}% + + + + +% TITLING OF SECTIONS +\def\@IEEEsectpunct{:\ \,} % Punctuation after run-in section heading (headings which are + % part of the paragraphs), need little bit more than a single space + % spacing from section number to title +% compsoc conferences use regular period/space punctuation +\ifCLASSOPTIONcompsoc +\ifCLASSOPTIONconference +\def\@IEEEsectpunct{.\ } +\fi\fi + +\def\@seccntformat#1{\hb@xt@ 1.4em{\csname the#1dis\endcsname\hss\relax}} +\def\@seccntformatinl#1{\hb@xt@ 1.1em{\csname the#1dis\endcsname\hss\relax}} +\def\@seccntformatch#1{\csname the#1dis\endcsname\hskip 1em\relax} + +\ifCLASSOPTIONcompsoc +% compsoc journals need extra spacing +\ifCLASSOPTIONconference\else +\def\@seccntformat#1{\csname the#1dis\endcsname\hskip 1em\relax} +\fi\fi + +%v1.7 put {} after #6 to allow for some types of user font control +%and use \@@par rather than \par +\def\@sect#1#2#3#4#5#6[#7]#8{% + \ifnum #2>\c@secnumdepth + \let\@svsec\@empty + \else + \refstepcounter{#1}% + % load section label and spacer into \@svsec + \ifnum #2=1 + \protected@edef\@svsec{\@seccntformatch{#1}\relax}% + \else + \ifnum #2>2 + \protected@edef\@svsec{\@seccntformatinl{#1}\relax}% + \else + \protected@edef\@svsec{\@seccntformat{#1}\relax}% + \fi + \fi + \fi% + \@tempskipa #5\relax + \ifdim \@tempskipa>\z@% tempskipa determines whether is treated as a high + \begingroup #6{\relax% or low level heading + \noindent % subsections are NOT indented + % print top level headings. \@svsec is label, #8 is heading title + % IEEE does not block indent the section title text, it flows like normal + {\hskip #3\relax\@svsec}{\interlinepenalty \@M #8\@@par}}% + \endgroup + \addcontentsline{toc}{#1}{\ifnum #2>\c@secnumdepth\relax\else + \protect\numberline{\csname the#1\endcsname}\fi#7}% + \else % printout low level headings + % svsechd seems to swallow the trailing space, protect it with \mbox{} + % got rid of sectionmark stuff + \def\@svsechd{#6{\hskip #3\relax\@svsec #8\@IEEEsectpunct\mbox{}}% + \addcontentsline{toc}{#1}{\ifnum #2>\c@secnumdepth\relax\else + \protect\numberline{\csname the#1\endcsname}\fi#7}}% + \fi%skip down + \@xsect{#5}} + + +% section* handler +%v1.7 put {} after #4 to allow for some types of user font control +%and use \@@par rather than \par +\def\@ssect#1#2#3#4#5{\@tempskipa #3\relax + \ifdim \@tempskipa>\z@ + %\begingroup #4\@hangfrom{\hskip #1}{\interlinepenalty \@M #5\par}\endgroup + % IEEE does not block indent the section title text, it flows like normal + \begingroup \noindent #4{\relax{\hskip #1}{\interlinepenalty \@M #5\@@par}}\endgroup + % svsechd swallows the trailing space, protect it with \mbox{} + \else \def\@svsechd{#4{\hskip #1\relax #5\@IEEEsectpunct\mbox{}}}\fi + \@xsect{#3}} + + +%% SECTION heading spacing and font +%% +% arguments are: #1 - sectiontype name +% (for \@sect) #2 - section level +% #3 - section heading indent +% #4 - top separation (absolute value used, neg indicates not to indent main text) +% If negative, make stretch parts negative too! +% #5 - (absolute value used) positive: bottom separation after heading, +% negative: amount to indent main text after heading +% Both #4 and #5 negative means to indent main text and use negative top separation +% #6 - font control +% You've got to have \normalfont\normalsize in the font specs below to prevent +% trouble when you do something like: +% \section{Note}{\ttfamily TT-TEXT} is known to ... +% IEEE sometimes REALLY stretches the area before a section +% heading by up to about 0.5in. However, it may not be a good +% idea to let LaTeX have quite this much rubber. +\ifCLASSOPTIONconference% +% IEEE wants section heading spacing to decrease for conference mode +\def\section{\@startsection{section}{1}{\z@}{1.5ex plus 1.5ex minus 0.5ex}% +{1sp}{\normalfont\normalsize\centering\scshape}}% +\def\subsection{\@startsection{subsection}{2}{\z@}{1.5ex plus 1.5ex minus 0.5ex}% +{1sp}{\normalfont\normalsize\itshape}}% +\else % for journals +\def\section{\@startsection{section}{1}{\z@}{3.0ex plus 1.5ex minus 1.5ex}% V1.6 3.0ex from 3.5ex +{0.7ex plus 1ex minus 0ex}{\normalfont\normalsize\centering\scshape}}% +\def\subsection{\@startsection{subsection}{2}{\z@}{3.5ex plus 1.5ex minus 1.5ex}% +{0.7ex plus .5ex minus 0ex}{\normalfont\normalsize\itshape}}% +\fi + +% for both journals and conferences +% decided to put in a little rubber above the section, might help somebody +\def\subsubsection{\@startsection{subsubsection}{3}{\parindent}{0ex plus 0.1ex minus 0.1ex}% +{0ex}{\normalfont\normalsize\itshape}}% +\def\paragraph{\@startsection{paragraph}{4}{2\parindent}{0ex plus 0.1ex minus 0.1ex}% +{0ex}{\normalfont\normalsize\itshape}}% + + +% compsoc +\ifCLASSOPTIONcompsoc +\ifCLASSOPTIONconference +% compsoc conference +\def\section{\@startsection{section}{1}{\z@}{1\baselineskip plus 0.25\baselineskip minus 0.25\baselineskip}% +{1\baselineskip plus 0.25\baselineskip minus 0.25\baselineskip}{\normalfont\large\bfseries}}% +\def\subsection{\@startsection{subsection}{2}{\z@}{1\baselineskip plus 0.25\baselineskip minus 0.25\baselineskip}% +{1\baselineskip plus 0.25\baselineskip minus 0.25\baselineskip}{\normalfont\sublargesize\bfseries}}% +\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{1\baselineskip plus 0.25\baselineskip minus 0.25\baselineskip}% +{0ex}{\normalfont\normalsize\bfseries}}% +\def\paragraph{\@startsection{paragraph}{4}{2\parindent}{0ex plus 0.1ex minus 0.1ex}% +{0ex}{\normalfont\normalsize}}% +\else% compsoc journals +% use negative top separation as compsoc journals do not indent paragraphs after section titles +\def\section{\@startsection{section}{1}{\z@}{-3ex plus -2ex minus -1.5ex}% +{0.7ex plus 1ex minus 0ex}{\normalfont\large\sffamily\bfseries\scshape}}% +% Note that subsection and smaller may not be correct for the Computer Society, +% I have to look up an example. +\def\subsection{\@startsection{subsection}{2}{\z@}{-3.5ex plus -1.5ex minus -1.5ex}% +{0.7ex plus .5ex minus 0ex}{\normalfont\normalsize\sffamily\bfseries}}% +\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-2.5ex plus -1ex minus -1ex}% +{0.5ex plus 0.5ex minus 0ex}{\normalfont\normalsize\sffamily\itshape}}% +\def\paragraph{\@startsection{paragraph}{4}{2\parindent}{-0ex plus -0.1ex minus -0.1ex}% +{0ex}{\normalfont\normalsize}}% +\fi\fi + + + + +%% ENVIRONMENTS +% "box" symbols at end of proofs +\def\IEEEQEDclosed{\mbox{\rule[0pt]{1.3ex}{1.3ex}}} % for a filled box +% V1.6 some journals use an open box instead that will just fit around a closed one +\def\IEEEQEDopen{{\setlength{\fboxsep}{0pt}\setlength{\fboxrule}{0.2pt}\fbox{\rule[0pt]{0pt}{1.3ex}\rule[0pt]{1.3ex}{0pt}}}} +\ifCLASSOPTIONcompsoc +\def\IEEEQED{\IEEEQEDopen} % default to open for compsoc +\else +\def\IEEEQED{\IEEEQEDclosed} % otherwise default to closed +\fi + +% v1.7 name change to avoid namespace collision with amsthm. Also add support +% for an optional argument. +\def\IEEEproof{\@ifnextchar[{\@IEEEproof}{\@IEEEproof[\IEEEproofname]}} +\def\@IEEEproof[#1]{\par\noindent\hspace{2em}{\itshape #1: }} +\def\endIEEEproof{\hspace*{\fill}~\IEEEQED\par} + + +%\itemindent is set to \z@ by list, so define new temporary variable +\newdimen\@IEEEtmpitemindent +\def\@begintheorem#1#2{\@IEEEtmpitemindent\itemindent\topsep 0pt\rmfamily\trivlist% + \item[\hskip \labelsep{\indent\itshape #1\ #2:}]\itemindent\@IEEEtmpitemindent} +\def\@opargbegintheorem#1#2#3{\@IEEEtmpitemindent\itemindent\topsep 0pt\rmfamily \trivlist% +% V1.6 IEEE is back to using () around theorem names which are also in italics +% Thanks to Christian Peel for reporting this. + \item[\hskip\labelsep{\indent\itshape #1\ #2\ (#3):}]\itemindent\@IEEEtmpitemindent} +% V1.7 remove bogus \unskip that caused equations in theorems to collide with +% lines below. +\def\@endtheorem{\endtrivlist} + +% V1.6 +% display command for the section the theorem is in - so that \thesection +% is not used as this will be in Roman numerals when we want arabic. +% LaTeX2e uses \def\@thmcounter#1{\noexpand\arabic{#1}} for the theorem number +% (second part) display and \def\@thmcountersep{.} as a separator. +% V1.7 intercept calls to the section counter and reroute to \@IEEEthmcounterinsection +% to allow \appendix(ices} to override as needed. +% +% special handler for sections, allows appendix(ices) to override +\gdef\@IEEEthmcounterinsection#1{\arabic{#1}} +% string macro +\edef\@IEEEstringsection{section} + +% redefine the #1#2[#3] form of newtheorem to use a hook to \@IEEEthmcounterinsection +% if section in_counter is used +\def\@xnthm#1#2[#3]{% + \expandafter\@ifdefinable\csname #1\endcsname + {\@definecounter{#1}\@newctr{#1}[#3]% + \edef\@IEEEstringtmp{#3} + \ifx\@IEEEstringtmp\@IEEEstringsection + \expandafter\xdef\csname the#1\endcsname{% + \noexpand\@IEEEthmcounterinsection{#3}\@thmcountersep + \@thmcounter{#1}}% + \else + \expandafter\xdef\csname the#1\endcsname{% + \expandafter\noexpand\csname the#3\endcsname \@thmcountersep + \@thmcounter{#1}}% + \fi + \global\@namedef{#1}{\@thm{#1}{#2}}% + \global\@namedef{end#1}{\@endtheorem}}} + + + +%% SET UP THE DEFAULT PAGESTYLE +\ps@headings +\pagenumbering{arabic} + +% normally the page counter starts at 1 +\setcounter{page}{1} +% however, for peerreview the cover sheet is page 0 or page -1 +% (for duplex printing) +\ifCLASSOPTIONpeerreview + \if@twoside + \setcounter{page}{-1} + \else + \setcounter{page}{0} + \fi +\fi + +% standard book class behavior - let bottom line float up and down as +% needed when single sided +\ifCLASSOPTIONtwoside\else\raggedbottom\fi +% if two column - turn on twocolumn, allow word spacings to stretch more and +% enforce a rigid position for the last lines +\ifCLASSOPTIONtwocolumn +% the peer review option delays invoking twocolumn + \ifCLASSOPTIONpeerreview\else + \twocolumn + \fi +\sloppy +\flushbottom +\fi + + + + +% \APPENDIX and \APPENDICES definitions + +% This is the \@ifmtarg command from the LaTeX ifmtarg package +% by Peter Wilson (CUA) and Donald Arseneau +% \@ifmtarg is used to determine if an argument to a command +% is present or not. +% For instance: +% \@ifmtarg{#1}{\typeout{empty}}{\typeout{has something}} +% \@ifmtarg is used with our redefined \section command if +% \appendices is invoked. +% The command \section will behave slightly differently depending +% on whether the user specifies a title: +% \section{My appendix title} +% or not: +% \section{} +% This way, we can eliminate the blank lines where the title +% would be, and the unneeded : after Appendix in the table of +% contents +\begingroup +\catcode`\Q=3 +\long\gdef\@ifmtarg#1{\@xifmtarg#1QQ\@secondoftwo\@firstoftwo\@nil} +\long\gdef\@xifmtarg#1#2Q#3#4#5\@nil{#4} +\endgroup +% end of \@ifmtarg defs + + +% V1.7 +% command that allows the one time saving of the original definition +% of section to \@IEEEappendixsavesection for \appendix or \appendices +% we don't save \section here as it may be redefined later by other +% packages (hyperref.sty, etc.) +\def\@IEEEsaveoriginalsectiononce{\let\@IEEEappendixsavesection\section +\let\@IEEEsaveoriginalsectiononce\relax} + +% neat trick to grab and process the argument from \section{argument} +% we process differently if the user invoked \section{} with no +% argument (title) +% note we reroute the call to the old \section* +\def\@IEEEprocessthesectionargument#1{% +\@ifmtarg{#1}{% +\@IEEEappendixsavesection*{\appendixname~\thesectiondis}% +\addcontentsline{toc}{section}{\appendixname~\thesection}}{% +\@IEEEappendixsavesection*{\appendixname~\thesectiondis \\* #1}% +\addcontentsline{toc}{section}{\appendixname~\thesection: #1}}} + +% we use this if the user calls \section{} after +% \appendix-- which has no meaning. So, we ignore the +% command and its argument. Then, warn the user. +\def\@IEEEdestroythesectionargument#1{\typeout{** WARNING: Ignoring useless +\protect\section\space in Appendix (line \the\inputlineno).}} + + +% remember \thesection forms will be displayed in \ref calls +% and in the Table of Contents. +% The \sectiondis form is used in the actual heading itself + +% appendix command for one single appendix +% normally has no heading. However, if you want a +% heading, you can do so via the optional argument: +% \appendix[Optional Heading] +\def\appendix{\relax} +\renewcommand{\appendix}[1][]{\@IEEEsaveoriginalsectiononce\par + % v1.6 keep hyperref's identifiers unique + \gdef\theHsection{Appendix.A}% + % v1.6 adjust hyperref's string name for the section + \xdef\Hy@chapapp{appendix}% + \setcounter{section}{0}% + \setcounter{subsection}{0}% + \setcounter{subsubsection}{0}% + \setcounter{paragraph}{0}% + \gdef\thesection{A}% + \gdef\thesectiondis{}% + \gdef\thesubsection{\Alph{subsection}}% + \gdef\@IEEEthmcounterinsection##1{A} + \refstepcounter{section}% update the \ref counter + \@ifmtarg{#1}{\@IEEEappendixsavesection*{\appendixname}% + \addcontentsline{toc}{section}{\appendixname}}{% + \@IEEEappendixsavesection*{\appendixname~\\* #1}% + \addcontentsline{toc}{section}{\appendixname: #1}}% + % redefine \section command for appendix + % leave \section* as is + \def\section{\@ifstar{\@IEEEappendixsavesection*}{% + \@IEEEdestroythesectionargument}}% throw out the argument + % of the normal form +} + + + +% appendices command for multiple appendices +% user then calls \section with an argument (possibly empty) to +% declare the individual appendices +\def\appendices{\@IEEEsaveoriginalsectiononce\par + % v1.6 keep hyperref's identifiers unique + \gdef\theHsection{Appendix.\Alph{section}}% + % v1.6 adjust hyperref's string name for the section + \xdef\Hy@chapapp{appendix}% + \setcounter{section}{-1}% we want \refstepcounter to use section 0 + \setcounter{subsection}{0}% + \setcounter{subsubsection}{0}% + \setcounter{paragraph}{0}% + \ifCLASSOPTIONromanappendices% + \gdef\thesection{\Roman{section}}% + \gdef\thesectiondis{\Roman{section}}% + \@IEEEcompsocconfonly{\gdef\thesectiondis{\Roman{section}.}}% + \gdef\@IEEEthmcounterinsection##1{A\arabic{##1}} + \else% + \gdef\thesection{\Alph{section}}% + \gdef\thesectiondis{\Alph{section}}% + \@IEEEcompsocconfonly{\gdef\thesectiondis{\Alph{section}.}}% + \gdef\@IEEEthmcounterinsection##1{\Alph{##1}} + \fi% + \refstepcounter{section}% update the \ref counter + \setcounter{section}{0}% NEXT \section will be the FIRST appendix + % redefine \section command for appendices + % leave \section* as is + \def\section{\@ifstar{\@IEEEappendixsavesection*}{% process the *-form + \refstepcounter{section}% or is a new section so, + \@IEEEprocessthesectionargument}}% process the argument + % of the normal form +} + + + +% \IEEEPARstart +% Definition for the big two line drop cap letter at the beginning of the +% first paragraph of journal papers. The first argument is the first letter +% of the first word, the second argument is the remaining letters of the +% first word which will be rendered in upper case. +% In V1.6 this has been completely rewritten to: +% +% 1. no longer have problems when the user begins an environment +% within the paragraph that uses \IEEEPARstart. +% 2. auto-detect and use the current font family +% 3. revise handling of the space at the end of the first word so that +% interword glue will now work as normal. +% 4. produce correctly aligned edges for the (two) indented lines. +% +% We generalize things via control macros - playing with these is fun too. +% +% V1.7 added more control macros to make it easy for IEEEtrantools.sty users +% to change the font style. +% +% the number of lines that are indented to clear it +% may need to increase if using decenders +\def\@IEEEPARstartDROPLINES{2} +% minimum number of lines left on a page to allow a \@IEEEPARstart +% Does not take into consideration rubber shrink, so it tends to +% be overly cautious +\def\@IEEEPARstartMINPAGELINES{2} +% V1.7 the height of the drop cap is adjusted to match the height of this text +% in the current font (when \IEEEPARstart is called). +\def\@IEEEPARstartHEIGHTTEXT{T} +% the depth the letter is lowered below the baseline +% the height (and size) of the letter is determined by the sum +% of this value and the height of the \@IEEEPARstartHEIGHTTEXT in the current +% font. It is a good idea to set this value in terms of the baselineskip +% so that it can respond to changes therein. +\def\@IEEEPARstartDROPDEPTH{1.1\baselineskip} +% V1.7 the font the drop cap will be rendered in, +% can take zero or one argument. +\def\@IEEEPARstartFONTSTYLE{\bfseries} +% V1.7 any additional, non-font related commands needed to modify +% the drop cap letter, can take zero or one argument. +\def\@IEEEPARstartCAPSTYLE{\MakeUppercase} +% V1.7 the font that will be used to render the rest of the word, +% can take zero or one argument. +\def\@IEEEPARstartWORDFONTSTYLE{\relax} +% V1.7 any additional, non-font related commands needed to modify +% the rest of the word, can take zero or one argument. +\def\@IEEEPARstartWORDCAPSTYLE{\MakeUppercase} +% This is the horizontal separation distance from the drop letter to the main text. +% Lengths that depend on the font (e.g., ex, em, etc.) will be referenced +% to the font that is active when \IEEEPARstart is called. +\def\@IEEEPARstartSEP{0.15em} +% V1.7 horizontal offset applied to the left of the drop cap. +\def\@IEEEPARstartHOFFSET{0em} +% V1.7 Italic correction command applied at the end of the drop cap. +\def\@IEEEPARstartITLCORRECT{\/} + +% V1.7 compoc uses nonbold drop cap and small caps word style +\ifCLASSOPTIONcompsoc +\def\@IEEEPARstartFONTSTYLE{\mdseries} +\def\@IEEEPARstartWORDFONTSTYLE{\scshape} +\def\@IEEEPARstartWORDCAPSTYLE{\relax} +\fi + +% definition of \IEEEPARstart +% THIS IS A CONTROLLED SPACING AREA, DO NOT ALLOW SPACES WITHIN THESE LINES +% +% The token \@IEEEPARstartfont will be globally defined after the first use +% of \IEEEPARstart and will be a font command which creates the big letter +% The first argument is the first letter of the first word and the second +% argument is the rest of the first word(s). +\def\IEEEPARstart#1#2{\par{% +% if this page does not have enough space, break it and lets start +% on a new one +\@IEEEtranneedspace{\@IEEEPARstartMINPAGELINES\baselineskip}{\relax}% +% V1.7 move this up here in case user uses \textbf for \@IEEEPARstartFONTSTYLE +% which uses command \leavevmode which causes an unwanted \indent to be issued +\noindent +% calculate the desired height of the big letter +% it extends from the top of \@IEEEPARstartHEIGHTTEXT in the current font +% down to \@IEEEPARstartDROPDEPTH below the current baseline +\settoheight{\@IEEEtrantmpdimenA}{\@IEEEPARstartHEIGHTTEXT}% +\addtolength{\@IEEEtrantmpdimenA}{\@IEEEPARstartDROPDEPTH}% +% extract the name of the current font in bold +% and place it in \@IEEEPARstartFONTNAME +\def\@IEEEPARstartGETFIRSTWORD##1 ##2\relax{##1}% +{\@IEEEPARstartFONTSTYLE{\selectfont\edef\@IEEEPARstartFONTNAMESPACE{\fontname\font\space}% +\xdef\@IEEEPARstartFONTNAME{\expandafter\@IEEEPARstartGETFIRSTWORD\@IEEEPARstartFONTNAMESPACE\relax}}}% +% define a font based on this name with a point size equal to the desired +% height of the drop letter +\font\@IEEEPARstartsubfont\@IEEEPARstartFONTNAME\space at \@IEEEtrantmpdimenA\relax% +% save this value as a counter (integer) value (sp points) +\@IEEEtrantmpcountA=\@IEEEtrantmpdimenA% +% now get the height of the actual letter produced by this font size +\settoheight{\@IEEEtrantmpdimenB}{\@IEEEPARstartsubfont\@IEEEPARstartCAPSTYLE{#1}}% +% If something bogus happens like the first argument is empty or the +% current font is strange, do not allow a zero height. +\ifdim\@IEEEtrantmpdimenB=0pt\relax% +\typeout{** WARNING: IEEEPARstart drop letter has zero height! (line \the\inputlineno)}% +\typeout{ Forcing the drop letter font size to 10pt.}% +\@IEEEtrantmpdimenB=10pt% +\fi% +% and store it as a counter +\@IEEEtrantmpcountB=\@IEEEtrantmpdimenB% +% Since a font size doesn't exactly correspond to the height of the capital +% letters in that font, the actual height of the letter, \@IEEEtrantmpcountB, +% will be less than that desired, \@IEEEtrantmpcountA +% we need to raise the font size, \@IEEEtrantmpdimenA +% by \@IEEEtrantmpcountA / \@IEEEtrantmpcountB +% But, TeX doesn't have floating point division, so we have to use integer +% division. Hence the use of the counters. +% We need to reduce the denominator so that the loss of the remainder will +% have minimal affect on the accuracy of the result +\divide\@IEEEtrantmpcountB by 200% +\divide\@IEEEtrantmpcountA by \@IEEEtrantmpcountB% +% Then reequalize things when we use TeX's ability to multiply by +% floating point values +\@IEEEtrantmpdimenB=0.005\@IEEEtrantmpdimenA% +\multiply\@IEEEtrantmpdimenB by \@IEEEtrantmpcountA% +% \@IEEEPARstartfont is globaly set to the calculated font of the big letter +% We need to carry this out of the local calculation area to to create the +% big letter. +\global\font\@IEEEPARstartfont\@IEEEPARstartFONTNAME\space at \@IEEEtrantmpdimenB% +% Now set \@IEEEtrantmpdimenA to the width of the big letter +% We need to carry this out of the local calculation area to set the +% hanging indent +\settowidth{\global\@IEEEtrantmpdimenA}{\@IEEEPARstartfont +\@IEEEPARstartCAPSTYLE{#1\@IEEEPARstartITLCORRECT}}}% +% end of the isolated calculation environment +% add in the extra clearance we want +\advance\@IEEEtrantmpdimenA by \@IEEEPARstartSEP\relax% +% add in the optional offset +\advance\@IEEEtrantmpdimenA by \@IEEEPARstartHOFFSET\relax% +% V1.7 don't allow negative offsets to produce negative hanging indents +\@IEEEtrantmpdimenB\@IEEEtrantmpdimenA +\ifnum\@IEEEtrantmpdimenB < 0 \@IEEEtrantmpdimenB 0pt\fi +% \@IEEEtrantmpdimenA has the width of the big letter plus the +% separation space and \@IEEEPARstartfont is the font we need to use +% Now, we make the letter and issue the hanging indent command +% The letter is placed in a box of zero width and height so that other +% text won't be displaced by it. +\hangindent\@IEEEtrantmpdimenB\hangafter=-\@IEEEPARstartDROPLINES% +\makebox[0pt][l]{\hspace{-\@IEEEtrantmpdimenA}% +\raisebox{-\@IEEEPARstartDROPDEPTH}[0pt][0pt]{\hspace{\@IEEEPARstartHOFFSET}% +\@IEEEPARstartfont\@IEEEPARstartCAPSTYLE{#1\@IEEEPARstartITLCORRECT}% +\hspace{\@IEEEPARstartSEP}}}% +{\@IEEEPARstartWORDFONTSTYLE{\@IEEEPARstartWORDCAPSTYLE{\selectfont#2}}}} + + + + + + +% determines if the space remaining on a given page is equal to or greater +% than the specified space of argument one +% if not, execute argument two (only if the remaining space is greater than zero) +% and issue a \newpage +% +% example: \@IEEEtranneedspace{2in}{\vfill} +% +% Does not take into consideration rubber shrinkage, so it tends to +% be overly cautious +% Based on an example posted by Donald Arseneau +% Note this macro uses \@IEEEtrantmpdimenB internally for calculations, +% so DO NOT PASS \@IEEEtrantmpdimenB to this routine +% if you need a dimen register, import with \@IEEEtrantmpdimenA instead +\def\@IEEEtranneedspace#1#2{\penalty-100\begingroup%shield temp variable +\@IEEEtrantmpdimenB\pagegoal\advance\@IEEEtrantmpdimenB-\pagetotal% space left +\ifdim #1>\@IEEEtrantmpdimenB\relax% not enough space left +\ifdim\@IEEEtrantmpdimenB>\z@\relax #2\fi% +\newpage% +\fi\endgroup} + + + +% IEEEbiography ENVIRONMENT +% Allows user to enter biography leaving place for picture (adapts to font size) +% As of V1.5, a new optional argument allows you to have a real graphic! +% V1.5 and later also fixes the "colliding biographies" which could happen when a +% biography's text was shorter than the space for the photo. +% MDS 7/2001 +% V1.6 prevent multiple biographies from making multiple TOC entries +\newif\if@IEEEbiographyTOCentrynotmade +\global\@IEEEbiographyTOCentrynotmadetrue + +% biography counter so hyperref can jump directly to the biographies +% and not just the previous section +\newcounter{IEEEbiography} +\setcounter{IEEEbiography}{0} + +% photo area size +\def\@IEEEBIOphotowidth{1.0in} % width of the biography photo area +\def\@IEEEBIOphotodepth{1.25in} % depth (height) of the biography photo area +% area cleared for photo +\def\@IEEEBIOhangwidth{1.14in} % width cleared for the biography photo area +\def\@IEEEBIOhangdepth{1.25in} % depth cleared for the biography photo area + % actual depth will be a multiple of + % \baselineskip, rounded up +\def\@IEEEBIOskipN{4\baselineskip}% nominal value of the vskip above the biography + +\newenvironment{IEEEbiography}[2][]{\normalfont\@IEEEcompsoconly{\sffamily}\footnotesize% +\unitlength 1in\parskip=0pt\par\parindent 1em\interlinepenalty500% +% we need enough space to support the hanging indent +% the nominal value of the spacer +% and one extra line for good measure +\@IEEEtrantmpdimenA=\@IEEEBIOhangdepth% +\advance\@IEEEtrantmpdimenA by \@IEEEBIOskipN% +\advance\@IEEEtrantmpdimenA by 1\baselineskip% +% if this page does not have enough space, break it and lets start +% with a new one +\@IEEEtranneedspace{\@IEEEtrantmpdimenA}{\relax}% +% nominal spacer can strech, not shrink use 1fil so user can out stretch with \vfill +\vskip \@IEEEBIOskipN plus 1fil minus 0\baselineskip% +% the default box for where the photo goes +\def\@IEEEtempbiographybox{{\setlength{\fboxsep}{0pt}\framebox{% +\begin{minipage}[b][\@IEEEBIOphotodepth][c]{\@IEEEBIOphotowidth}\centering PLACE\\ PHOTO\\ HERE \end{minipage}}}}% +% +% detect if the optional argument was supplied, this requires the +% \@ifmtarg command as defined in the appendix section above +% and if so, override the default box with what they want +\@ifmtarg{#1}{\relax}{\def\@IEEEtempbiographybox{\mbox{\begin{minipage}[b][\@IEEEBIOphotodepth][c]{\@IEEEBIOphotowidth}% +\centering% +#1% +\end{minipage}}}}% end if optional argument supplied +% Make an entry into the table of contents only if we have not done so before +\if@IEEEbiographyTOCentrynotmade% +% link labels to the biography counter so hyperref will jump +% to the biography, not the previous section +\setcounter{IEEEbiography}{-1}% +\refstepcounter{IEEEbiography}% +\addcontentsline{toc}{section}{Biographies}% +\global\@IEEEbiographyTOCentrynotmadefalse% +\fi% +% one more biography +\refstepcounter{IEEEbiography}% +% Make an entry for this name into the table of contents +\addcontentsline{toc}{subsection}{#2}% +% V1.6 properly handle if a new paragraph should occur while the +% hanging indent is still active. Do this by redefining \par so +% that it will not start a new paragraph. (But it will appear to the +% user as if it did.) Also, strip any leading pars, newlines, or spaces. +\let\@IEEEBIOORGparCMD=\par% save the original \par command +\edef\par{\hfil\break\indent}% the new \par will not be a "real" \par +\settoheight{\@IEEEtrantmpdimenA}{\@IEEEtempbiographybox}% get height of biography box +\@IEEEtrantmpdimenB=\@IEEEBIOhangdepth% +\@IEEEtrantmpcountA=\@IEEEtrantmpdimenB% countA has the hang depth +\divide\@IEEEtrantmpcountA by \baselineskip% calculates lines needed to produce the hang depth +\advance\@IEEEtrantmpcountA by 1% ensure we overestimate +% set the hanging indent +\hangindent\@IEEEBIOhangwidth% +\hangafter-\@IEEEtrantmpcountA% +% reference the top of the photo area to the top of a capital T +\settoheight{\@IEEEtrantmpdimenB}{\mbox{T}}% +% set the photo box, give it zero width and height so as not to disturb anything +\noindent\makebox[0pt][l]{\hspace{-\@IEEEBIOhangwidth}\raisebox{\@IEEEtrantmpdimenB}[0pt][0pt]{% +\raisebox{-\@IEEEBIOphotodepth}[0pt][0pt]{\@IEEEtempbiographybox}}}% +% now place the author name and begin the bio text +\noindent\textbf{#2\ }\@IEEEgobbleleadPARNLSP}{\relax\let\par=\@IEEEBIOORGparCMD\par% +% 7/2001 V1.5 detect when the biography text is shorter than the photo area +% and pad the unused area - preventing a collision from the next biography entry +% MDS +\ifnum \prevgraf <\@IEEEtrantmpcountA\relax% detect when the biography text is shorter than the photo + \advance\@IEEEtrantmpcountA by -\prevgraf% calculate how many lines we need to pad + \advance\@IEEEtrantmpcountA by -1\relax% we compensate for the fact that we indented an extra line + \@IEEEtrantmpdimenA=\baselineskip% calculate the length of the padding + \multiply\@IEEEtrantmpdimenA by \@IEEEtrantmpcountA% + \noindent\rule{0pt}{\@IEEEtrantmpdimenA}% insert an invisible support strut +\fi% +\par\normalfont} + + + +% V1.6 +% added biography without a photo environment +\newenvironment{IEEEbiographynophoto}[1]{% +% Make an entry into the table of contents only if we have not done so before +\if@IEEEbiographyTOCentrynotmade% +% link labels to the biography counter so hyperref will jump +% to the biography, not the previous section +\setcounter{IEEEbiography}{-1}% +\refstepcounter{IEEEbiography}% +\addcontentsline{toc}{section}{Biographies}% +\global\@IEEEbiographyTOCentrynotmadefalse% +\fi% +% one more biography +\refstepcounter{IEEEbiography}% +% Make an entry for this name into the table of contents +\addcontentsline{toc}{subsection}{#1}% +\normalfont\@IEEEcompsoconly{\sffamily}\footnotesize\interlinepenalty500% +\vskip 4\baselineskip plus 1fil minus 0\baselineskip% +\parskip=0pt\par% +\noindent\textbf{#1\ }\@IEEEgobbleleadPARNLSP}{\relax\par\normalfont} + + +% provide the user with some old font commands +% got this from article.cls +\DeclareOldFontCommand{\rm}{\normalfont\rmfamily}{\mathrm} +\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf} +\DeclareOldFontCommand{\tt}{\normalfont\ttfamily}{\mathtt} +\DeclareOldFontCommand{\bf}{\normalfont\bfseries}{\mathbf} +\DeclareOldFontCommand{\it}{\normalfont\itshape}{\mathit} +\DeclareOldFontCommand{\sl}{\normalfont\slshape}{\@nomath\sl} +\DeclareOldFontCommand{\sc}{\normalfont\scshape}{\@nomath\sc} +\DeclareRobustCommand*\cal{\@fontswitch\relax\mathcal} +\DeclareRobustCommand*\mit{\@fontswitch\relax\mathnormal} + + +% SPECIAL PAPER NOTICE COMMANDS +% +% holds the special notice text +\def\@IEEEspecialpapernotice{\relax} + +% for special papers, like invited papers, the user can do: +% \IEEEspecialpapernotice{(Invited Paper)} before \maketitle +\def\IEEEspecialpapernotice#1{\ifCLASSOPTIONconference% +\def\@IEEEspecialpapernotice{{\Large#1\vspace*{1em}}}% +\else% +\def\@IEEEspecialpapernotice{{\\*[1.5ex]\sublargesize\textit{#1}}\vspace*{-2ex}}% +\fi} + + + + +% PUBLISHER ID COMMANDS +% to insert a publisher's ID footer +% V1.6 \IEEEpubid has been changed so that the change in page size and style +% occurs in \maketitle. \IEEEpubid must now be issued prior to \maketitle +% use \IEEEpubidadjcol as before - in the second column of the title page +% These changes allow \maketitle to take the reduced page height into +% consideration when dynamically setting the space between the author +% names and the maintext. +% +% the amount the main text is pulled up to make room for the +% publisher's ID footer +% IEEE uses about 1.3\baselineskip for journals, +% dynamic title spacing will clean up the fraction +\def\@IEEEpubidpullup{1.3\baselineskip} +\ifCLASSOPTIONtechnote +% for technotes it must be an integer of baselineskip as there can be no +% dynamic title spacing for two column mode technotes (the title is in the +% in first column) and we should maintain an integer number of lines in the +% second column +% There are some examples (such as older issues of "Transactions on +% Information Theory") in which IEEE really pulls the text off the ID for +% technotes - about 0.55in (or 4\baselineskip). We'll use 2\baselineskip +% and call it even. +\def\@IEEEpubidpullup{2\baselineskip} +\fi + +% V1.7 compsoc does not use a pullup +\ifCLASSOPTIONcompsoc +\def\@IEEEpubidpullup{0pt} +\fi + +% holds the ID text +\def\@IEEEpubid{\relax} + +% flag so \maketitle can tell if \IEEEpubid was called +\newif\if@IEEEusingpubid +\global\@IEEEusingpubidfalse +% issue this command in the page to have the ID at the bottom +% V1.6 use before \maketitle +\def\IEEEpubid#1{\def\@IEEEpubid{#1}\global\@IEEEusingpubidtrue} + + +% command which will pull up (shorten) the column it is executed in +% to make room for the publisher ID. Place in the second column of +% the title page when using \IEEEpubid +% Is smart enough not to do anything when in single column text or +% if the user hasn't called \IEEEpubid +% currently needed in for the second column of a page with the +% publisher ID. If not needed in future releases, please provide this +% command and define it as \relax for backward compatibility +% v1.6b do not allow command to operate if the peer review option has been +% selected because \IEEEpubidadjcol will not be on the cover page. +% V1.7 do nothing if compsoc +\def\IEEEpubidadjcol{\ifCLASSOPTIONcompsoc\else\ifCLASSOPTIONpeerreview\else +\if@twocolumn\if@IEEEusingpubid\enlargethispage{-\@IEEEpubidpullup}\fi\fi\fi\fi} + +% Special thanks to Peter Wilson, Daniel Luecking, and the other +% gurus at comp.text.tex, for helping me to understand how best to +% implement the IEEEpubid command in LaTeX. + + + +%% Lockout some commands under various conditions + +% general purpose bit bucket +\newsavebox{\@IEEEtranrubishbin} + +% flags to prevent multiple warning messages +\newif\if@IEEEWARNthanks +\newif\if@IEEEWARNIEEEPARstart +\newif\if@IEEEWARNIEEEbiography +\newif\if@IEEEWARNIEEEbiographynophoto +\newif\if@IEEEWARNIEEEpubid +\newif\if@IEEEWARNIEEEpubidadjcol +\newif\if@IEEEWARNIEEEmembership +\newif\if@IEEEWARNIEEEaftertitletext +\@IEEEWARNthankstrue +\@IEEEWARNIEEEPARstarttrue +\@IEEEWARNIEEEbiographytrue +\@IEEEWARNIEEEbiographynophototrue +\@IEEEWARNIEEEpubidtrue +\@IEEEWARNIEEEpubidadjcoltrue +\@IEEEWARNIEEEmembershiptrue +\@IEEEWARNIEEEaftertitletexttrue + + +%% Lockout some commands when in various modes, but allow them to be restored if needed +%% +% save commands which might be locked out +% so that the user can later restore them if needed +\let\@IEEESAVECMDthanks\thanks +\let\@IEEESAVECMDIEEEPARstart\IEEEPARstart +\let\@IEEESAVECMDIEEEbiography\IEEEbiography +\let\@IEEESAVECMDendIEEEbiography\endIEEEbiography +\let\@IEEESAVECMDIEEEbiographynophoto\IEEEbiographynophoto +\let\@IEEESAVECMDendIEEEbiographynophoto\endIEEEbiographynophoto +\let\@IEEESAVECMDIEEEpubid\IEEEpubid +\let\@IEEESAVECMDIEEEpubidadjcol\IEEEpubidadjcol +\let\@IEEESAVECMDIEEEmembership\IEEEmembership +\let\@IEEESAVECMDIEEEaftertitletext\IEEEaftertitletext + + +% disable \IEEEPARstart when in draft mode +% This may have originally been done because the pre-V1.6 drop letter +% algorithm had problems with a non-unity baselinestretch +% At any rate, it seems too formal to have a drop letter in a draft +% paper. +\ifCLASSOPTIONdraftcls +\def\IEEEPARstart#1#2{#1#2\if@IEEEWARNIEEEPARstart\typeout{** ATTENTION: \noexpand\IEEEPARstart + is disabled in draft mode (line \the\inputlineno).}\fi\global\@IEEEWARNIEEEPARstartfalse} +\fi +% and for technotes +\ifCLASSOPTIONtechnote +\def\IEEEPARstart#1#2{#1#2\if@IEEEWARNIEEEPARstart\typeout{** WARNING: \noexpand\IEEEPARstart + is locked out for technotes (line \the\inputlineno).}\fi\global\@IEEEWARNIEEEPARstartfalse} +\fi + + +% lockout unneeded commands when in conference mode +\ifCLASSOPTIONconference +% when locked out, \thanks, \IEEEbiography, \IEEEbiographynophoto, \IEEEpubid, +% \IEEEmembership and \IEEEaftertitletext will all swallow their given text. +% \IEEEPARstart will output a normal character instead +% warn the user about these commands only once to prevent the console screen +% from filling up with redundant messages +\def\thanks#1{\if@IEEEWARNthanks\typeout{** WARNING: \noexpand\thanks + is locked out when in conference mode (line \the\inputlineno).}\fi\global\@IEEEWARNthanksfalse} +\def\IEEEPARstart#1#2{#1#2\if@IEEEWARNIEEEPARstart\typeout{** WARNING: \noexpand\IEEEPARstart + is locked out when in conference mode (line \the\inputlineno).}\fi\global\@IEEEWARNIEEEPARstartfalse} + + +% LaTeX treats environments and commands with optional arguments differently. +% the actual ("internal") command is stored as \\commandname +% (accessed via \csname\string\commandname\endcsname ) +% the "external" command \commandname is a macro with code to determine +% whether or not the optional argument is presented and to provide the +% default if it is absent. So, in order to save and restore such a command +% we would have to save and restore \\commandname as well. But, if LaTeX +% ever changes the way it names the internal names, the trick would break. +% Instead let us just define a new environment so that the internal +% name can be left undisturbed. +\newenvironment{@IEEEbogusbiography}[2][]{\if@IEEEWARNIEEEbiography\typeout{** WARNING: \noexpand\IEEEbiography + is locked out when in conference mode (line \the\inputlineno).}\fi\global\@IEEEWARNIEEEbiographyfalse% +\setbox\@IEEEtranrubishbin\vbox\bgroup}{\egroup\relax} +% and make biography point to our bogus biography +\let\IEEEbiography=\@IEEEbogusbiography +\let\endIEEEbiography=\end@IEEEbogusbiography + +\renewenvironment{IEEEbiographynophoto}[1]{\if@IEEEWARNIEEEbiographynophoto\typeout{** WARNING: \noexpand\IEEEbiographynophoto + is locked out when in conference mode (line \the\inputlineno).}\fi\global\@IEEEWARNIEEEbiographynophotofalse% +\setbox\@IEEEtranrubishbin\vbox\bgroup}{\egroup\relax} + +\def\IEEEpubid#1{\if@IEEEWARNIEEEpubid\typeout{** WARNING: \noexpand\IEEEpubid + is locked out when in conference mode (line \the\inputlineno).}\fi\global\@IEEEWARNIEEEpubidfalse} +\def\IEEEpubidadjcol{\if@IEEEWARNIEEEpubidadjcol\typeout{** WARNING: \noexpand\IEEEpubidadjcol + is locked out when in conference mode (line \the\inputlineno).}\fi\global\@IEEEWARNIEEEpubidadjcolfalse} +\def\IEEEmembership#1{\if@IEEEWARNIEEEmembership\typeout{** WARNING: \noexpand\IEEEmembership + is locked out when in conference mode (line \the\inputlineno).}\fi\global\@IEEEWARNIEEEmembershipfalse} +\def\IEEEaftertitletext#1{\if@IEEEWARNIEEEaftertitletext\typeout{** WARNING: \noexpand\IEEEaftertitletext + is locked out when in conference mode (line \the\inputlineno).}\fi\global\@IEEEWARNIEEEaftertitletextfalse} +\fi + + +% provide a way to restore the commands that are locked out +\def\IEEEoverridecommandlockouts{% +\typeout{** ATTENTION: Overriding command lockouts (line \the\inputlineno).}% +\let\thanks\@IEEESAVECMDthanks% +\let\IEEEPARstart\@IEEESAVECMDIEEEPARstart% +\let\IEEEbiography\@IEEESAVECMDIEEEbiography% +\let\endIEEEbiography\@IEEESAVECMDendIEEEbiography% +\let\IEEEbiographynophoto\@IEEESAVECMDIEEEbiographynophoto% +\let\endIEEEbiographynophoto\@IEEESAVECMDendIEEEbiographynophoto% +\let\IEEEpubid\@IEEESAVECMDIEEEpubid% +\let\IEEEpubidadjcol\@IEEESAVECMDIEEEpubidadjcol% +\let\IEEEmembership\@IEEESAVECMDIEEEmembership% +\let\IEEEaftertitletext\@IEEESAVECMDIEEEaftertitletext} + + + +% need a backslash character for typeout output +{\catcode`\|=0 \catcode`\\=12 +|xdef|@IEEEbackslash{\}} + + +% hook to allow easy disabling of all legacy warnings +\def\@IEEElegacywarn#1#2{\typeout{** ATTENTION: \@IEEEbackslash #1 is deprecated (line \the\inputlineno). +Use \@IEEEbackslash #2 instead.}} + + +% provide for legacy commands +\def\authorblockA{\@IEEElegacywarn{authorblockA}{IEEEauthorblockA}\IEEEauthorblockA} +\def\authorblockN{\@IEEElegacywarn{authorblockN}{IEEEauthorblockN}\IEEEauthorblockN} +\def\authorrefmark{\@IEEElegacywarn{authorrefmark}{IEEEauthorrefmark}\IEEEauthorrefmark} +\def\PARstart{\@IEEElegacywarn{PARstart}{IEEEPARstart}\IEEEPARstart} +\def\pubid{\@IEEElegacywarn{pubid}{IEEEpubid}\IEEEpubid} +\def\pubidadjcol{\@IEEElegacywarn{pubidadjcol}{IEEEpubidadjcol}\IEEEpubidadjcol} +\def\QED{\@IEEElegacywarn{QED}{IEEEQED}\IEEEQED} +\def\QEDclosed{\@IEEElegacywarn{QEDclosed}{IEEEQEDclosed}\IEEEQEDclosed} +\def\QEDopen{\@IEEElegacywarn{QEDopen}{IEEEQEDopen}\IEEEQEDopen} +\def\specialpapernotice{\@IEEElegacywarn{specialpapernotice}{IEEEspecialpapernotice}\IEEEspecialpapernotice} + + + +% provide for legacy environments +\def\biography{\@IEEElegacywarn{biography}{IEEEbiography}\IEEEbiography} +\def\biographynophoto{\@IEEElegacywarn{biographynophoto}{IEEEbiographynophoto}\IEEEbiographynophoto} +\def\keywords{\@IEEElegacywarn{keywords}{IEEEkeywords}\IEEEkeywords} +\def\endbiography{\endIEEEbiography} +\def\endbiographynophoto{\endIEEEbiographynophoto} +\def\endkeywords{\endIEEEkeywords} + + +% provide for legacy IED commands/lengths when possible +\let\labelindent\IEEElabelindent +\def\calcleftmargin{\@IEEElegacywarn{calcleftmargin}{IEEEcalcleftmargin}\IEEEcalcleftmargin} +\def\setlabelwidth{\@IEEElegacywarn{setlabelwidth}{IEEEsetlabelwidth}\IEEEsetlabelwidth} +\def\usemathlabelsep{\@IEEElegacywarn{usemathlabelsep}{IEEEusemathlabelsep}\IEEEusemathlabelsep} +\def\iedlabeljustifyc{\@IEEElegacywarn{iedlabeljustifyc}{IEEEiedlabeljustifyc}\IEEEiedlabeljustifyc} +\def\iedlabeljustifyl{\@IEEElegacywarn{iedlabeljustifyl}{IEEEiedlabeljustifyl}\IEEEiedlabeljustifyl} +\def\iedlabeljustifyr{\@IEEElegacywarn{iedlabeljustifyr}{IEEEiedlabeljustifyr}\IEEEiedlabeljustifyr} + + + +% let \proof use the IEEEtran version even after amsthm is loaded +% \proof is now deprecated in favor of \IEEEproof +\AtBeginDocument{\def\proof{\@IEEElegacywarn{proof}{IEEEproof}\IEEEproof}\def\endproof{\endIEEEproof}} + +% V1.7 \overrideIEEEmargins is no longer supported. +\def\overrideIEEEmargins{% +\typeout{** WARNING: \string\overrideIEEEmargins \space no longer supported (line \the\inputlineno).}% +\typeout{** Use the \string\CLASSINPUTinnersidemargin, \string\CLASSINPUToutersidemargin \space controls instead.}} + + +\endinput + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% End of IEEEtran.cls %%%%%%%%%%%%%%%%%%%%%%%%%%%% +% That's all folks! + diff --git a/spec/consensus/consensus-paper/README.md b/spec/consensus/consensus-paper/README.md new file mode 100644 index 000000000..33e395806 --- /dev/null +++ b/spec/consensus/consensus-paper/README.md @@ -0,0 +1,24 @@ +# Tendermint-spec + +The repository contains the specification (and the proofs) of the Tendermint +consensus protocol. + +## How to install Latex on Mac OS + +MacTex is Latex distribution for Mac OS. You can download it [here](http://www.tug.org/mactex/mactex-download.html). + +Popular IDE for Latex-based projects is TexStudio. It can be downloaded +[here](https://www.texstudio.org/). + +## How to build project + +In order to compile the latex files (and write bibliography), execute + +`$ pdflatex paper`
+`$ bibtex paper`
+`$ pdflatex paper`
+`$ pdflatex paper`
+ +The generated file is paper.pdf. You can open it with + +`$ open paper.pdf` diff --git a/spec/consensus/consensus-paper/algorithmicplus.sty b/spec/consensus/consensus-paper/algorithmicplus.sty new file mode 100644 index 000000000..de7ca01ea --- /dev/null +++ b/spec/consensus/consensus-paper/algorithmicplus.sty @@ -0,0 +1,195 @@ +% ALGORITHMICPLUS STYLE +% for LaTeX version 2e +% Original ``algorithmic.sty'' by -- 1994 Peter Williams +% Bug fix (13 July 2004) by Arnaud Giersch +% Includes ideas from 'algorithmicext' by Martin Biely +% and 'distribalgo' by Xavier Defago +% Modifications: Martin Hutle +% +% This style file is free software; you can redistribute it and/or +% modify it under the terms of the GNU Lesser General Public +% License as published by the Free Software Foundation; either +% version 2 of the License, or (at your option) any later version. +% +% This style file is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +% Lesser General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public +% License along with this style file; if not, write to the +% Free Software Foundation, Inc., 59 Temple Place - Suite 330, +% Boston, MA 02111-1307, USA. +% +\NeedsTeXFormat{LaTeX2e} +\ProvidesPackage{algorithmicplus} +\typeout{Document Style `algorithmicplus' - environment, replaces `algorithmic'} +% +\RequirePackage{ifthen} +\RequirePackage{calc} +\newboolean{ALC@noend} +\setboolean{ALC@noend}{false} +\newcounter{ALC@line} +\newcounter{ALC@rem} +\newcounter{ALC@depth} +\newcounter{ALCPLUS@lastline} +\newlength{\ALC@tlm} +% +\DeclareOption{noend}{\setboolean{ALC@noend}{true}} +% +\ProcessOptions +% +% ALGORITHMIC +\newcommand{\algorithmiclnosize}{\small} +\newcommand{\algorithmiclnofont}{\tt} +\newcommand{\algorithmiclnodelimiter}{:} +% +\newcommand{\algorithmicrequire}{\textbf{Require:}} +\newcommand{\algorithmicensure}{\textbf{Ensure:}} +\newcommand{\algorithmiccomment}[1]{\{#1\}} +\newcommand{\algorithmicend}{\textbf{end}} +\newcommand{\algorithmicif}{\textbf{if}} +\newcommand{\algorithmicthen}{\textbf{then}} +\newcommand{\algorithmicelse}{\textbf{else}} +\newcommand{\algorithmicelsif}{\algorithmicelse\ \algorithmicif} +\newcommand{\algorithmicendif}{\algorithmicend\ \algorithmicif} +\newcommand{\algorithmicfor}{\textbf{for}} +\newcommand{\algorithmicforall}{\textbf{for all}} +\newcommand{\algorithmicdo}{\textbf{do}} +\newcommand{\algorithmicendfor}{\algorithmicend\ \algorithmicfor} +\newcommand{\algorithmicwhile}{\textbf{while}} +\newcommand{\algorithmicendwhile}{\algorithmicend\ \algorithmicwhile} +\newcommand{\algorithmicloop}{\textbf{loop}} +\newcommand{\algorithmicendloop}{\algorithmicend\ \algorithmicloop} +\newcommand{\algorithmicrepeat}{\textbf{repeat}} +\newcommand{\algorithmicuntil}{\textbf{until}} +\def\ALC@item[#1]{% +\if@noparitem \@donoparitem + \else \if@inlabel \indent \par \fi + \ifhmode \unskip\unskip \par \fi + \if@newlist \if@nobreak \@nbitem \else + \addpenalty\@beginparpenalty + \addvspace\@topsep \addvspace{-\parskip}\fi + \else \addpenalty\@itempenalty \addvspace\itemsep + \fi + \global\@inlabeltrue +\fi +\everypar{\global\@minipagefalse\global\@newlistfalse + \if@inlabel\global\@inlabelfalse \hskip -\parindent \box\@labels + \penalty\z@ \fi + \everypar{}}\global\@nobreakfalse +\if@noitemarg \@noitemargfalse \if@nmbrlist \refstepcounter{\@listctr}\fi \fi +\sbox\@tempboxa{\makelabel{#1}}% +\global\setbox\@labels + \hbox{\unhbox\@labels \hskip \itemindent + \hskip -\labelwidth \hskip -\ALC@tlm + \ifdim \wd\@tempboxa >\labelwidth + \box\@tempboxa + \else \hbox to\labelwidth {\unhbox\@tempboxa}\fi + \hskip \ALC@tlm}\ignorespaces} +% +\newenvironment{algorithmic}[1][0]{ +\setcounter{ALC@depth}{\@listdepth}% +\let\@listdepth\c@ALC@depth% +\let\@item\ALC@item + \newcommand{\ALC@lno}{% +\ifthenelse{\equal{\arabic{ALC@rem}}{0}} +{{\algorithmiclnosize\algorithmiclnofont \arabic{ALC@line}\algorithmiclnodelimiter}}{}% +} +\let\@listii\@listi +\let\@listiii\@listi +\let\@listiv\@listi +\let\@listv\@listi +\let\@listvi\@listi +\let\@listvii\@listi + \newenvironment{ALC@g}{ + \begin{list}{\ALC@lno}{ \itemsep\z@ \itemindent\z@ + \listparindent\z@ \rightmargin\z@ + \topsep\z@ \partopsep\z@ \parskip\z@\parsep\z@ + \leftmargin 1em + \addtolength{\ALC@tlm}{\leftmargin} + } + } + {\end{list}} + \newcommand{\ALC@it}{\refstepcounter{ALC@line}\addtocounter{ALC@rem}{1}\ifthenelse{\equal{\arabic{ALC@rem}}{#1}}{\setcounter{ALC@rem}{0}}{}\item} + \newcommand{\ALC@com}[1]{\ifthenelse{\equal{##1}{default}}% +{}{\ \algorithmiccomment{##1}}} + \newcommand{\REQUIRE}{\item[\algorithmicrequire]} + \newcommand{\ENSURE}{\item[\algorithmicensure]} + \newcommand{\STATE}{\ALC@it} + \newcommand{\COMMENT}[1]{\algorithmiccomment{##1}} + \newenvironment{ALC@if}{\begin{ALC@g}}{\end{ALC@g}} + \newenvironment{ALC@for}{\begin{ALC@g}}{\end{ALC@g}} + \newenvironment{ALC@whl}{\begin{ALC@g}}{\end{ALC@g}} + \newenvironment{ALC@loop}{\begin{ALC@g}}{\end{ALC@g}} + \newenvironment{ALC@rpt}{\begin{ALC@g}}{\end{ALC@g}} + \renewcommand{\\}{\@centercr} + \newcommand{\IF}[2][default]{\ALC@it\algorithmicif\ ##2\ \algorithmicthen% +\ALC@com{##1}\begin{ALC@if}} + \newcommand{\ELSE}[1][default]{\end{ALC@if}\ALC@it\algorithmicelse% +\ALC@com{##1}\begin{ALC@if}} + \newcommand{\ELSIF}[2][default]% +{\end{ALC@if}\ALC@it\algorithmicelsif\ ##2\ \algorithmicthen% +\ALC@com{##1}\begin{ALC@if}} + \newcommand{\FOR}[2][default]{\ALC@it\algorithmicfor\ ##2\ \algorithmicdo% +\ALC@com{##1}\begin{ALC@for}} + \newcommand{\FORALL}[2][default]{\ALC@it\algorithmicforall\ ##2\ % +\algorithmicdo% +\ALC@com{##1}\begin{ALC@for}} + \newcommand{\WHILE}[2][default]{\ALC@it\algorithmicwhile\ ##2\ % +\algorithmicdo% +\ALC@com{##1}\begin{ALC@whl}} + \newcommand{\LOOP}[1][default]{\ALC@it\algorithmicloop% +\ALC@com{##1}\begin{ALC@loop}} + \newcommand{\REPEAT}[1][default]{\ALC@it\algorithmicrepeat% +\ALC@com{##1}\begin{ALC@rpt}} + \newcommand{\UNTIL}[1]{\end{ALC@rpt}\ALC@it\algorithmicuntil\ ##1} + \ifthenelse{\boolean{ALC@noend}}{ + \newcommand{\ENDIF}{\end{ALC@if}} + \newcommand{\ENDFOR}{\end{ALC@for}} + \newcommand{\ENDWHILE}{\end{ALC@whl}} + \newcommand{\ENDLOOP}{\end{ALC@loop}} + }{ + \newcommand{\ENDIF}{\end{ALC@if}\ALC@it\algorithmicendif} + \newcommand{\ENDFOR}{\end{ALC@for}\ALC@it\algorithmicendfor} + \newcommand{\ENDWHILE}{\end{ALC@whl}\ALC@it\algorithmicendwhile} + \newcommand{\ENDLOOP}{\end{ALC@loop}\ALC@it\algorithmicendloop} + } + \renewcommand{\@toodeep}{} + \begin{list}{\ALC@lno}{\setcounter{ALC@line}{0}\setcounter{ALC@rem}{0}% + \itemsep\z@ \itemindent\z@ \listparindent\z@% + \partopsep\z@ \parskip\z@ \parsep\z@% + \labelsep 0.5em \topsep 0.2em% +\ifthenelse{\equal{#1}{0}} + {\labelwidth 0.5em } + {\labelwidth 1.2em } +\leftmargin\labelwidth \addtolength{\leftmargin}{\labelsep} + \ALC@tlm\labelsep + } +} +{% +\setcounter{ALCPLUS@lastline}{\value{ALC@line}}% +\end{list}} + +\newcommand{\continuecounting}{\setcounter{ALC@line}{\value{ALCPLUS@lastline}}} +\newcommand{\startcounting}[1]{\setcounter{ALC@line}{#1}\addtocounter{ALC@line}{-1}} + +\newcommand{\EMPTY}{\item[]} +\newcommand{\SPACE}{\vspace{3mm}} +\newcommand{\SHORTSPACE}{\vspace{1mm}} +\newcommand{\newlinetag}[3]{\newcommand{#1}[#2]{\item[#3]}} +\newcommand{\newconstruct}[5]{% + \newenvironment{ALC@\string#1}{\begin{ALC@g}}{\end{ALC@g}} + \newcommand{#1}[2][default]{\ALC@it#2\ ##2\ #3% + \ALC@com{##1}\begin{ALC@\string#1}} + \ifthenelse{\boolean{ALC@noend}}{ + \newcommand{#4}{\end{ALC@\string#1}} + }{ + \newcommand{#4}{\end{ALC@\string#1}\ALC@it#5} + } +} + +\newconstruct{\INDENT}{}{}{\ENDINDENT}{} + +\newcommand{\setlinenosize}[1]{\renewcommand{\algorithmiclnosize}{#1}} +\newcommand{\setlinenofont}[1]{\renewcommand{\algorithmiclnofont}{#1}} diff --git a/spec/consensus/consensus-paper/conclusion.tex b/spec/consensus/consensus-paper/conclusion.tex new file mode 100644 index 000000000..dd17ccf44 --- /dev/null +++ b/spec/consensus/consensus-paper/conclusion.tex @@ -0,0 +1,16 @@ +\section{Conclusion} \label{sec:conclusion} + +We have proposed a new Byzantine-fault tolerant consensus algorithm that is the +core of the Tendermint BFT SMR platform. The algorithm is designed for the wide +area network with high number of mutually distrusted nodes that communicate +over gossip based peer-to-peer network. It has only a single mode of execution +and the communication pattern is very similar to the "normal" case of the +state-of-the art PBFT algorithm. The algorithm ensures termination with a novel +mechanism that takes advantage of the gossip based communication between nodes. +The proposed algorithm and the proofs are simple and elegant, and we believe +that this makes it easier to understand and implement correctly. + +\section*{Acknowledgment} + +We would like to thank Anton Kaliaev, Ismail Khoffi and Dahlia Malkhi for comments on an earlier version of the paper. We also want to thank Marko Vukolic, Ming Chuan Lin, Maria Potop-Butucaru, Sara Tucci, Antonella Del Pozzo and Yackolley Amoussou-Guenou for pointing out the liveness issues +in the previous version of the algorithm. Finally, we want to thank the Tendermint team members and all project contributors for making Tendermint such a great platform. diff --git a/spec/consensus/consensus-paper/consensus.tex b/spec/consensus/consensus-paper/consensus.tex new file mode 100644 index 000000000..3265b61c7 --- /dev/null +++ b/spec/consensus/consensus-paper/consensus.tex @@ -0,0 +1,397 @@ + +\section{Tendermint consensus algorithm} \label{sec:tendermint} + +\newcommand\Disseminate{\textbf{Disseminate}} + +\newcommand\Proposal{\mathsf{PROPOSAL}} +\newcommand\ProposalPart{\mathsf{PROPOSAL\mbox{-}PART}} +\newcommand\PrePrepare{\mathsf{INIT}} \newcommand\Prevote{\mathsf{PREVOTE}} +\newcommand\Precommit{\mathsf{PRECOMMIT}} +\newcommand\Decision{\mathsf{DECISION}} + +\newcommand\ViewChange{\mathsf{VC}} +\newcommand\ViewChangeAck{\mathsf{VC\mbox{-}ACK}} +\newcommand\NewPrePrepare{\mathsf{VC\mbox{-}INIT}} +\newcommand\coord{\mathsf{proposer}} + +\newcommand\newHeight{newHeight} \newcommand\newRound{newRound} +\newcommand\nil{nil} \newcommand\id{id} \newcommand{\propose}{propose} +\newcommand\prevote{prevote} \newcommand\prevoteWait{prevoteWait} +\newcommand\precommit{precommit} \newcommand\precommitWait{precommitWait} +\newcommand\commit{commit} + +\newcommand\timeoutPropose{timeoutPropose} +\newcommand\timeoutPrevote{timeoutPrevote} +\newcommand\timeoutPrecommit{timeoutPrecommit} +\newcommand\proofOfLocking{proof\mbox{-}of\mbox{-}locking} + +\begin{algorithm}[htb!] \def\baselinestretch{1} \scriptsize\raggedright + \begin{algorithmic}[1] + \SHORTSPACE + \INIT{} + \STATE $h_p := 0$ + \COMMENT{current height, or consensus instance we are currently executing} + \STATE $round_p := 0$ \COMMENT{current round number} + \STATE $step_p \in \set{\propose, \prevote, \precommit}$ + \STATE $decision_p[] := nil$ + \STATE $lockedValue_p := nil$ + \STATE $lockedRound_p := -1$ + \STATE $validValue_p := nil$ + \STATE $validRound_p := -1$ + \ENDINIT + \SHORTSPACE + \STATE \textbf{upon} start \textbf{do} $StartRound(0)$ + \SHORTSPACE + \FUNCTION{$StartRound(round)$} \label{line:tab:startRound} + \STATE $round_p \assign round$ + \STATE $step_p \assign \propose$ + \IF{$\coord(h_p, round_p) = p$} + \IF{$validValue_p \neq \nil$} \label{line:tab:isThereLockedValue} + \STATE $proposal \assign validValue_p$ \ELSE \STATE $proposal \assign + getValue()$ + \label{line:tab:getValidValue} + \ENDIF + \STATE \Broadcast\ $\li{\Proposal,h_p, round_p, proposal, validRound_p}$ + \label{line:tab:send-proposal} + \ELSE + \STATE \textbf{schedule} $OnTimeoutPropose(h_p, + round_p)$ to be executed \textbf{after} $\timeoutPropose(round_p)$ + \ENDIF + \ENDFUNCTION + + \SPACE + \UPON{$\li{\Proposal,h_p,round_p, v, -1}$ \From\ $\coord(h_p,round_p)$ + \With\ $step_p = \propose$} \label{line:tab:recvProposal} + \IF{$valid(v) \wedge (lockedRound_p = -1 \vee lockedValue_p = v$)} + \label{line:tab:accept-proposal-2} + \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,id(v)}$ + \label{line:tab:prevote-proposal} + \ELSE + \label{line:tab:acceptProposal1} + \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,\nil}$ + \label{line:tab:prevote-nil} + \ENDIF + \STATE $step_p \assign \prevote$ \label{line:tab:setStateToPrevote1} + \ENDUPON + + \SPACE + \UPON{$\li{\Proposal,h_p,round_p, v, vr}$ \From\ $\coord(h_p,round_p)$ + \textbf{AND} $2f+1$ $\li{\Prevote,h_p, vr,id(v)}$ \With\ $step_p = \propose \wedge (vr \ge 0 \wedge vr < round_p)$} + \label{line:tab:acceptProposal} + \IF{$valid(v) \wedge (lockedRound_p \le vr + \vee lockedValue_p = v)$} \label{line:tab:cond-prevote-higher-proposal} + \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,id(v)}$ + \label{line:tab:prevote-higher-proposal} + \ELSE + \label{line:tab:acceptProposal2} + \STATE \Broadcast \ $\li{\Prevote,h_p,round_p,\nil}$ + \label{line:tab:prevote-nil2} + \ENDIF + \STATE $step_p \assign \prevote$ \label{line:tab:setStateToPrevote3} + \ENDUPON + + \SPACE + \UPON{$2f+1$ $\li{\Prevote,h_p, round_p,*}$ \With\ $step_p = \prevote$ for the first time} + \label{line:tab:recvAny2/3Prevote} + \STATE \textbf{schedule} $OnTimeoutPrevote(h_p, round_p)$ to be executed \textbf{after} $\timeoutPrevote(round_p)$ \label{line:tab:timeoutPrevote} + \ENDUPON + + \SPACE + \UPON{$\li{\Proposal,h_p,round_p, v, *}$ \From\ $\coord(h_p,round_p)$ + \textbf{AND} $2f+1$ $\li{\Prevote,h_p, round_p,id(v)}$ \With\ $valid(v) \wedge step_p \ge \prevote$ for the first time} + \label{line:tab:recvPrevote} + \IF{$step_p = \prevote$} + \STATE $lockedValue_p \assign v$ \label{line:tab:setLockedValue} + \STATE $lockedRound_p \assign round_p$ \label{line:tab:setLockedRound} + \STATE \Broadcast \ $\li{\Precommit,h_p,round_p,id(v))}$ + \label{line:tab:precommit-v} + \STATE $step_p \assign \precommit$ \label{line:tab:setStateToCommit} + \ENDIF + \STATE $validValue_p \assign v$ \label{line:tab:setValidRound} + \STATE $validRound_p \assign round_p$ \label{line:tab:setValidValue} + \ENDUPON + + \SHORTSPACE + \UPON{$2f+1$ $\li{\Prevote,h_p,round_p, \nil}$ + \With\ $step_p = \prevote$} + \STATE \Broadcast \ $\li{\Precommit,h_p,round_p, \nil}$ + \label{line:tab:precommit-v-1} + \STATE $step_p \assign \precommit$ + \ENDUPON + + \SPACE + \UPON{$2f+1$ $\li{\Precommit,h_p,round_p,*}$ for the first time} + \label{line:tab:startTimeoutPrecommit} + \STATE \textbf{schedule} $OnTimeoutPrecommit(h_p, round_p)$ to be executed \textbf{after} $\timeoutPrecommit(round_p)$ + + \ENDUPON + + \SPACE + \UPON{$\li{\Proposal,h_p,r, v, *}$ \From\ $\coord(h_p,r)$ \textbf{AND} + $2f+1$ $\li{\Precommit,h_p,r,id(v)}$ \With\ $decision_p[h_p] = \nil$} + \label{line:tab:onDecideRule} + \IF{$valid(v)$} \label{line:tab:validDecisionValue} + \STATE $decision_p[h_p] = v$ \label{line:tab:decide} + \STATE$h_p \assign h_p + 1$ \label{line:tab:increaseHeight} + \STATE reset $lockedRound_p$, $lockedValue_p$, $validRound_p$ and $validValue_p$ to initial values + and empty message log + \STATE $StartRound(0)$ + \ENDIF + \ENDUPON + + \SHORTSPACE + \UPON{$f+1$ $\li{*,h_p,round, *, *}$ \textbf{with} $round > round_p$} + \label{line:tab:skipRounds} + \STATE $StartRound(round)$ \label{line:tab:nextRound2} + \ENDUPON + + \SHORTSPACE + \FUNCTION{$OnTimeoutPropose(height,round)$} \label{line:tab:onTimeoutPropose} + \IF{$height = h_p \wedge round = round_p \wedge step_p = \propose$} + \STATE \Broadcast \ $\li{\Prevote,h_p,round_p, \nil}$ + \label{line:tab:prevote-nil-on-timeout} + \STATE $step_p \assign \prevote$ + \ENDIF + \ENDFUNCTION + + \SHORTSPACE + \FUNCTION{$OnTimeoutPrevote(height,round)$} \label{line:tab:onTimeoutPrevote} + \IF{$height = h_p \wedge round = round_p \wedge step_p = \prevote$} + \STATE \Broadcast \ $\li{\Precommit,h_p,round_p,\nil}$ + \label{line:tab:precommit-nil-onTimeout} + \STATE $step_p \assign \precommit$ + \ENDIF + \ENDFUNCTION + + \SHORTSPACE + \FUNCTION{$OnTimeoutPrecommit(height,round)$} \label{line:tab:onTimeoutPrecommit} + \IF{$height = h_p \wedge round = round_p$} + \STATE $StartRound(round_p + 1)$ \label{line:tab:nextRound} + \ENDIF + \ENDFUNCTION + \end{algorithmic} \caption{Tendermint consensus algorithm} + \label{alg:tendermint} +\end{algorithm} + +In this section we present the Tendermint Byzantine fault-tolerant consensus +algorithm. The algorithm is specified by the pseudo-code shown in +Algorithm~\ref{alg:tendermint}. We present the algorithm as a set of \emph{upon +rules} that are executed atomically\footnote{In case several rules are active +at the same time, the first rule to be executed is picked randomly. The +correctness of the algorithm does not depend on the order in which rules are +executed.}. We assume that processes exchange protocol messages using a gossip +protocol and that both sent and received messages are stored in a local message +log for every process. An upon rule is triggered once the message log contains +messages such that the corresponding condition evaluates to $\tt{true}$. The +condition that assumes reception of $X$ messages of a particular type and +content denotes reception of messages whose senders have aggregate voting power at +least equal to $X$. For example, the condition $2f+1$ $\li{\Precommit,h_p,r,id(v)}$, +evaluates to true upon reception of $\Precommit$ messages for height $h_p$, +a round $r$ and with value equal to $id(v)$ whose senders have aggregate voting +power at least equal to $2f+1$. Some of the rules ends with "for the first time" constraint +to denote that it is triggered only the first time a corresponding condition evaluates +to $\tt{true}$. This is because those rules do not always change the state of algorithm +variables so without this constraint, the algorithm could keep +executing those rules forever. The variables with index $p$ are process local state +variables, while variables without index $p$ are value placeholders. The sign +$*$ denotes any value. + +We denote with $n$ the total voting power of processes in the system, and we +assume that the total voting power of faulty processes in the system is bounded +with a system parameter $f$. The algorithm assumes that $n > 3f$, i.e., it +requires that the total voting power of faulty processes is smaller than one +third of the total voting power. For simplicity we present the algorithm for +the case $n = 3f + 1$. + +The algorithm proceeds in rounds, where each round has a dedicated +\emph{proposer}. The mapping of rounds to proposers is known to all processes +and is given as a function $\coord(h, round)$, returning the proposer for +the round $round$ in the consensus instance $h$. We +assume that the proposer selection function is weighted round-robin, where +processes are rotated proportional to their voting power\footnote{A validator +with more voting power is selected more frequently, proportional to its power. +More precisely, during a sequence of rounds of size $n$, every process is +proposer in a number of rounds equal to its voting power.}. +The internal protocol state transitions are triggered by message reception and +by expiration of timeouts. There are three timeouts in Algorithm \ref{alg:tendermint}: +$\timeoutPropose$, $\timeoutPrevote$ and $\timeoutPrecommit$. +The timeouts prevent the algorithm from blocking and +waiting forever for some condition to be true, ensure that processes continuously +transition between rounds, and guarantee that eventually (after GST) communication +between correct processes is timely and reliable so they can decide. +The last role is achieved by increasing the timeouts with every new round $r$, +i.e, $timeoutX(r) = initTimeoutX + r*timeoutDelta$; +they are reset for every new height (consensus +instance). + +Processes exchange the following messages in Tendermint: $\Proposal$, +$\Prevote$ and $\Precommit$. The $\Proposal$ message is used by the proposer of +the current round to suggest a potential decision value, while $\Prevote$ and +$\Precommit$ are votes for a proposed value. According to the classification of +consensus algorithms from \cite{RMS10:dsn}, Tendermint, like PBFT +\cite{CL02:tcs} and DLS \cite{DLS88:jacm}, belongs to class 3, so it requires +two voting steps (three communication exchanges in total) to decide a value. +The Tendermint consensus algorithm is designed for the blockchain context where +the value to decide is a block of transactions (ie. it is potentially quite +large, consisting of many transactions). Therefore, in the Algorithm +\ref{alg:tendermint} (similar as in \cite{CL02:tcs}) we are explicit about +sending a value (block of transactions) and a small, constant size value id (a +unique value identifier, normally a hash of the value, i.e., if $\id(v) = +\id(v')$, then $v=v'$). The $\Proposal$ message is the only one carrying the +value; $\Prevote$ and $\Precommit$ messages carry the value id. A correct +process decides on a value $v$ in Tendermint upon receiving the $\Proposal$ for +$v$ and $2f+1$ voting-power equivalent $\Precommit$ messages for $\id(v)$ in +some round $r$. In order to send $\Precommit$ message for $v$ in a round $r$, a +correct process waits to receive the $\Proposal$ and $2f+1$ of the +corresponding $\Prevote$ messages in the round $r$. Otherwise, +it sends $\Precommit$ message with a special $\nil$ value. +This ensures that correct processes can $\Precommit$ only a +single value (or $\nil$) in a round. As +proposers may be faulty, the proposed value is treated by correct processes as +a suggestion (it is not blindly accepted), and a correct process tells others +if it accepted the $\Proposal$ for value $v$ by sending $\Prevote$ message for +$\id(v)$; otherwise it sends $\Prevote$ message with the special $\nil$ value. + +Every process maintains the following variables in the Algorithm +\ref{alg:tendermint}: $step$, $lockedValue$, $lockedRound$, $validValue$ and +$validRound$. The $step$ denotes the current state of the internal Tendermint +state machine, i.e., it reflects the stage of the algorithm execution in the +current round. The $lockedValue$ stores the most recent value (with respect to +a round number) for which a $\Precommit$ message has been sent. The +$lockedRound$ is the last round in which the process sent a $\Precommit$ +message that is not $\nil$. We also say that a correct process locks a value +$v$ in a round $r$ by setting $lockedValue = v$ and $lockedRound = r$ before +sending $\Precommit$ message for $\id(v)$. As a correct process can decide a +value $v$ only if $2f+1$ $\Precommit$ messages for $\id(v)$ are received, this +implies that a possible decision value is a value that is locked by at least +$f+1$ voting power equivalent of correct processes. Therefore, any value $v$ +for which $\Proposal$ and $2f+1$ of the corresponding $\Prevote$ messages are +received in some round $r$ is a \emph{possible decision} value. The role of the +$validValue$ variable is to store the most recent possible decision value; the +$validRound$ is the last round in which $validValue$ is updated. Apart from +those variables, a process also stores the current consensus instance ($h_p$, +called \emph{height} in Tendermint), and the current round number ($round_p$) +and attaches them to every message. Finally, a process also stores an array of +decisions, $decision_p$ (Tendermint assumes a sequence of consensus instances, +one for each height). + +Every round starts by a proposer suggesting a value with the $\Proposal$ +message (see line \ref{line:tab:send-proposal}). In the initial round of each +height, the proposer is free to chose the value to suggest. In the +Algorithm~\ref{alg:tendermint}, a correct process obtains a value to propose +using an external function $getValue()$ that returns a valid value to +propose. In the following rounds, a correct proposer will suggest a new value +only if $validValue = \nil$; otherwise $validValue$ is proposed (see +lines~\ref{line:tab:isThereLockedValue}-\ref{line:tab:getValidValue}). +In addition to the value proposed, the $\Proposal$ message also +contains the $validRound$ so other processes are informed about the last round +in which the proposer observed $validValue$ as a possible decision value. +Note that if a correct proposer $p$ sends $validValue$ with the $validRound$ in the +$\Proposal$, this implies that the process $p$ received $\Proposal$ and the +corresponding $2f+1$ $\Prevote$ messages for $validValue$ in the round +$validRound$. +If a correct process sends $\Proposal$ message with $validValue$ ($validRound > -1$) +at time $t > GST$, by the \emph{Gossip communication} property, the +corresponding $\Proposal$ and the $\Prevote$ messages will be received by all +correct processes before time $t+\Delta$. Therefore, all correct processes will +be able to verify the correctness of the suggested value as it is supported by +the $\Proposal$ and the corresponding $2f+1$ voting power equivalent $\Prevote$ +messages. + +A correct process $p$ accepts the proposal for a value $v$ (send $\Prevote$ +for $id(v)$) if an external \emph{valid} function returns $true$ for the value +$v$, and if $p$ hasn't locked any value ($lockedRound = -1$) or $p$ has locked +the value $v$ ($lockedValue = v$); see the line +\ref{line:tab:accept-proposal-2}. In case the proposed pair is $(v,vr \ge 0)$ and a +correct process $p$ has locked some value, it will accept +$v$ if it is a more recent possible decision value\footnote{As +explained above, the possible decision value in a round $r$ is the one for +which $\Proposal$ and the corresponding $2f+1$ $\Prevote$ messages are received +for the round $r$.}, $vr > lockedRound_p$, or if $lockedValue = v$ +(see line~\ref{line:tab:cond-prevote-higher-proposal}). Otherwise, a correct +process will reject the proposal by sending $\Prevote$ message with $\nil$ +value. A correct process will send $\Prevote$ message with $\nil$ value also in +case $\timeoutPropose$ expired (it is triggered when a correct process starts a +new round) and a process has not sent $\Prevote$ message in the current round +yet (see the line \ref{line:tab:onTimeoutPropose}). + +If a correct process receives $\Proposal$ message for some value $v$ and $2f+1$ +$\Prevote$ messages for $\id(v)$, then it sends $\Precommit$ message with +$\id(v)$. Otherwise, it sends $\Precommit$ $\nil$. A correct process will send +$\Precommit$ message with $\nil$ value also in case $\timeoutPrevote$ expired +(it is started when a correct process sent $\Prevote$ message and received any +$2f+1$ $\Prevote$ messages) and a process has not sent $\Precommit$ message in +the current round yet (see the line \ref{line:tab:onTimeoutPrecommit}). A +correct process decides on some value $v$ if it receives in some round $r$ +$\Proposal$ message for $v$ and $2f+1$ $\Precommit$ messages with $\id(v)$ (see +the line \ref{line:tab:decide}). To prevent the algorithm from blocking and +waiting forever for this condition to be true, the Algorithm +\ref{alg:tendermint} relies on $\timeoutPrecommit$. It is triggered after a +process receives any set of $2f+1$ $\Precommit$ messages for the current round. +If the $\timeoutPrecommit$ expires and a process has not decided yet, the +process starts the next round (see the line \ref{line:tab:onTimeoutPrecommit}). +When a correct process $p$ decides, it starts the next consensus instance +(for the next height). The \emph{Gossip communication} property ensures +that $\Proposal$ and $2f+1$ $\Prevote$ messages that led $p$ to decide +are eventually received by all correct processes, so they will also decide. + +\subsection{Termination mechanism} + +Tendermint ensures termination by a novel mechanism that benefits from the +gossip based nature of communication (see \emph{Gossip communication} +property). It requires managing two additional variables, $validValue$ and +$validRound$ that are then used by the proposer during the propose step as +explained above. The $validValue$ and $validRound$ are updated to $v$ and $r$ +by a correct process in a round $r$ when the process receives valid $\Proposal$ +message for the value $v$ and the corresponding $2f+1$ $\Prevote$ messages for +$id(v)$ in the round $r$ (see the rule at line~\ref{line:tab:recvPrevote}). + +We now give briefly the intuition how managing and proposing $validValue$ +and $validRound$ ensures termination. Formal treatment is left for +Section~\ref{sec:proof}. + +The first thing to note is that during good period, because of the +\emph{Gossip communication} property, if a correct process $p$ locks a value +$v$ in some round $r$, all correct processes will update $validValue$ to $v$ +and $validRound$ to $r$ before the end of the round $r$ (we prove this formally +in the Section~\ref{sec:proof}). The intuition is that messages that led to $p$ +locking a value $v$ in the round $r$ will be gossiped to all correct processes +before the end of the round $r$, so it will update $validValue$ and +$validRound$ (the line~\ref{line:tab:recvPrevote}). Therefore, if a correct +process locks some value during good period, $validValue$ and $validRound$ are +updated by all correct processes so that the value proposed in the following +rounds will be acceptable by all correct processes. Note +that it could happen that during good period, no correct process locks a value, +but some correct process $q$ updates $validValue$ and $validRound$ during some +round. As no correct process locks a value in this case, $validValue_q$ and +$validRound_q$ will also be acceptable by all correct processes as +$validRound_q > lockedRound_c$ for every correct process $c$ and as the +\emph{Gossip communication} property ensures that the corresponding $\Prevote$ +messages that $q$ received in the round $validRound_q$ are received by all +correct processes $\Delta$ time later. + +Finally, it could happen that after GST, there is a long sequence of rounds in which +no correct process neither locks a value nor update $validValue$ and $validRound$. +In this case, during this sequence of rounds, the proposed value suggested by correct +processes was not accepted by all correct processes. Note that this sequence of rounds +is always finite as at the beginning of every +round there is at least a single correct process $c$ such that $validValue_c$ +and $validRound_c$ are acceptable by every correct process. This is true as +there exists a correct process $c$ such that for every other correct process +$p$, $validRound_c > lockedRound_p$ or $validValue_c = lockedValue_p$. This is +true as $c$ is the process that has locked a value in the most recent round +among all correct processes (or no correct process locked any value). Therefore, +eventually $c$ will be the proper in some round and the proposed value will be accepted +by all correct processes, terminating therefore this sequence of +rounds. + +Therefore, updating $validValue$ and $validRound$ variables, and the +\emph{Gossip communication} property, together ensures that eventually, during +the good period, there exists a round with a correct proposer whose proposed +value will be accepted by all correct processes, and all correct processes will +terminate in that round. Note that this mechanism, contrary to the common +termination mechanism illustrated in the +Figure~\ref{ch3:fig:coordinator-change}, does not require exchanging any +additional information in addition to messages already sent as part of what is +normally being called "normal" case. + diff --git a/spec/consensus/consensus-paper/definitions.tex b/spec/consensus/consensus-paper/definitions.tex new file mode 100644 index 000000000..454dd445d --- /dev/null +++ b/spec/consensus/consensus-paper/definitions.tex @@ -0,0 +1,126 @@ +\section{Definitions} \label{sec:definitions} + +\subsection{Model} + +We consider a system of processes that communicate by exchanging messages. +Processes can be correct or faulty, where a faulty process can behave in an +arbitrary way, i.e., we consider Byzantine faults. We assume that each process +has some amount of voting power (voting power of a process can be $0$). +Processes in our model are not part of a single administrative domain; +therefore we cannot enforce a direct network connectivity between all +processes. Instead, we assume that each process is connected to a subset of +processes called peers, such that there is an indirect communication channel +between all correct processes. Communication between processes is established +using a gossip protocol \cite{Dem1987:gossip}. + +Formally, we model the network communication using a variant of the \emph{partially +synchronous system model}~\cite{DLS88:jacm}: in all executions of the system +there is a bound $\Delta$ and an instant GST (Global Stabilization Time) such +that all communication among correct processes after GST is reliable and +$\Delta$-timely, i.e., if a correct process $p$ sends message $m$ at time $t +\ge GST$ to a correct process $q$, then $q$ will receive $m$ before $t + +\Delta$\footnote{Note that as we do not assume direct communication channels + among all correct processes, this implies that before the message $m$ + reaches $q$, it might pass through a number of correct processes that will +forward the message $m$ using gossip protocol towards $q$.}. +In addition to the standard \emph{partially + synchronous system model}~\cite{DLS88:jacm}, we assume an auxiliary property +that captures gossip-based nature of communication\footnote{The details of the Tendermint gossip protocol will be discussed in a separate + technical report. }: + + +\begin{itemize} \item \emph{Gossip communication:} If a correct process $p$ + sends some message $m$ at time $t$, all correct processes will receive + $m$ before $max\{t, GST\} + \Delta$. Furthermore, if a correct process $p$ + receives some message $m$ at time $t$, all correct processes will receive + $m$ before $max\{t, GST\} + \Delta$. \end{itemize} + + +The bound $\Delta$ and GST are system +parameters whose values are not required to be known for the safety of our +algorithm. Termination of the algorithm is guaranteed within a bounded duration +after GST. In practice, the algorithm will work correctly in the slightly +weaker variant of the model where the system alternates between (long enough) +good periods (corresponds to the \emph{after} GST period where system is +reliable and $\Delta$-timely) and bad periods (corresponds to the period +\emph{before} GST during which the system is asynchronous and messages can be +lost), but consideration of the GST model simplifies the discussion. + +We assume that process steps (which might include sending and receiving +messages) take zero time. Processes are equipped with clocks so they can +measure local timeouts. +Spoofing/impersonation attacks are assumed to be impossible at all times due to +the use of public-key cryptography, i.e., we assume that all protocol messages contains a digital signature. +Therefore, when a correct +process $q$ receives a signed message $m$ from its peer, the process $q$ can +verify who was the original sender of the message $m$ and if the message signature is valid. +We do not explicitly state a signature verification step in the pseudo-code of the algorithm to improve readability; +we assume that only messages with the valid signature are considered at that level (and messages with invalid signatures +are dropped). + + + +%Messages that are being gossiped are created by the consensus layer. We can + %think about consensus protocol as a content creator, which %defines what + %messages should be disseminated using the gossip protocol. A correct + %process creates the message for dissemination either i) %explicitly, by + %invoking \emph{send} function as part of the consensus protocol or ii) + %implicitly, by receiving a message from some other %process. Note that in + %the case ii) gossiping of messages is implicit, i.e., it happens without + %explicit send clause in the consensus algorithm %whenever a correct + %process receives some messages in the consensus algorithm\footnote{If a + %message is received by a correct process at %the consensus level then it + %is considered valid from the protocol point of view, i.e., it has a + %correct signature, a proper message structure %and a valid height and + %round number.}. + +%\item Processes keep resending messages (in case of failures or message loss) + %until all its peers get them. This ensures that every message %sent or + %received by a correct process is eventually received by all correct + %processes. + +\subsection{State Machine Replication} + +State machine replication (SMR) is a general approach for replicating services +modeled as a deterministic state machine~\cite{Lam78:cacm,Sch90:survey}. The +key idea of this approach is to guarantee that all replicas start in the same +state and then apply requests from clients in the same order, thereby +guaranteeing that the replicas' states will not diverge. Following +Schneider~\cite{Sch90:survey}, we note that the following is key for +implementing a replicated state machine tolerant to (Byzantine) faults: + +\begin{itemize} \item \emph{Replica Coordination.} All [non-faulty] replicas + receive and process the same sequence of requests. \end{itemize} + +Moreover, as Schneider also notes, this property can be decomposed into two +parts, \emph{Agreement} and \emph{Order}: Agreement requires all (non-faulty) +replicas to receive all requests, and Order requires that the order of received +requests is the same at all replicas. + +There is an additional requirement that needs to be ensured by Byzantine +tolerant state machine replication: only requests (called transactions in the +Tendermint terminology) proposed by clients are executed. In Tendermint, +transaction verification is the responsibility of the service that is being +replicated; upon receiving a transaction from the client, the Tendermint +process will ask the service if the request is valid, and only valid requests +will be processed. + + \subsection{Consensus} \label{sec:consensus} + +Tendermint solves state machine replication by sequentially executing consensus +instances to agree on each block of transactions that are +then executed by the service being replicated. We consider a variant of the +Byzantine consensus problem called Validity Predicate-based Byzantine consensus +that is motivated by blockchain systems~\cite{GLR17:red-belly-bc}. The problem +is defined by an agreement, a termination, and a validity property. + + \begin{itemize} \item \emph{Agreement:} No two correct processes decide on + different values. \item \emph{Termination:} All correct processes + eventually decide on a value. \item \emph{Validity:} A decided value + is valid, i.e., it satisfies the predefined predicate denoted + \emph{valid()}. \end{itemize} + + This variant of the Byzantine consensus problem has an application-specific + \emph{valid()} predicate to indicate whether a value is valid. In the context + of blockchain systems, for example, a value is not valid if it does not + contain an appropriate hash of the last value (block) added to the blockchain. diff --git a/spec/consensus/consensus-paper/homodel.sty b/spec/consensus/consensus-paper/homodel.sty new file mode 100644 index 000000000..19f83e926 --- /dev/null +++ b/spec/consensus/consensus-paper/homodel.sty @@ -0,0 +1,32 @@ +\newcommand{\NC}{\mbox{\it NC}} +\newcommand{\HO}{\mbox{\it HO}} +\newcommand{\AS}{\mbox{\it AS}} +\newcommand{\SK}{\mbox{\it SK}} +\newcommand{\SHO}{\mbox{\it SHO}} +\newcommand{\AHO}{\mbox{\it AHO}} +\newcommand{\CONS}{\mbox{\it CONS}} +\newcommand{\K}{\mbox{\it K}} + +\newcommand{\Alg}{\mathcal{A}} +\newcommand{\Pred}{\mathcal{P}} +\newcommand{\Spr}{S_p^r} +\newcommand{\Tpr}{T_p^r} +\newcommand{\mupr}{\vec{\mu}_p^{\,r}} + +\newcommand{\MSpr}{S_p^{\rho}} +\newcommand{\MTpr}{T_p^{\rho}} + + + +\newconstruct{\SEND}{$\Spr$:}{}{\ENDSEND}{} +\newconstruct{\TRAN}{$\Tpr$:}{}{\ENDTRAN}{} +\newconstruct{\ROUND}{\textbf{Round}}{\!\textbf{:}}{\ENDROUND}{} +\newconstruct{\VARIABLES}{\textbf{Variables:}}{}{\ENDVARIABLES}{} +\newconstruct{\INIT}{\textbf{Initialization:}}{}{\ENDINIT}{} + +\newconstruct{\MSEND}{$\MSpr$:}{}{\ENDMSEND}{} +\newconstruct{\MTRAN}{$\MTpr$:}{}{\ENDMTRAN}{} + +\newconstruct{\SROUND}{\textbf{Selection Round}}{\!\textbf{:}}{\ENDSROUND}{} +\newconstruct{\VROUND}{\textbf{Validation Round}}{\!\textbf{:}}{\ENDVROUND}{} +\newconstruct{\DROUND}{\textbf{Decision Round}}{\!\textbf{:}}{\ENDDROUND}{} diff --git a/spec/consensus/consensus-paper/intro.tex b/spec/consensus/consensus-paper/intro.tex new file mode 100644 index 000000000..493b509e9 --- /dev/null +++ b/spec/consensus/consensus-paper/intro.tex @@ -0,0 +1,138 @@ +\section{Introduction} \label{sec:tendermint} + +Consensus is a fundamental problem in distributed computing. It +is important because of it's role in State Machine Replication (SMR), a generic +approach for replicating services that can be modeled as a deterministic state +machine~\cite{Lam78:cacm, Sch90:survey}. The key idea of this approach is that +service replicas start in the same initial state, and then execute requests +(also called transactions) in the same order; thereby guaranteeing that +replicas stay in sync with each other. The role of consensus in the SMR +approach is ensuring that all replicas receive transactions in the same order. +Traditionally, deployments of SMR based systems are in data-center settings +(local area network), have a small number of replicas (three to seven) and are +typically part of a single administration domain (e.g., Chubby +\cite{Bur:osdi06}); therefore they handle benign (crash) failures only, as more +general forms of failure (in particular, malicious or Byzantine faults) are +considered to occur with only negligible probability. + +The success of cryptocurrencies and blockchain systems in recent years (e.g., +\cite{Nak2012:bitcoin, But2014:ethereum}) pose a whole new set of challenges on +the design and deployment of SMR based systems: reaching agreement over wide +area network, among large number of nodes (hundreds or thousands) that are not +part of the same administrative domain, and where a subset of nodes can behave +maliciously (Byzantine faults). Furthermore, contrary to the previous +data-center deployments where nodes are fully connected to each other, in +blockchain systems, a node is only connected to a subset of other nodes, so +communication is achieved by gossip-based peer-to-peer protocols. +The new requirements demand designs and algorithms that are not necessarily +present in the classical academic literature on Byzantine fault tolerant +consensus (or SMR) systems (e.g., \cite{DLS88:jacm, CL02:tcs}) as the primary +focus was different setup. + +In this paper we describe a novel Byzantine-fault tolerant consensus algorithm +that is the core of the BFT SMR platform called Tendermint\footnote{The + Tendermint platform is available open source at + https://github.com/tendermint/tendermint.}. The Tendermint platform consists of +a high-performance BFT SMR implementation written in Go, a flexible interface +for +building arbitrary deterministic applications above the consensus, and a suite +of tools for deployment and management. + +The Tendermint consensus algorithm is inspired by the PBFT SMR +algorithm~\cite{CL99:osdi} and the DLS algorithm for authenticated faults (the +Algorithm 2 from \cite{DLS88:jacm}). Similar to DLS algorithm, Tendermint +proceeds in +rounds\footnote{Tendermint is not presented in the basic round model of + \cite{DLS88:jacm}. Furthermore, we use the term round differently than in + \cite{DLS88:jacm}; in Tendermint a round denotes a sequence of communication + steps instead of a single communication step in \cite{DLS88:jacm}.}, where each +round has a dedicated proposer (also called coordinator or +leader) and a process proceeds to a new round as part of normal +processing (not only in case the proposer is faulty or suspected as being faulty +by enough processes as in PBFT). +The communication pattern of each round is very similar to the "normal" case +of PBFT. Therefore, in preferable conditions (correct proposer, timely and +reliable communication between correct processes), Tendermint decides in three +communication steps (the same as PBFT). + +The major novelty and contribution of the Tendermint consensus algorithm is a +new termination mechanism. As explained in \cite{MHS09:opodis, RMS10:dsn}, the +existing BFT consensus (and SMR) algorithms for the partially synchronous +system model (for example PBFT~\cite{CL99:osdi}, \cite{DLS88:jacm}, +\cite{MA06:tdsc}) typically relies on the communication pattern illustrated in +Figure~\ref{ch3:fig:coordinator-change} for termination. The +Figure~\ref{ch3:fig:coordinator-change} illustrates messages exchanged during +the proposer change when processes start a new round\footnote{There is no + consistent terminology in the distributed computing terminology on naming + sequence of communication steps that corresponds to a logical unit. It is + sometimes called a round, phase or a view.}. It guarantees that eventually (ie. +after some Global Stabilization Time, GST), there exists a round with a correct +proposer that will bring the system into a univalent configuration. +Intuitively, in a round in which the proposed value is accepted +by all correct processes, and communication between correct processes is +timely and reliable, all correct processes decide. + + +\begin{figure}[tbh!] \def\rdstretch{5} \def\ystretch{3} \centering + \begin{rounddiag}{4}{2} \round{1}{~} \rdmessage{1}{1}{$v_1$} + \rdmessage{2}{1}{$v_2$} \rdmessage{3}{1}{$v_3$} \rdmessage{4}{1}{$v_4$} + \round{2}{~} \rdmessage{1}{1}{$x, [v_{1..4}]$} + \rdmessage{1}{2}{$~~~~~~x, [v_{1..4}]$} \rdmessage{1}{3}{$~~~~~~~~x, + [v_{1..4}]$} \rdmessage{1}{4}{$~~~~~~~x, [v_{1..4}]$} \end{rounddiag} + \vspace{-5mm} \caption{\boldmath Proposer (coordinator) change: $p_1$ is the + new proposer.} \label{ch3:fig:coordinator-change} \end{figure} + +To ensure that a proposed value is accepted by all correct +processes\footnote{The proposed value is not blindly accepted by correct + processes in BFT algorithms. A correct process always verifies if the proposed + value is safe to be accepted so that safety properties of consensus are not + violated.} +a proposer will 1) build the global state by receiving messages from other +processes, 2) select the safe value to propose and 3) send the selected value +together with the signed messages +received in the first step to support it. The +value $v_i$ that a correct process sends to the next proposer normally +corresponds to a value the process considers as acceptable for a decision: + +\begin{itemize} \item in PBFT~\cite{CL99:osdi} and DLS~\cite{DLS88:jacm} it is + not the value itself but a set of $2f+1$ signed messages with the same + value id, \item in Fast Byzantine Paxos~\cite{MA06:tdsc} the value + itself is being sent. \end{itemize} + +In both cases, using this mechanism in our system model (ie. high +number of nodes over gossip based network) would have high communication +complexity that increases with the number of processes: in the first case as +the message sent depends on the total number of processes, and in the second +case as the value (block of transactions) is sent by each process. The set of +messages received in the first step are normally piggybacked on the proposal +message (in the Figure~\ref{ch3:fig:coordinator-change} denoted with +$[v_{1..4}]$) to justify the choice of the selected value $x$. Note that +sending this message also does not scale with the number of processes in the +system. + +We designed a novel termination mechanism for Tendermint that better suits the +system model we consider. It does not require additional communication (neither +sending new messages nor piggybacking information on the existing messages) and +it is fully based on the communication pattern that is very similar to the +normal case in PBFT \cite{CL99:osdi}. Therefore, there is only a single mode of +execution in Tendermint, i.e., there is no separation between the normal and +the recovery mode, which is the case in other PBFT-like protocols (e.g., +\cite{CL99:osdi}, \cite{Ver09:spinning} or \cite{Cle09:aardvark}). We believe +this makes Tendermint simpler to understand and implement correctly. + +Note that the orthogonal approach for reducing message complexity in order to +improve +scalability and decentralization (number of processes) of BFT consensus +algorithms is using advanced cryptography (for example Boneh-Lynn-Shacham (BLS) +signatures \cite{BLS2001:crypto}) as done for example in SBFT +\cite{Gue2018:sbft}. + +The remainder of the paper is as follows: Section~\ref{sec:definitions} defines +the system model and gives the problem definitions. Tendermint +consensus algorithm is presented in Section~\ref{sec:tendermint} and the +proofs are given in Section~\ref{sec:proof}. We conclude in +Section~\ref{sec:conclusion}. + + + + diff --git a/spec/consensus/consensus-paper/latex8.bst b/spec/consensus/consensus-paper/latex8.bst new file mode 100644 index 000000000..2c7af5647 --- /dev/null +++ b/spec/consensus/consensus-paper/latex8.bst @@ -0,0 +1,1124 @@ + +% --------------------------------------------------------------- +% +% $Id: latex8.bst,v 1.1 1995/09/15 15:13:49 ienne Exp $ +% +% by Paolo.Ienne@di.epfl.ch +% + +% --------------------------------------------------------------- +% +% no guarantee is given that the format corresponds perfectly to +% IEEE 8.5" x 11" Proceedings, but most features should be ok. +% +% --------------------------------------------------------------- +% +% `latex8' from BibTeX standard bibliography style `abbrv' +% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09. +% Copyright (C) 1985, all rights reserved. +% Copying of this file is authorized only if either +% (1) you make absolutely no changes to your copy, including name, or +% (2) if you do make changes, you name it something other than +% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. +% This restriction helps ensure that all standard styles are identical. +% The file btxbst.doc has the documentation for this style. + +ENTRY + { address + author + booktitle + chapter + edition + editor + howpublished + institution + journal + key + month + note + number + organization + pages + publisher + school + series + title + type + volume + year + } + {} + { label } + +INTEGERS { output.state before.all mid.sentence after.sentence after.block } + +FUNCTION {init.state.consts} +{ #0 'before.all := + #1 'mid.sentence := + #2 'after.sentence := + #3 'after.block := +} + +STRINGS { s t } + +FUNCTION {output.nonnull} +{ 's := + output.state mid.sentence = + { ", " * write$ } + { output.state after.block = + { add.period$ write$ + newline$ + "\newblock " write$ + } + { output.state before.all = + 'write$ + { add.period$ " " * write$ } + if$ + } + if$ + mid.sentence 'output.state := + } + if$ + s +} + +FUNCTION {output} +{ duplicate$ empty$ + 'pop$ + 'output.nonnull + if$ +} + +FUNCTION {output.check} +{ 't := + duplicate$ empty$ + { pop$ "empty " t * " in " * cite$ * warning$ } + 'output.nonnull + if$ +} + +FUNCTION {output.bibitem} +{ newline$ + "\bibitem{" write$ + cite$ write$ + "}" write$ + newline$ + "" + before.all 'output.state := +} + +FUNCTION {fin.entry} +{ add.period$ + write$ + newline$ +} + +FUNCTION {new.block} +{ output.state before.all = + 'skip$ + { after.block 'output.state := } + if$ +} + +FUNCTION {new.sentence} +{ output.state after.block = + 'skip$ + { output.state before.all = + 'skip$ + { after.sentence 'output.state := } + if$ + } + if$ +} + +FUNCTION {not} +{ { #0 } + { #1 } + if$ +} + +FUNCTION {and} +{ 'skip$ + { pop$ #0 } + if$ +} + +FUNCTION {or} +{ { pop$ #1 } + 'skip$ + if$ +} + +FUNCTION {new.block.checka} +{ empty$ + 'skip$ + 'new.block + if$ +} + +FUNCTION {new.block.checkb} +{ empty$ + swap$ empty$ + and + 'skip$ + 'new.block + if$ +} + +FUNCTION {new.sentence.checka} +{ empty$ + 'skip$ + 'new.sentence + if$ +} + +FUNCTION {new.sentence.checkb} +{ empty$ + swap$ empty$ + and + 'skip$ + 'new.sentence + if$ +} + +FUNCTION {field.or.null} +{ duplicate$ empty$ + { pop$ "" } + 'skip$ + if$ +} + +FUNCTION {emphasize} +{ duplicate$ empty$ + { pop$ "" } + { "{\em " swap$ * "}" * } + if$ +} + +INTEGERS { nameptr namesleft numnames } + +FUNCTION {format.names} +{ 's := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't := + nameptr #1 > + { namesleft #1 > + { ", " * t * } + { numnames #2 > + { "," * } + 'skip$ + if$ + t "others" = + { " et~al." * } + { " and " * t * } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {format.authors} +{ author empty$ + { "" } + { author format.names } + if$ +} + +FUNCTION {format.editors} +{ editor empty$ + { "" } + { editor format.names + editor num.names$ #1 > + { ", editors" * } + { ", editor" * } + if$ + } + if$ +} + +FUNCTION {format.title} +{ title empty$ + { "" } + { title "t" change.case$ } + if$ +} + +FUNCTION {n.dashify} +{ 't := + "" + { t empty$ not } + { t #1 #1 substring$ "-" = + { t #1 #2 substring$ "--" = not + { "--" * + t #2 global.max$ substring$ 't := + } + { { t #1 #1 substring$ "-" = } + { "-" * + t #2 global.max$ substring$ 't := + } + while$ + } + if$ + } + { t #1 #1 substring$ * + t #2 global.max$ substring$ 't := + } + if$ + } + while$ +} + +FUNCTION {format.date} +{ year empty$ + { month empty$ + { "" } + { "there's a month but no year in " cite$ * warning$ + month + } + if$ + } + { month empty$ + 'year + { month " " * year * } + if$ + } + if$ +} + +FUNCTION {format.btitle} +{ title emphasize +} + +FUNCTION {tie.or.space.connect} +{ duplicate$ text.length$ #3 < + { "~" } + { " " } + if$ + swap$ * * +} + +FUNCTION {either.or.check} +{ empty$ + 'pop$ + { "can't use both " swap$ * " fields in " * cite$ * warning$ } + if$ +} + +FUNCTION {format.bvolume} +{ volume empty$ + { "" } + { "volume" volume tie.or.space.connect + series empty$ + 'skip$ + { " of " * series emphasize * } + if$ + "volume and number" number either.or.check + } + if$ +} + +FUNCTION {format.number.series} +{ volume empty$ + { number empty$ + { series field.or.null } + { output.state mid.sentence = + { "number" } + { "Number" } + if$ + number tie.or.space.connect + series empty$ + { "there's a number but no series in " cite$ * warning$ } + { " in " * series * } + if$ + } + if$ + } + { "" } + if$ +} + +FUNCTION {format.edition} +{ edition empty$ + { "" } + { output.state mid.sentence = + { edition "l" change.case$ " edition" * } + { edition "t" change.case$ " edition" * } + if$ + } + if$ +} + +INTEGERS { multiresult } + +FUNCTION {multi.page.check} +{ 't := + #0 'multiresult := + { multiresult not + t empty$ not + and + } + { t #1 #1 substring$ + duplicate$ "-" = + swap$ duplicate$ "," = + swap$ "+" = + or or + { #1 'multiresult := } + { t #2 global.max$ substring$ 't := } + if$ + } + while$ + multiresult +} + +FUNCTION {format.pages} +{ pages empty$ + { "" } + { pages multi.page.check + { "pages" pages n.dashify tie.or.space.connect } + { "page" pages tie.or.space.connect } + if$ + } + if$ +} + +FUNCTION {format.vol.num.pages} +{ volume field.or.null + number empty$ + 'skip$ + { "(" number * ")" * * + volume empty$ + { "there's a number but no volume in " cite$ * warning$ } + 'skip$ + if$ + } + if$ + pages empty$ + 'skip$ + { duplicate$ empty$ + { pop$ format.pages } + { ":" * pages n.dashify * } + if$ + } + if$ +} + +FUNCTION {format.chapter.pages} +{ chapter empty$ + 'format.pages + { type empty$ + { "chapter" } + { type "l" change.case$ } + if$ + chapter tie.or.space.connect + pages empty$ + 'skip$ + { ", " * format.pages * } + if$ + } + if$ +} + +FUNCTION {format.in.ed.booktitle} +{ booktitle empty$ + { "" } + { editor empty$ + { "In " booktitle emphasize * } + { "In " format.editors * ", " * booktitle emphasize * } + if$ + } + if$ +} + +FUNCTION {empty.misc.check} + +{ author empty$ title empty$ howpublished empty$ + month empty$ year empty$ note empty$ + and and and and and + key empty$ not and + { "all relevant fields are empty in " cite$ * warning$ } + 'skip$ + if$ +} + +FUNCTION {format.thesis.type} +{ type empty$ + 'skip$ + { pop$ + type "t" change.case$ + } + if$ +} + +FUNCTION {format.tr.number} +{ type empty$ + { "Technical Report" } + 'type + if$ + number empty$ + { "t" change.case$ } + { number tie.or.space.connect } + if$ +} + +FUNCTION {format.article.crossref} +{ key empty$ + { journal empty$ + { "need key or journal for " cite$ * " to crossref " * crossref * + warning$ + "" + } + { "In {\em " journal * "\/}" * } + if$ + } + { "In " key * } + if$ + " \cite{" * crossref * "}" * +} + +FUNCTION {format.crossref.editor} +{ editor #1 "{vv~}{ll}" format.name$ + editor num.names$ duplicate$ + #2 > + { pop$ " et~al." * } + { #2 < + 'skip$ + { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = + { " et~al." * } + { " and " * editor #2 "{vv~}{ll}" format.name$ * } + if$ + } + if$ + } + if$ +} + +FUNCTION {format.book.crossref} +{ volume empty$ + { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ + "In " + } + { "Volume" volume tie.or.space.connect + " of " * + } + if$ + editor empty$ + editor field.or.null author field.or.null = + or + { key empty$ + { series empty$ + { "need editor, key, or series for " cite$ * " to crossref " * + crossref * warning$ + "" * + } + { "{\em " * series * "\/}" * } + if$ + } + { key * } + if$ + } + { format.crossref.editor * } + if$ + " \cite{" * crossref * "}" * +} + +FUNCTION {format.incoll.inproc.crossref} +{ editor empty$ + editor field.or.null author field.or.null = + or + { key empty$ + { booktitle empty$ + { "need editor, key, or booktitle for " cite$ * " to crossref " * + crossref * warning$ + "" + } + { "In {\em " booktitle * "\/}" * } + if$ + } + { "In " key * } + if$ + } + { "In " format.crossref.editor * } + if$ + " \cite{" * crossref * "}" * +} + +FUNCTION {article} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + crossref missing$ + { journal emphasize "journal" output.check + format.vol.num.pages output + format.date "year" output.check + } + { format.article.crossref output.nonnull + format.pages output + } + if$ + new.block + note output + fin.entry +} + +FUNCTION {book} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check } + { format.authors output.nonnull + crossref missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + new.block + format.btitle "title" output.check + crossref missing$ + { format.bvolume output + new.block + format.number.series output + new.sentence + publisher "publisher" output.check + address output + } + { new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + format.date "year" output.check + new.block + note output + fin.entry +} + +FUNCTION {booklet} +{ output.bibitem + format.authors output + new.block + format.title "title" output.check + howpublished address new.block.checkb + howpublished output + address output + format.date output + new.block + note output + fin.entry +} + +FUNCTION {inbook} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check } + { format.authors output.nonnull + + crossref missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + new.block + format.btitle "title" output.check + crossref missing$ + { format.bvolume output + format.chapter.pages "chapter and pages" output.check + new.block + format.number.series output + new.sentence + publisher "publisher" output.check + address output + } + { format.chapter.pages "chapter and pages" output.check + new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + format.date "year" output.check + new.block + note output + fin.entry +} + +FUNCTION {incollection} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.chapter.pages output + new.sentence + publisher "publisher" output.check + address output + format.edition output + format.date "year" output.check + } + { format.incoll.inproc.crossref output.nonnull + format.chapter.pages output + } + if$ + new.block + note output + fin.entry +} + +FUNCTION {inproceedings} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.pages output + address empty$ + { organization publisher new.sentence.checkb + organization output + publisher output + format.date "year" output.check + } + { address output.nonnull + format.date "year" output.check + new.sentence + organization output + publisher output + } + if$ + } + { format.incoll.inproc.crossref output.nonnull + format.pages output + } + if$ + new.block + note output + fin.entry +} + +FUNCTION {conference} { inproceedings } + +FUNCTION {manual} +{ output.bibitem + author empty$ + { organization empty$ + 'skip$ + { organization output.nonnull + address output + } + if$ + } + { format.authors output.nonnull } + if$ + new.block + format.btitle "title" output.check + author empty$ + { organization empty$ + { address new.block.checka + address output + } + 'skip$ + if$ + } + { organization address new.block.checkb + organization output + address output + } + if$ + format.edition output + format.date output + new.block + note output + fin.entry +} + +FUNCTION {mastersthesis} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + "Master's thesis" format.thesis.type output.nonnull + school "school" output.check + address output + format.date "year" output.check + new.block + note output + fin.entry +} + +FUNCTION {misc} +{ output.bibitem + format.authors output + title howpublished new.block.checkb + format.title output + howpublished new.block.checka + howpublished output + format.date output + new.block + note output + fin.entry + empty.misc.check +} + +FUNCTION {phdthesis} +{ output.bibitem + format.authors "author" output.check + new.block + format.btitle "title" output.check + new.block + "PhD thesis" format.thesis.type output.nonnull + school "school" output.check + address output + format.date "year" output.check + new.block + note output + fin.entry +} + +FUNCTION {proceedings} +{ output.bibitem + editor empty$ + { organization output } + { format.editors output.nonnull } + + if$ + new.block + format.btitle "title" output.check + format.bvolume output + format.number.series output + address empty$ + { editor empty$ + { publisher new.sentence.checka } + { organization publisher new.sentence.checkb + organization output + } + if$ + publisher output + format.date "year" output.check + } + { address output.nonnull + format.date "year" output.check + new.sentence + editor empty$ + 'skip$ + { organization output } + if$ + publisher output + } + if$ + new.block + note output + fin.entry +} + +FUNCTION {techreport} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + format.tr.number output.nonnull + institution "institution" output.check + address output + format.date "year" output.check + new.block + note output + fin.entry +} + +FUNCTION {unpublished} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + note "note" output.check + format.date output + fin.entry +} + +FUNCTION {default.type} { misc } + +MACRO {jan} {"Jan."} + +MACRO {feb} {"Feb."} + +MACRO {mar} {"Mar."} + +MACRO {apr} {"Apr."} + +MACRO {may} {"May"} + +MACRO {jun} {"June"} + +MACRO {jul} {"July"} + +MACRO {aug} {"Aug."} + +MACRO {sep} {"Sept."} + +MACRO {oct} {"Oct."} + +MACRO {nov} {"Nov."} + +MACRO {dec} {"Dec."} + +MACRO {acmcs} {"ACM Comput. Surv."} + +MACRO {acta} {"Acta Inf."} + +MACRO {cacm} {"Commun. ACM"} + +MACRO {ibmjrd} {"IBM J. Res. Dev."} + +MACRO {ibmsj} {"IBM Syst.~J."} + +MACRO {ieeese} {"IEEE Trans. Softw. Eng."} + +MACRO {ieeetc} {"IEEE Trans. Comput."} + +MACRO {ieeetcad} + {"IEEE Trans. Comput.-Aided Design Integrated Circuits"} + +MACRO {ipl} {"Inf. Process. Lett."} + +MACRO {jacm} {"J.~ACM"} + +MACRO {jcss} {"J.~Comput. Syst. Sci."} + +MACRO {scp} {"Sci. Comput. Programming"} + +MACRO {sicomp} {"SIAM J. Comput."} + +MACRO {tocs} {"ACM Trans. Comput. Syst."} + +MACRO {tods} {"ACM Trans. Database Syst."} + +MACRO {tog} {"ACM Trans. Gr."} + +MACRO {toms} {"ACM Trans. Math. Softw."} + +MACRO {toois} {"ACM Trans. Office Inf. Syst."} + +MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."} + +MACRO {tcs} {"Theoretical Comput. Sci."} + +READ + +FUNCTION {sortify} +{ purify$ + "l" change.case$ +} + +INTEGERS { len } + +FUNCTION {chop.word} +{ 's := + 'len := + s #1 len substring$ = + { s len #1 + global.max$ substring$ } + 's + if$ +} + +FUNCTION {sort.format.names} +{ 's := + #1 'nameptr := + "" + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { nameptr #1 > + { " " * } + 'skip$ + if$ + s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't := + nameptr numnames = t "others" = and + { "et al" * } + { t sortify * } + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {sort.format.title} +{ 't := + "A " #2 + "An " #3 + "The " #4 t chop.word + chop.word + chop.word + sortify + #1 global.max$ substring$ +} + +FUNCTION {author.sort} +{ author empty$ + { key empty$ + { "to sort, need author or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {author.editor.sort} +{ author empty$ + { editor empty$ + { key empty$ + { "to sort, need author, editor, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { editor sort.format.names } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {author.organization.sort} +{ author empty$ + + { organization empty$ + { key empty$ + { "to sort, need author, organization, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { "The " #4 organization chop.word sortify } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {editor.organization.sort} +{ editor empty$ + { organization empty$ + { key empty$ + { "to sort, need editor, organization, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { "The " #4 organization chop.word sortify } + if$ + } + { editor sort.format.names } + if$ +} + +FUNCTION {presort} +{ type$ "book" = + type$ "inbook" = + or + 'author.editor.sort + { type$ "proceedings" = + 'editor.organization.sort + { type$ "manual" = + 'author.organization.sort + 'author.sort + if$ + } + if$ + } + if$ + " " + * + year field.or.null sortify + * + " " + * + title field.or.null + sort.format.title + * + #1 entry.max$ substring$ + 'sort.key$ := +} + +ITERATE {presort} + +SORT + +STRINGS { longest.label } + +INTEGERS { number.label longest.label.width } + +FUNCTION {initialize.longest.label} +{ "" 'longest.label := + #1 'number.label := + #0 'longest.label.width := +} + +FUNCTION {longest.label.pass} +{ number.label int.to.str$ 'label := + number.label #1 + 'number.label := + label width$ longest.label.width > + { label 'longest.label := + label width$ 'longest.label.width := + } + 'skip$ + if$ +} + +EXECUTE {initialize.longest.label} + +ITERATE {longest.label.pass} + +FUNCTION {begin.bib} +{ preamble$ empty$ + 'skip$ + { preamble$ write$ newline$ } + if$ + "\begin{thebibliography}{" longest.label * + "}\setlength{\itemsep}{-1ex}\small" * write$ newline$ +} + +EXECUTE {begin.bib} + +EXECUTE {init.state.consts} + +ITERATE {call.type$} + +FUNCTION {end.bib} +{ newline$ + "\end{thebibliography}" write$ newline$ +} + +EXECUTE {end.bib} + +% end of file latex8.bst +% --------------------------------------------------------------- + + + diff --git a/spec/consensus/consensus-paper/latex8.sty b/spec/consensus/consensus-paper/latex8.sty new file mode 100644 index 000000000..1e6b0dc7e --- /dev/null +++ b/spec/consensus/consensus-paper/latex8.sty @@ -0,0 +1,168 @@ +% --------------------------------------------------------------- +% +% $Id: latex8.sty,v 1.2 1995/09/15 15:31:13 ienne Exp $ +% +% by Paolo.Ienne@di.epfl.ch +% +% --------------------------------------------------------------- +% +% no guarantee is given that the format corresponds perfectly to +% IEEE 8.5" x 11" Proceedings, but most features should be ok. +% +% --------------------------------------------------------------- +% with LaTeX2e: +% ============= +% +% use as +% \documentclass[times,10pt,twocolumn]{article} +% \usepackage{latex8} +% \usepackage{times} +% +% --------------------------------------------------------------- + +% with LaTeX 2.09: +% ================ +% +% use as +% \documentstyle[times,art10,twocolumn,latex8]{article} +% +% --------------------------------------------------------------- +% with both versions: +% =================== +% +% specify \pagestyle{empty} to omit page numbers in the final +% version +% +% specify references as +% \bibliographystyle{latex8} +% \bibliography{...your files...} +% +% use Section{} and SubSection{} instead of standard section{} +% and subsection{} to obtain headings in the form +% "1.3. My heading" +% +% --------------------------------------------------------------- + +\typeout{IEEE 8.5 x 11-Inch Proceedings Style `latex8.sty'.} + +% ten point helvetica bold required for captions +% in some sites the name of the helvetica bold font may differ, +% change the name here: +\font\tenhv = phvb at 10pt +%\font\tenhv = phvb7t at 10pt + +% eleven point times bold required for second-order headings +% \font\elvbf = cmbx10 scaled 1100 +\font\elvbf = ptmb scaled 1100 + +% set dimensions of columns, gap between columns, and paragraph indent +\setlength{\textheight}{8.875in} +\setlength{\textwidth}{6.875in} +\setlength{\columnsep}{0.3125in} +\setlength{\topmargin}{0in} +\setlength{\headheight}{0in} +\setlength{\headsep}{0in} +\setlength{\parindent}{1pc} +\setlength{\oddsidemargin}{-.304in} +\setlength{\evensidemargin}{-.304in} + +% memento from size10.clo +% \normalsize{\@setfontsize\normalsize\@xpt\@xiipt} +% \small{\@setfontsize\small\@ixpt{11}} +% \footnotesize{\@setfontsize\footnotesize\@viiipt{9.5}} +% \scriptsize{\@setfontsize\scriptsize\@viipt\@viiipt} +% \tiny{\@setfontsize\tiny\@vpt\@vipt} +% \large{\@setfontsize\large\@xiipt{14}} +% \Large{\@setfontsize\Large\@xivpt{18}} +% \LARGE{\@setfontsize\LARGE\@xviipt{22}} +% \huge{\@setfontsize\huge\@xxpt{25}} +% \Huge{\@setfontsize\Huge\@xxvpt{30}} + +\def\@maketitle + { + \newpage + \null + \vskip .375in + \begin{center} + {\Large \bf \@title \par} + % additional two empty lines at the end of the title + \vspace*{24pt} + { + \large + \lineskip .5em + \begin{tabular}[t]{c} + \@author + \end{tabular} + \par + } + % additional small space at the end of the author name + \vskip .5em + { + \large + \begin{tabular}[t]{c} + \@affiliation + \end{tabular} + \par + \ifx \@empty \@email + \else + \begin{tabular}{r@{~}l} + E-mail: & {\tt \@email} + \end{tabular} + \par + \fi + } + % additional empty line at the end of the title block + \vspace*{12pt} + \end{center} + } + +\def\abstract + {% + \centerline{\large\bf Abstract}% + \vspace*{12pt}% + \it% + } + +\def\endabstract + { + % additional empty line at the end of the abstract + \vspace*{12pt} + } + +\def\affiliation#1{\gdef\@affiliation{#1}} \gdef\@affiliation{} + +\def\email#1{\gdef\@email{#1}} +\gdef\@email{} + +\newlength{\@ctmp} +\newlength{\@figindent} +\setlength{\@figindent}{1pc} + +\long\def\@makecaption#1#2{ + \vskip 10pt + \setbox\@tempboxa\hbox{\tenhv\noindent #1.~#2} + \setlength{\@ctmp}{\hsize} + \addtolength{\@ctmp}{-\@figindent}\addtolength{\@ctmp}{-\@figindent} + % IF longer than one indented paragraph line + \ifdim \wd\@tempboxa >\@ctmp + % THEN set as an indented paragraph + \begin{list}{}{\leftmargin\@figindent \rightmargin\leftmargin} + \item[]\tenhv #1.~#2\par + \end{list} + \else + % ELSE center + \hbox to\hsize{\hfil\box\@tempboxa\hfil} + \fi} + +% correct heading spacing and type +\def\section{\@startsection {section}{1}{\z@} + {14pt plus 2pt minus 2pt}{14pt plus 2pt minus 2pt} {\large\bf}} +\def\subsection{\@startsection {subsection}{2}{\z@} + {13pt plus 2pt minus 2pt}{13pt plus 2pt minus 2pt} {\elvbf}} + +% add the period after section numbers +\newcommand{\Section}[1]{\section{\hskip -1em.~#1}} +\newcommand{\SubSection}[1]{\subsection{\hskip -1em.~#1}} + +% end of file latex8.sty +% --------------------------------------------------------------- diff --git a/spec/consensus/consensus-paper/lit.bib b/spec/consensus/consensus-paper/lit.bib new file mode 100644 index 000000000..4abc83e70 --- /dev/null +++ b/spec/consensus/consensus-paper/lit.bib @@ -0,0 +1,1659 @@ +%--- conferences -------------------------------------------------- +@STRING{WDAG96 = "Proceedings of the 10th International Workshop + on Distributed Algorithms (WDAG'96)"} +@STRING{WDAG97 = "Proceedings of the 11th International Workshop + on Distributed Algorithms (WDAG'97)"} +@STRING{DISC98 = "Proceedings of the 12th International Conference + on Distributed Computing ({DISC}'98)"} +@STRING{DISC99 = "Proceedings of the 13th International Conference + on Distributed Computing ({DISC}'99)"} +@STRING{DISC98 = "Proceedings of the 13th International Conference + on Distributed Computing ({DISC}'98)"} +@STRING{DISC99 = "Proceedings of the 13th International Conference + on Distributed Computing ({DISC}'99)"} +@STRING{DISC00 = "Proceedings of the 14th International Conference + on Distributed Computing ({DISC}'00)"} +@STRING{DISC01 = "Proceedings of the 15th International Conference + on Distributed Computing ({DISC}'01)"} +@STRING{DISC02 = "Proceedings of the 16th International Conference + on Distributed Computing ({DISC}'02)"} +@STRING{DISC03 = "Proceedings of the 17th International Conference + on Distributed Computing ({DISC}'03)"} +@STRING{DISC04 = "Proceedings of the 18th International Conference + on Distributed Computing ({DISC}'04)"} +@STRING{DISC05 = "Proceedings of the 19th International Conference + on Distributed Computing ({DISC}'05)"} +@STRING{PODC83 = "Proceeding of the 1st Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'83)"} +@STRING{PODC91 = "Proceeding of the 9th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'91)"} +@STRING{PODC94 = "Proceeding of the 12th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'94)"} +@STRING{PODC95 = "Proceeding of the 13th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'95)"} +@STRING{PODC96 = "Proceeding of the 14th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'96)"} +@STRING{PODC97 = "Proceeding of the 15th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'97)"} +@STRING{PODC98 = "Proceeding of the 16th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'98)"} +@STRING{PODC99 = "Proceeding of the 17th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'99)"} +@STRING{PODC00 = "Proceeding of the 18th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'00)"} +@STRING{PODC01 = "Proceeding of the 19th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'01)"} +@STRING{PODC02 = "Proceeding of the 20th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'02)"} +@STRING{PODC03 = "Proceeding of the 21st Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'03)"} +@STRING{PODC03 = "Proceeding of the 22nd Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'03)"} +@STRING{PODC04 = "Proceeding of the 23rd Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'04)"} +@STRING{PODC05 = "Proceeding of the 24th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'05)"} +@STRING{PODC06 = "Proceedings of the 25th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'06)"} +@STRING{PODC07 = "Proceedings of the 26th Annual {ACM} Symposium on + Principles of Distributed Computing ({PODC}'07)"} +@STRING{STOC91 = "Proceedings of the 23rd Annual {ACM} Symposium on + Theory of Computing ({STOC}'91)"} +@STRING{WSS01 = "Proceedings of the 5th International Workshop on + Self-Stabilizing Systems ({WSS} '01)"} +@STRING{SSS06 = "Proceedings of the 8th International Symposium on + Stabilization, Safety, and Security of Distributed + Systems ({SSS} '06)"} +@STRING{DSN00 = "Dependable Systems and Networks ({DSN} 2000)"} +@STRING{DSN05 = "Dependable Systems and Networks ({DSN} 2005)"} +@STRING{DSN06 = "Dependable Systems and Networks ({DSN} 2006)"} +@STRING{DSN07 = "Dependable Systems and Networks ({DSN} 2007)"} + +%--- journals ----------------------------------------------------- +@STRING{PPL = "Parallel Processing Letters"} +@STRING{IPL = "Information Processing Letters"} +@STRING{DC = "Distributed Computing"} +@STRING{JACM = "Journal of the ACM"} +@STRING{IC = "Information and Control"} +@STRING{TCS = "Theoretical Computer Science"} +@STRING{ACMTCS = "ACM Transactions on Computer Systems"} +@STRING{TDSC = "Transactions on Dependable and Secure Computing"} +@STRING{TPLS = "ACM Trans. Program. Lang. Syst."} + +%--- publisher ---------------------------------------------------- +@STRING{ACM = "ACM Press"} +@STRING{IEEE = "IEEE"} +@STRING{SPR = "Springer-Verlag"} + +%--- institution -------------------------------------------------- +@STRING{TUAuto = {Technische Universit\"at Wien, Department of + Automation}} +@STRING{TUECS = {Technische Universit\"at Wien, Embedded Computing + Systems Group}} + + +%------------------------------------------------------------------ +@article{ABND+90:jacm, + author = {Hagit Attiya and Amotz Bar-Noy and Danny Dolev and + David Peleg and R{\"u}diger Reischuk}, + title = {Renaming in an asynchronous environment}, + journal = JACM, + volume = {37}, + number = {3}, + year = {1990}, + pages = {524--548}, + publisher = ACM, + address = {New York, NY, USA}, +} + +@article{ABND95:jacm, + author = {Hagit Attiya and Amotz Bar-Noy and Danny Dolev}, + title = {Sharing memory robustly in message-passing systems}, + journal = JACM, + volume = {42}, + number = {1}, + year = {1995}, + pages = {124--142}, + publisher = ACM, + address = {New York, NY, USA}, +} + +@inproceedings{ACKM04:podc, + author = {Ittai Abraham and Gregory Chockler and Idit Keidar + and Dahlia Malkhi}, + title = {Byzantine disk paxos: optimal resilience with + byzantine shared memory.}, + booktitle = PODC04, + year = {2004}, + pages = {226-235} +} + +@article{ACKM05:dc, + author = {Ittai Abraham and Gregory Chockler and Idit Keidar + and Dahlia Malkhi}, + title = {Byzantine disk paxos: optimal resilience with + byzantine shared memory.}, + journal = DC, + volume = {18}, + number = {5}, + year = {2006}, + pages = {387-408} +} + +@article{ACT00:dc, + author = "Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg", + title = "Failure Detection and Consensus in the + Crash-Recovery Model", + journal = DC, + year = 2000, + month = apr, + volume = 13, + number = 2, + pages = "99--125", + url = + "http://www.cs.cornell.edu/home/sam/FDpapers/crash-recovery-finaldcversion.ps" +} + +@article{ACT00:siam, + author = "Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg", + title = "On quiescent reliable communication", + journal = "SIAM Journal of Computing", + year = 2000, + volume = 29, + number = 6, + pages = "2040--2073", + month = apr +} + +@inproceedings{ACT97:wdag, + author = "Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg", + title = "Heartbeat: A Timeout-Free Failure Detector for + Quiescent Reliable Communication", + booktitle = WDAG97, + year = 1997, + pages = "126--140", + url = + "http://simon.cs.cornell.edu/Info/People/weichen/research/mypapers/wdag97final.ps" +} + +@article{ACT98:disc, + author = "Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg", + title = "Failure Detection and Consensus in the + Crash-Recovery Model", + journal = DISC98, + year = 1998, + pages = "231--245", + publisher = SPR +} + +@article{ACT99:tcs, + author = "Marcos Kawazoe Aguilera and Wei Chen and Sam Toueg", + title = "Using the Heartbeat Failure Detector for Quiescent + Reliable Communication and Consensus in + Partitionable Networks", + journal = "Theoretical Computer Science", + year = 1999, + month = jun, + volume = 220, + number = 1, + pages = "3--30", + url = + "http://www.cs.cornell.edu/home/sam/FDpapers/TCS98final.ps" +} + +@inproceedings{ADGF+04:ispdc, + author = {Anceaume, Emmanuelle and Delporte-Gallet, Carole and + Fauconnier, Hugues and Hurfin, Michel and Le Lann, + G{\'e}rard }, + title = {Designing Modular Services in the Scattered + Byzantine Failure Model.}, + booktitle = {ISPDC/HeteroPar}, + year = {2004}, + pages = {262-269} +} + +@inproceedings{ADGF+06:dsn, + author = {Marcos Kawazoe Aguilera and Carole Delporte-Gallet + and Hugues Fauconnier and Sam Toueg}, + title = {Consensus with Byzantine Failures and Little System + Synchrony.}, + booktitle = DSN06, + year = {2006}, + pages = {147-155} +} + +@inproceedings{ADGFT01:disc, + author = "Marcos Kawazoe Aguilera and Carole Delporte-Gallet + and Hugues Fauconnier and Sam Toueg", + title = "Stable Leader Election", + booktitle = DISC01, + year = 2001, + pages = "108--122", + publisher = SPR +} + +@inproceedings{ADGFT03:podc, + author = "Marcos K. Aguilera and Carole Delporte-Gallet and + Hugues Fauconnier and Sam Toueg", + title = "On implementing {O}mega with weak reliability and + synchrony assumptions", + booktitle = PODC03, + year = 2003, + publisher = ACM +} + +@inproceedings{ADGFT04:podc, + author = {Marcos K. Aguilera and Carole Delporte-Gallet and + Hugues Fauconnier and Sam Toueg}, + title = {Communication-efficient leader election and + consensus with limited link synchrony}, + booktitle = PODC04, + year = 2004, + pages = {328--337}, + address = {St. John's, Newfoundland, Canada}, + publisher = ACM +} + +@inproceedings{ADGFT06:dsn, + author = {Marcos Kawazoe Aguilera and Carole Delporte-Gallet + and Hugues Fauconnier and Sam Toueg}, + title = {Consensus with Byzantine Failures and Little System + Synchrony.}, + booktitle = DSN06, + year = 2006, + pages = {147-155}, + ee = + {http://doi.ieeecomputersociety.org/10.1109/DSN.2006.22}, + bibsource = {DBLP, http://dblp.uni-trier.de} +} + +@inproceedings{ADLS91:stoc, + author = "Hagit Attiya and Cynthia Dwork and Nancy A. Lynch + and Larry J. Stockmeyer", + title = "Bounds on the Time to Reach Agreement in the + Presence of Timing Uncertainty", + booktitle = STOC91, + year = 1991, + pages = "359--369", +} + +@article{AT99:ipl, + author = "Marcos Kawazoe Aguilera and Sam Toueg", + title = "A Simple Bivalency Proof that t -Resilient Consensus + Requires t + 1 Rounds", + journal = IPL, + volume = "71", + number = "3-4", + pages = "155--158", + year = "1999" +} + +@Book{AW04:book, + author = {Attiya, Hagit and Welch, Jennifer}, + title = {Distributed Computing}, + publisher = {John Wiley {\&} Sons}, + edition = {2nd}, + year = {2004} +} + +@Book{AW98:book, + author = {Hagit Attiya and Jennifer Welch}, + title = {Distributed Computing}, + publisher = {McGraw-Hill Publishing Company}, + year = {1998} +} + +@InBook{AW98:book:chap12, + author = {Hagit Attiya and Jennifer Welch}, + title = {Distributed Computing}, + publisher = {McGraw-Hill Publishing Company}, + year = {1998}, + chapter = {12, "Improving the fault-tolerance of algorithms"} +} + +@inproceedings{ABHMS11:disc, + author = {Hagit Attiya and + Fatemeh Borran and + Martin Hutle and + Zarko Milosevic and + Andr{\'e} Schiper}, + title = {Structured Derivation of Semi-Synchronous Algorithms}, + booktitle = {DISC}, + year = {2011}, + pages = {374-388} +} + +@inproceedings{BCBG+07:podc, + author = {Martin Biely and Bernadette Charron-Bost and Antoine + Gaillard and Martin Hutle and Andr{\'e} Schiper and + Josef Widder}, + title = {Tolerating Corrupted Communication}, + publisher = ACM, + booktitle = PODC07, + year = {2007} +} + +@InProceedings{BCBT96:wdag, + author = {Anindya Basu and Bernadette Charron-Bost and Sam + Toueg}, + title = {Simulating Reliable Links with Unreliable Links in + the Presence of Process Crashes}, + pages = {105--122}, + booktitle = {WDAG 1996}, + editor = {Babao{\u g}lu, {\"O}zalp}, + year = {1996}, + month = {Oct}, + volume = {1151}, + ISBN = {3-540-61769-8}, + pubisher = {Springer}, + series = {Lecture Notes in Computer Science}, +} + +@article{BDFG03:sigact, + author = "R. Boichat and P. Dutta and S. Frolund and + R. Guerraoui", + title = "Reconstructing {P}axos", + journal = "ACM SIGACT News", + year = "2003", + volume = "34", + number = "1", + pages = "47-67" +} + +@unpublished{BHR+06:note, + author = "Martin Biely and Martin Hutle and Sergio Rajsbaum + and Ulrich Schmid and Corentin Travers and Josef + Widder", + title = "Discussion note on moving timely links", + note = "Unpublished", + month = apr, + year = 2006 +} + +@article{BHRT03:jda, + author = {Roberto Baldoni and Jean-Michel H{\'e}lary and + Michel Raynal and L{\'e}naick Tanguy}, + title = {Consensus in Byzantine asynchronous systems.}, + journal = {J. Discrete Algorithms}, + volume = {1}, + number = {2}, + year = {2003}, + pages = {185-210}, + ee = {http://dx.doi.org/10.1016/S1570-8667(03)00025-X}, + bibsource = {DBLP, http://dblp.uni-trier.de} +} + +@unpublished{BHSS08:tdsc, + author = {Fatemeh Borran and Martin Hutle and Nuno Santos and + Andr{\'e} Schiper}, + title = {Solving Consensus with Communication Predicates: + A~Quantitative Approach}, + note = {Under submission}, + year = {2008} +} + +@inproceedings{Ben83:podc, + author = {Michael Ben-Or}, + title = {Another Advantage of Free Choice: Completely + Asynchronous Agreement Protocols}, + booktitle = {PODC}, + year = {1983}, +} + +@inproceedings{Bra04:podc, + author = {Bracha, Gabriel}, + title = {An asynchronous [(n - 1)/3]-resilient consensus protocol}, + booktitle = {PODC '84: Proceedings of the third annual ACM symposium on Principles of distributed computing}, + year = {1984}, + isbn = {0-89791-143-1}, + pages = {154--162}, + location = {Vancouver, British Columbia, Canada}, + doi = {http://doi.acm.org/10.1145/800222.806743}, + publisher = {ACM}, + address = {New York, NY, USA}, + } + + +@inproceedings{CBGS00:dsn, + author = "Bernadette Charron-Bost and Rachid Guerraoui and + Andr{\'{e}} Schiper", + title = "Synchronous System and Perfect Failure Detector: + {S}olvability and efficiency issues", + booktitle = DSN00, + publisher = "{IEEE} Computer Society", + address = "New York, {USA}", + pages = "523--532", + year = "2000" +} + +@inproceedings{CBS06:prdc, + author = {Bernadette Charron-Bost and Andr{\'e} Schiper}, + title = {Improving Fast Paxos: being optimistic with no + overhead}, + booktitle = {Pacific Rim Dependable Computing, Proceedings}, + year = {2006} +} + +@article{CBS09, + author = {B. Charron-Bost and A. Schiper}, + title = {The {H}eard-{O}f model: computing in distributed systems with benign failures}, + journal ={Distributed Computing}, + number = {1}, + volume = {22}, + pages = {49-71}, + year ={2009} + } + + +@article{CBS07:sigact, + author = {Bernadette Charron-Bost and Andr\'{e} Schiper}, + title = {Harmful dogmas in fault tolerant distributed + computing}, + journal = {SIGACT News}, + volume = {38}, + number = {1}, + year = {2007}, + pages = {53--61}, +} + +@techreport{CBS07:tr, + author = {Charron-Bost, Bernadette and Schiper, Andr{\'{e}}}, + title = {The Heard-Of Model: Unifying all Benign Failures}, + institution = {EPFL}, + year = 2007, + OPTnumber = {LSR-REPORT-2006-004} +} + +@article{CELT00:jacm, + author = {Soma Chaudhuri and Maurice Erlihy and Nancy A. Lynch + and Mark R. Tuttle}, + title = {Tight bounds for k-set agreement}, + journal = JACM, + volume = {47}, + number = {5}, + year = {2000}, + pages = {912--943}, + publisher = ACM, + address = {New York, NY, USA}, +} + +@article{CF99:tpds, + author = "Flaviu Cristian and Christof Fetzer", + title = "The Timed Asynchronous Distributed System Model", + journal = "IEEE Transactions on Parallel and Distributed + Systems", + volume = "10", + number = "6", + pages = "642--657", + year = "1999" +} + +@article{CHT96:jacm, + author = "Tushar Deepak Chandra and Vassos Hadzilacos and Sam + Toueg", + title = "The Weakest Failure Detector for Solving Consensus", + journal = {JACM}, + year = {1996}, +} + +@article{CL02:tcs, + author = {Miguel Castro and Barbara Liskov}, + title = {Practical byzantine fault tolerance and proactive + recovery}, + journal = {ACMTCS}, + year = {2002}, +} + +@inproceedings{CL99:osdi, + author = {Miguel Castro and Barbara Liskov}, + title = {Practical byzantine fault tolerance and proactive + recovery}, + booktitle = {Proceedings of the 3rd Symposium on Operating + Systems Design and Implementation}, + year = {1999}, + month = feb +} + +@inproceedings{CT91:podc, + author = {Tushar Deepak Chandra and Sam Toueg}, + title = {Unreliable Failure Detectors for Asynchronous + Systems (Preliminary Version)}, + booktitle = PODC91, + year = {1991}, + pages = {325-340} +} + +@article{CT96:jacm1, + author = "Tushar Deepak Chandra and Sam Toueg", + title = "Unreliable Failure Detectors for Reliable + Distributed Systems", + journal = {JACM}, + year = {1996}, +} + +@inproceedings{CTA00:dsn, + author = "Wei Chen and Sam Toueg and Marcos Kawazoe Aguilera", + title = "On the Quality of Service of Failure Detectors", + booktitle = "Proceedings IEEE International Conference on + Dependable Systems and Networks (DSN / FTCS'30)", + address = "New York City, USA", + year = 2000 +} + +@TechReport{DFKM96:tr, + author = {Danny Dolev and Roy Friedman and Idit Keidar and + Dahlia Malkhi}, + title = {Failure detectors in omission failure environments}, + institution = {Department of Computer Science, Cornell University}, + year = {1996}, + type = {Technical Report}, + number = {96-1608} +} + +@inproceedings{DG02:podc, + author = {Partha Dutta and Rachid Guerraoui}, + title = {The inherent price of indulgence}, + booktitle = PODC02, + year = 2002, + pages = {88--97}, + location = {Monterey, California}, + publisher = ACM, + address = {New York, NY, USA}, +} + +@inproceedings{DGFG+04:podc, + author = {Carole Delporte-Gallet and Hugues Fauconnier and + Rachid Guerraoui and Vassos Hadzilacos and Petr + Kouznetsov and Sam Toueg}, + title = {The weakest failure detectors to solve certain + fundamental problems in distributed computing}, + booktitle = PODC04, + year = 2004, + pages = {338--346}, + location = {St. John's, Newfoundland, Canada}, + publisher = ACM, + address = {New York, NY, USA} +} + +@inproceedings{DGL05:dsn, + author = {Partha Dutta and Rachid Guerraoui and Leslie + Lamport}, + title = {How Fast Can Eventual Synchrony Lead to Consensus?}, + booktitle = {Proceedings of the 2005 International Conference on + Dependable Systems and Networks (DSN'05)}, + pages = {22--27}, + year = {2005}, + address = {Los Alamitos, CA, USA} +} + +@article{DLS88:jacm, + author = "Cynthia Dwork and Nancy Lynch and Larry Stockmeyer", + title = "Consensus in the Presence of Partial Synchrony", + journal = {JACM}, + year = {1988}, +} + +@article{DPLL00:tcs, + author = "De Prisco, Roberto and Butler Lampson and Nancy + Lynch", + title = "Revisiting the {PAXOS} algorithm", + journal = TCS, + volume = "243", + number = "1--2", + pages = "35--91", + year = "2000" +} + +@techreport{DS97:tr, + author = {A. Doudou and A. Schiper}, + title = {Muteness Failure Detectors for Consensus with + {B}yzantine Processes}, + institution = {EPFL, Dept d'Informatique}, + year = {1997}, + type = {TR}, + month = {October}, + number = {97/230}, +} + +@inproceedings{DS98:podc, + author = {A. Doudou and A. Schiper}, + title = {Muteness Detectors for Consensus with {B}yzantine + Processes ({B}rief {A}nnouncement)}, + booktitle = {PODC}, + month = jul, + year = {1998} +} + +@article{DSU04:survey, + author = {D{\'e}fago, Xavier and Schiper, Andr{\'e} and Urb\'{a}n, P{\'e}ter}, + title = {Total order broadcast and multicast algorithms: Taxonomy and survey}, + journal = {ACM Comput. Surv.}, + issue_date = {December 2004}, + volume = {36}, + number = {4}, + month = dec, + year = {2004}, + issn = {0360-0300}, + pages = {372--421}, + numpages = {50}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {Distributed systems, agreement problems, atomic broadcast, atomic multicast, classification, distributed algorithms, fault-tolerance, global ordering, group communication, message passing, survey, taxonomy, total ordering}, +} + +@article{DeCandia07:dynamo, + author = {DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner}, + title = {Dynamo: amazon's highly available key-value store}, + journal = {SIGOPS Oper. Syst. Rev.}, + issue_date = {December 2007}, + volume = {41}, + number = {6}, + month = oct, + year = {2007}, + issn = {0163-5980}, + pages = {205--220}, + numpages = {16}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {performance, reliability, scalability}, +} + + +@book{Dol00:book, + author = {Shlomi Dolev}, + title = {Self-Stabilization}, + publisher = {The MIT Press}, + year = {2000} +} + +@inproceedings{FC95:podc, + author = "Christof Fetzer and Flaviu Cristian", + title = "Lower Bounds for Convergence Function Based Clock + Synchronization", + booktitle = PODC95, + year = 1995, + pages = "137--143" +} + +@article{FLP85:jacm, + author = "Michael J. Fischer and Nancy A. Lynch and + M. S. Paterson", + title = "Impossibility of Distributed Consensus with one + Faulty Process", + journal = {JACM}, + year = {1985}, +} + +@article{FMR05:tdsc, + author = {Roy Friedman and Achour Most{\'e}faoui and Michel + Raynal}, + title = {Simple and Efficient Oracle-Based Consensus + Protocols for Asynchronous Byzantine Systems.}, + journal = TDSC, + volume = {2}, + number = {1}, + year = {2005}, + pages = {46-56}, + ee = {http://dx.doi.org/10.1109/TDSC.2005.13}, + bibsource = {DBLP, http://dblp.uni-trier.de} +} + +@inproceedings{FS04:podc, + author = "Christof Fetzer and Ulrich Schmid", + title = "Brief announcement: on the possibility of consensus + in asynchronous systems with finite average response + times.", + booktitle = PODC04, + year = 2004, + pages = 402 +} + +@InProceedings{GL00:disc, + author = {Eli Gafni and Lesli Lamport}, + title = {Disk Paxos}, + booktitle = DISC00, + pages = {330--344}, + year = {2000}, +} + +@Article{GL03:dc, + author = {Eli Gafni and Lesli Lamport}, + title = {Disk Paxos}, + journal = DC, + year = 2003, + volume = {16}, + number = {1}, + pages = {1--20} +} + +@inproceedings{GP01:wss, + author = "Felix C. G{\"a}rtner and Stefan Pleisch", + title = "({I}m)Possibilities of Predicate Detection in + Crash-Affected Systems", + booktitle = WSS01, + year = 2001, + pages = "98--113" +} + +@inproceedings{GP02:disc, + author = "Felix C. G{\"a}rtner and Stefan Pleisch", + title = "Failure Detection Sequencers: Necessary and + Sufficient Information about Failures to Solve + Predicate Detection", + booktitle = DISC02, + year = 2002, + pages = "280--294" +} + +@inproceedings{GS96:wdag, + author = {Rachid Guerraoui and Andr{\'e} Schiper}, + title = {{``Gamma-Accurate''} Failure Detectors}, + booktitle = WDAG96, + year = {1996}, + pages = {269--286}, + publisher = SPR, + address = {London, UK} +} + +@inproceedings{Gaf98:podc, + author = {Eli Gafni}, + title = {Round-by-round fault detectors (extended abstract): + unifying synchrony and asynchrony}, + booktitle = PODC98, + year = {1998}, + pages = {143--152}, + address = {Puerto Vallarta, Mexico}, + publisher = ACM +} + +@incollection{Gra78:book, + author = {Jim N. Gray}, + title = {Notes on data base operating systems}, + booktitle = {Operating Systems: An Advanced Course}, + chapter = {3.F}, + publisher = {Springer}, + year = {1978}, + editor = {R. Bayer, R.M. Graham, G. Seegm\"uller}, + volume = {60}, + series = {Lecture Notes in Computer Science}, + address = {New York}, + pages = {465}, +} + +@InProceedings{HMR98:srds, + author = {Hurfin, M. and Mostefaoui, A. and Raynal, M.}, + title = {Consensus in asynchronous systems where processes + can crash and recover}, + booktitle = {Seventeenth IEEE Symposium on Reliable Distributed + Systems, Proceedings. }, + pages = { 280--286}, + year = {1998}, + address = {West Lafayette, IN}, + month = oct, + organization = {IEEE} +} + +@inproceedings{HMSZ06:sss, + author = "Martin Hutle and Dahlia Malkhi and Ulrich Schmid and + Lidong Zhou", + title = "Brief Announcement: Chasing the Weakest System Model + for Implementing {$\Omega$} and Consensus", + booktitle = SSS06, + year = 2006 +} + +@incollection{HT93:ds, + author = {Hadzilacos, Vassos and Toueg, Sam}, + title = {Fault-tolerant broadcasts and related problems}, + booktitle = {Distributed systems (2nd Ed.)}, + editor = {Mullender, Sape}, + year = {1993}, + isbn = {0-201-62427-3}, + pages = {97--145}, + numpages = {49} +} + + +@inproceedings{HS06:opodis, + author = {Heinrich Moser and Ulrich Schmid}, + title = {Optimal Clock Synchronization Revisited: Upper and + Lower Bounds in Real-Time Systems}, + booktitle = { Principles of Distributed Systems}, + pages = {94--109}, + year = {2006}, + volume = {4305}, + series = {Lecture Notes in Computer Science}, + publisher = SPR +} + +@techreport{HS06:tr, + author = {Martin Hutle and Andr{\'e} Schiper}, + title = { Communication predicates: A high-level abstraction + for coping with transient and dynamic faults}, + institution = {EPFL}, + number = { LSR-REPORT-2006-006 }, + year = {2006} +} + +@inproceedings{HS07:dsn, + author = {Martin Hutle and Andr{\'e} Schiper}, + title = { Communication predicates: A high-level abstraction + for coping with transient and dynamic faults}, + year = 2007, + booktitle = DSN07, + publisher = IEEE, + location = {Edinburgh,UK}, + pages = {92--10}, + month = jun +} + +@article{Her91:tpls, + author = {Maurice Herlihy}, + title = {Wait-free synchronization}, + journal = TPLS, + volume = {13}, + number = {1}, + year = {1991}, + pages = {124--149}, + publisher = ACM, + address = {New York, NY, USA}, +} + +@article{Kot09:zyzzyva, + author = {Kotla, Ramakrishna and Alvisi, Lorenzo and Dahlin, Mike and Clement, Allen and Wong, Edmund}, + title = {Zyzzyva: Speculative Byzantine fault tolerance}, + journal = {ACM Trans. Comput. Syst.}, + issue_date = {December 2009}, + volume = {27}, + number = {4}, + month = jan, + year = {2010}, + issn = {0734-2071}, + pages = {7:1--7:39}, + articleno = {7}, + numpages = {39}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {Byzantine fault tolerance, output commit, replication, speculative execution}, +} + + +@inproceedings{KMMS97:opodis, + author = "Kim Potter Kihlstrom and Louise E. Moser and + P. M. Melliar-Smith", + title = "Solving Consensus in a Byzantine Environment Using + an Unreliable Fault Detector", + booktitle = "Proceedings of the International Conference on + Principles of Distributed Systems (OPODIS)", + year = 1997, + month = dec, + address = "Chantilly, France", + pages = "61--75" +} + +@inproceedings{KS06:podc, + author = {Idit Keidar and Alexander Shraer}, + title = {Timeliness, failure-detectors, and consensus + performance}, + booktitle = PODC06, + year = {2006}, + pages = {169--178}, + location = {Denver, Colorado, USA}, + publisher = {ACM Press}, + address = {New York, NY, USA}, +} + +@InProceedings{LFA99:disc, + author = {Mikel Larrea and Antonio Fern\'andez and Sergio + Ar\'evalo}, + title = {Efficient algorithms to implement unreliable failure + detectors in partially synchronous systems}, + year = 1999, + month = sep, + pages = {34-48}, + series = "LNCS 1693", + booktitle = DISC99, + publisher = SPR, + address = {Bratislava, Slovaquia}, +} + +@article{LL84:ic, + author = "Jennifer Lundelius and Nancy A. Lynch", + title = "An Upper and Lower Bound for Clock Synchronization", + journal = IC, + volume = 62, + number = {2/3}, + year = 1984, + pages = {190--204} +} + +@techreport{LLS03:tr, + title = {How to Implement a Timer-free Perfect Failure + Detector in Partially Synchronous Systems}, + author = {Le Lann, G\'erard and Schmid, Ulrich}, + institution = TUAuto, + number = "183/1-127", + month = jan, + year = 2003 +} + +@article{LSP82:tpls, + author = {Leslie Lamport and Robert Shostak and Marshall + Pease}, + title = {The {B}yzantine Generals Problem}, + journal = {ACM Trans. Program. Lang. Syst.}, + year = {1982}, +} + +@inproceedings{Lam01:podc, + author = {Butler Lampson}, + title = {The ABCD's of Paxos}, + booktitle = {PODC}, + year = {2001}, + +} + +@inproceedings{Lam03:fddc, + author = {Leslie Lamport}, + title = {Lower Bounds for Asynchronous Consensus}, + booktitle = {Future Directions in Distributed Computing}, + pages = {22--23}, + year = {2003}, + editor = {Andr{\'e} Schiper and Alex A. Shvartsman and Hakim + Weatherspoon and Ben Y. Zhao}, + number = {2584}, + series = {Lecture Notes in Computer Science}, + publisher = SPR +} + +@techreport{Lam04:tr, + author = {Leslie Lamport}, + title = {Lower Bounds for Asynchronous Consensus}, + institution = {Microsoft Research}, + year = {2004}, + number = {MSR-TR-2004-72} +} + +@techreport{Lam05:tr, + author = {Leslie Lamport}, + title = {Fast Paxos}, + institution = {Microsoft Research}, + year = {2005}, + number = {MSR-TR-2005-12} +} + +@techreport{Lam05:tr-33, + author = {Leslie Lamport}, + title = {Generalized Consensus and Paxos}, + institution = {Microsoft Research}, + year = {2005}, + number = {MSR-TR-2005-33} +} + +@Misc{Lam06:slides, + author = {Leslie Lamport}, + title = {Byzantine Paxos}, + howpublished = {Unpublished slides}, + year = {2006} +} + +@Article{Lam86:dc, + author = {Lesli Lamport}, + title = {On Interprocess Communication--Part I: Basic + Formalism, Part II: Algorithms}, + journal = DC, + year = 1986, + volume = 1, + number = 2, + pages = {77--101} +} + +@Article {Lam98:tcs, + author = {Leslie Lamport}, + title = {The part-time parliament}, + journal = ACMTCS, + year = 1998, + volume = 16, + number = 2, + month = may, + pages = {133-169}, +} + +@book{Lyn96:book, + author = {Nancy Lynch}, + title = {Distributed Algorithms}, + publisher = {Morgan Kaufman}, + year = {1996}, +} + +@inproceedings{MA05:dsn, + author = {Martin, J.-P. and Alvisi, L. }, + title = {Fast Byzantine consensus}, + booktitle = DSN05, + pages = {402--411}, + year = {2005}, + month = jun, + organization = {IEEE}, +} + +@article{MA06:tdsc, + author = {Martin, J.-P. and Alvisi, L. }, + title = {Fast {B}yzantine Consensus}, + journal = {TDSC}, + year = {2006}, +} + +@InProceedings{MOZ05:dsn, + author = {Dahlia Malkhi and Florin Oprea and Lidong Zhou}, + title = {{$\Omega$} Meets Paxos: Leader Election and + Stability without Eventual Timely Links}, + booktitle = DSN05, + year = {2005} +} + +@inproceedings{MR00:podc, + author = "Achour Most{\'e}faoui and Michel Raynal", + title = "k-set agreement with limited accuracy failure + detectors", + booktitle = PODC00, + year = 2000, + pages = {143--152}, + location = {Portland, Oregon, United States}, + publisher = ACM +} + +@article{MR01:ppl, + author = "Achour Most{\'e}faoui and Michel Raynal", + title = "Leader-Based Consensus", + journal = PPL, + volume = 11, + number = 1, + year = 2001, + pages = {95--107} +} + +@techreport{OGS97:tr, + author = "Rui Oliveira and Rachid Guerraoui and {Andr\'e} + Schiper", + title = "Consensus in the crash-recover model", + number = "TR-97/239", + year = "1997" +} + +@article{PSL80:jacm, + author = {M. Pease and R. Shostak and L. Lamport}, + title = {Reaching Agreement in the Presence of Faults}, + journal = JACM, + volume = {27}, + number = {2}, + year = {1980}, + pages = {228--234}, + publisher = ACM, + address = ACMADDR, +} + +@article{ST87:jacm, + author = "T. K. Srikanth and Sam Toueg", + title = "Optimal clock synchronization", + journal = JACM, + volume = 34, + number = 3, + year = 1987, + pages = "626--645" +} + +@article{ST87:dc, + author = {T. K. Srikanth and Sam Toueg,}, + title = {Simulating authenticated broadcasts to derive simple fault-tolerant algorithms}, + journal = DC, + volume = {2}, + number = {2}, + year = {1987}, + pages = {80-94} +} + + +@inproceedings{SW89:stacs, + author = {Santoro, Nicola and Widmayer, Peter}, + title = {Time is not a healer}, + booktitle = {Proc.\ 6th Annual Symposium on Theor.\ Aspects of + Computer Science (STACS'89)}, + publisher = "Springer-Verlag", + series = {LNCS}, + volume = "349", + address = "Paderborn, Germany", + pages = "304-313", + year = "1989", + month = feb, +} + +@inproceedings{SW90:sigal, + author = {Nicola Santoro and Peter Widmayer}, + title = {Distributed Function Evaluation in the Presence of + Transmission Faults.}, + booktitle = {SIGAL International Symposium on Algorithms}, + year = {1990}, + pages = {358-367} +} + +@inproceedings{SWR02:icdcs, + author = {Ulrich Schmid and Bettina Weiss and John Rushby}, + title = {Formally Verified Byzantine Agreement in Presence of + Link Faults}, + booktitle = "22nd International Conference on Distributed + Computing Systems (ICDCS'02)", + year = 2002, + month = jul # " 2-5, ", + pages = "608--616", + address = "Vienna, Austria", +} + +@incollection{Sch93a:mullender, + Author = {F. B. Schneider}, + Title = {What Good are Models and What Models are Good}, + BookTitle = {Distributed Systems}, + Year = {1993}, + Editor = {Sape Mullender}, + Publisher = {ACM Press}, + Pages = {169-197}, +} + +@article{VL96:ic, + author = {George Varghese and Nancy A. Lynch}, + title = {A Tradeoff Between Safety and Liveness for + Randomized Coordinated Attack.}, + journal = {Inf. Comput.}, + volume = {128}, + number = {1}, + year = 1996, + pages = {57--71} +} + +@inproceedings{WGWB07:dsn, + title = {Synchronous Consensus with Mortal Byzantines}, + author = {Josef Widder and Günther Gridling and Bettina Weiss + and Jean-Paul Blanquart}, + year = {2007}, + booktitle = DSN07, + publisher = IEEE +} + +@inproceedings{Wid03:disc, + author = {Josef Widder}, + title = {Booting clock Synchronization in Partially + Synchronous Systems}, + booktitle = DISC03, + year = {2003}, + pages = {121--135} +} + +@techreport{Zie04:tr, + author = {Piotr Zieli{\'n}ski}, + title = {Paxos at War}, + institution = {University of Cambridge}, + year = {2004}, + number = {UCAM-CL-TR-593}, +} + +@article{Lam78:cacm, + author = {Leslie Lamport}, + title = {Time, clocks, and the ordering of events in a + distributed system}, + journal = {Commun. ACM}, + year = {1978}, +} + +@Article{Gue06:cj, + author = {Guerraoui, R. and Raynal, M.}, + journal = {The {C}omputer {J}ournal}, + title = {The {A}lpha of {I}ndulgent {C}onsensus}, + year = {2006} +} + +@Article{Gue03:toc, + affiliation = {EPFL}, + author = {Guerraoui, Rachid and Raynal, Michel}, + journal = {{IEEE} {T}rans. on {C}omputers}, + title = {The {I}nformation {S}tructure of {I}ndulgent {C}onsensus}, + year = {2004}, +} + +@techreport{Cas00, + author = {Castro, Miguel}, + title = {Practical {B}yzantine Fault-Tolerance. {PhD} thesis}, + institution = {MIT}, + year = 2000, +} + +@inproceedings{SongRSD08:icdcn, + author = {Yee Jiun Song and + Robbert van Renesse and + Fred B. Schneider and + Danny Dolev}, + title = {The Building Blocks of Consensus}, + booktitle = {ICDCN}, + year = {2008}, +} + + +@inproceedings{BS09:icdcn, + author = {Borran, Fatemeh and Schiper, Andr{\'e}}, + + title = {A {L}eader-free {B}yzantine {C}onsensus {A}lgorithm}, + note = {To appear in ICDCN, 2010}, +} + + +@inproceedings{MHS09:opodis, + author = {Zarko Milosevic and Martin Hutle and Andr{\'e} + Schiper}, + title = {Unifying {B}yzantine Consensus Algorithms with {W}eak + {I}nteractive {C}onsistency}, + note = {To appear in OPODIS 2009}, +} + +@inproceedings{MRR:dsn02, + author = {Most\'{e}faoui, Achour and Rajsbaum, Sergio and Raynal, Michel}, + title = {A Versatile and Modular Consensus Protocol}, + booktitle = {DSN}, + year = {2002}, + } + +@article{MR98:dc, + author = {Dahlia Malkhi and + Michael K. Reiter}, + title = {Byzantine Quorum Systems}, + journal = {Distributed Computing}, + year = {1998}, +} + +@inproceedings{Rei:ccs94, + author = {Reiter, Michael K.}, + title = {Secure agreement protocols: reliable and atomic group multicast in rampart}, + booktitle = {CCS}, + year = {1994}, + pages = {68--80}, + numpages = {13} +} + + +@techreport{RMS09-tr, + author = {Olivier R\"utti and Zarko Milosevic and Andr\'e Schiper}, + title = {{G}eneric construction of consensus algorithm for benign and {B}yzantine faults}, + institution = {EPFL-IC}, + number = {LSR-REPORT-2009-005}, + year = 2009, +} + +@inproceedings{Li:srds07, + author = {Li, Harry C. and Clement, Allen and Aiyer, Amitanand S. and Alvisi, Lorenzo}, + title = {The Paxos Register}, + booktitle = {SRDS}, + year = {2007}, + } + + @article{Amir11:prime, + author = {Amir, Yair and Coan, Brian and Kirsch, Jonathan and Lane, John}, + title = {Prime: Byzantine Replication under Attack}, + journal = {IEEE Trans. Dependable Secur. Comput.}, + issue_date = {July 2011}, + volume = {8}, + number = {4}, + month = jul, + year = {2011}, + issn = {1545-5971}, + pages = {564--577}, + numpages = {14}, + publisher = {IEEE Computer Society Press}, + address = {Los Alamitos, CA, USA}, + keywords = {Performance under attack, Byzantine fault tolerance, replicated state machines, distributed systems.}, +} + +@inproceedings{Mao08:mencius, + author = {Mao, Yanhua and Junqueira, Flavio P. and Marzullo, Keith}, + title = {Mencius: building efficient replicated state machines for WANs}, + booktitle = {OSDI}, + year = {2008}, + pages = {369--384}, + numpages = {16} +} + +@article{Sch90:survey, + author = {Schneider, Fred B.}, + title = {Implementing fault-tolerant services using the state machine approach: a tutorial}, + journal = {ACM Comput. Surv.}, + volume = {22}, + number = {4}, + month = dec, + year = {1990} +} + + +@techreport{HT94:TR, + author = {Hadzilacos, Vassos and Toueg, Sam}, + title = {A Modular Approach to Fault-Tolerant Broadcasts and Related Problems}, + year = {1994}, + source = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Acornellcs%3ACORNELLCS%3ATR94-1425}, + publisher = {Cornell University}, + address = {Ithaca, NY, USA}, +} + +@inproceedings{Ver09:spinning, + author = {Veronese, Giuliana Santos and Correia, Miguel and Bessani, Alysson Neves and Lung, Lau Cheuk}, + title = {Spin One's Wheels? Byzantine Fault Tolerance with a Spinning Primary}, + booktitle = {SRDS}, + year = {2009}, + numpages = {10} +} + +@inproceedings{Cle09:aardvark, + author = {Clement, Allen and Wong, Edmund and Alvisi, Lorenzo and Dahlin, Mike and Marchetti, Mirco}, + title = {Making Byzantine fault tolerant systems tolerate Byzantine faults}, + booktitle = {NSDI}, + year = {2009}, + pages = {153--168}, + numpages = {16} +} + +@inproceedings{Aiyer05:barB, + author = {Aiyer, Amitanand S. and Alvisi, Lorenzo and Clement, Allen and Dahlin, Mike and Martin, Jean-Philippe and Porth, Carl}, + title = {BAR fault tolerance for cooperative services}, + booktitle = {SOSP}, + year = {2005}, + pages = {45--58}, + numpages = {14} +} + +@inproceedings{Cach01:crypto, + author = {Cachin, Christian and Kursawe, Klaus and Petzold, Frank and Shoup, Victor}, + title = {Secure and Efficient Asynchronous Broadcast Protocols}, + booktitle = {CRYPTO}, + year = {2001}, + pages = {524--541}, + numpages = {18} +} + +@article{Moniz11:ritas, + author = {Moniz, Henrique and Neves, Nuno Ferreria and Correia, Miguel and Verissimo, Paulo}, + title = {RITAS: Services for Randomized Intrusion Tolerance}, + journal = {IEEE Trans. Dependable Secur. Comput.}, + volume = {8}, + number = {1}, + month = jan, + year = {2011}, + pages = {122--136}, + numpages = {15} +} + +@inproceedings{MHS11:jabc, + author = {Milosevic, Zarko and Hutle, Martin and Schiper, Andre}, + title = {On the Reduction of Atomic Broadcast to Consensus with Byzantine Faults}, + booktitle = {SRDS}, + year = {2011}, + pages = {235--244}, + numpages = {10} +} + +@incollection{DHSZ03, + author={Driscoll, Kevin and Hall, Brendan and Sivencrona, Håkan and Zumsteg, Phil}, + title={Byzantine Fault Tolerance, from Theory to Reality}, + year={2003}, + booktitle={Computer Safety, Reliability, and Security}, + volume={2788}, + pages={235--248} +} + +@inproceedings{RMES:dsn07, + author = {Olivier R{\"u}tti and + Sergio Mena and + Richard Ekwall and + Andr{\'e} Schiper}, + title = {On the Cost of Modularity in Atomic Broadcast}, + booktitle = {DSN}, + year = {2007}, + pages = {635-644} +} + +@article{Ben:jc92, + author = {Charles H. Bennett and + Fran\c{c}ois Bessette and + Gilles Brassard and + Louis Salvail and + John A. Smolin}, + title = {Experimental Quantum Cryptography}, + journal = {J. Cryptology}, + volume = {5}, + number = {1}, + year = {1992}, + pages = {3-28} +} + +@inproceedings{Aiyer:disc08, + author = {Aiyer, Amitanand S. and Alvisi, Lorenzo and Bazzi, Rida A. and Clement, Allen}, + title = {Matrix Signatures: From MACs to Digital Signatures in Distributed Systems}, + booktitle = {DISC}, + year = {2008}, + pages = {16--31}, + numpages = {16} +} + +@inproceedings{Biel13:dsn, + author = {Biely, Martin and Delgado, Pamela and Milosevic, Zarko and Schiper, Andr{\'e}}, + title = {Distal: A Framework for Implementing Fault-tolerant Distributed Algorithms}, + note = {To appear in DSN, 2013}, + year = 2013 +} + +@inproceedings{BS10:icdcn, + author = {Borran, Fatemeh and Schiper, Andr{\'e}}, + title = {A leader-free Byzantine consensus algorithm}, + booktitle = {ICDCN}, + year = {2010}, + pages = {67--78}, + numpages = {12} +} + +@article{Cor06:cj, + author = {Correia, Miguel and Neves, Nuno Ferreira and Ver\'{\i}ssimo, Paulo}, + title = {From Consensus to Atomic Broadcast: Time-Free Byzantine-Resistant Protocols without Signatures}, + journal = {Comput. J.}, + volume = {49}, + number = {1}, + year = {2006}, + pages = {82--96}, + numpages = {15} +} + +@inproceedings{RMS10:dsn, + author = {Olivier R{\"u}tti and + Zarko Milosevic and + Andr{\'e} Schiper}, + title = {Generic construction of consensus algorithms for benign + and Byzantine faults}, + booktitle = {DSN}, + year = {2010}, + pages = {343-352} +} + + + +@inproceedings{HKJR:usenix10, + author = {Hunt, Patrick and Konar, Mahadev and Junqueira, Flavio P. and Reed, Benjamin}, + title = {ZooKeeper: wait-free coordination for internet-scale systems}, + OPTbooktitle = {Proceedings of the 2010 USENIX conference on USENIX annual technical conference}, + booktitle = {USENIXATC}, + year = {2010}, + OPTlocation = {Boston, MA}, + pages = {11}, + numpages = {1}, + OPTurl = {http://dl.acm.org/citation.cfm?id=1855840.1855851}, + acmid = {1855851}, + OPTpublisher = {USENIX Association}, + OPTaddress = {Berkeley, CA, USA}, +} + +@inproceedings{Bur:osdi06, + author = {Burrows, Mike}, + title = {The Chubby lock service for loosely-coupled distributed systems}, + booktitle = {OSDI}, + year = {2006}, + pages = {335--350}, + numpages = {16}, +} + +@INPROCEEDINGS{Mao09:hotdep, + author = {Yanhua Mao and Flavio P. Junqueira and Keith Marzullo}, + title = {Towards low latency state machine replication for uncivil wide-area networks}, + booktitle = {HotDep}, + year = {2009} +} + +@inproceedings{Chun07:a2m, + author = {Chun, Byung-Gon and Maniatis, Petros and Shenker, Scott and Kubiatowicz, John}, + title = {Attested append-only memory: making adversaries stick to their word}, + booktitle = {SOSP}, + year = {2007}, + pages = {189--204}, + numpages = {16} +} + +@TECHREPORT{MBS:epfltr, + author = {Zarko Milosevic and Martin Biely and Andr\'e Schiper}, + title = {Bounded {D}elay in {B}yzantine {T}olerant {S}tate {M}achine {R}eplication}, + year = 2013, + month = april, + institution = {EPFL}, + number = {185962}, +} + +@book{BH09:datacenter, + author = {Barroso, Luiz Andre and Hoelzle, Urs}, + title = {The Datacenter as a Computer: An Introduction to the Design of Warehouse-Scale Machines}, + year = {2009}, + isbn = {159829556X, 9781598295566}, + edition = {1st}, + publisher = {Morgan and Claypool Publishers}, +} + +@inproceedings{Kir11:csiirw, + author = {Kirsch, Jonathan and Goose, Stuart and Amir, Yair and Skare, Paul}, + title = {Toward survivable SCADA}, + booktitle = {CSIIRW}, + year = {2011}, + pages = {21:1--21:1}, + articleno = {21}, + numpages = {1} +} + +@inproceedings{Ongaro14:raft, + author = {Ongaro, Diego and Ousterhout, John}, + title = {In Search of an Understandable Consensus Algorithm}, + booktitle = {Proceedings of the 2014 USENIX Conference on USENIX Annual Technical Conference}, + series = {USENIX ATC'14}, + year = {2014}, + isbn = {978-1-931971-10-2}, + location = {Philadelphia, PA}, + pages = {305--320}, + numpages = {16}, + url = {http://dl.acm.org/citation.cfm?id=2643634.2643666}, + acmid = {2643666}, + publisher = {USENIX Association}, + address = {Berkeley, CA, USA}, +} + +@article{GLR17:red-belly-bc, + author = {Tyler Crain and + Vincent Gramoli and + Mikel Larrea and + Michel Raynal}, + title = {Leader/Randomization/Signature-free Byzantine Consensus for Consortium + Blockchains}, + journal = {CoRR}, + volume = {abs/1702.03068}, + year = {2017}, + url = {http://arxiv.org/abs/1702.03068}, + archivePrefix = {arXiv}, + eprint = {1702.03068}, + timestamp = {Wed, 07 Jun 2017 14:41:08 +0200}, + biburl = {http://dblp.org/rec/bib/journals/corr/CrainGLR17}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + + +@misc{Nak2012:bitcoin, + added-at = {2014-04-17T08:33:06.000+0200}, + author = {Nakamoto, Satoshi}, + biburl = {https://www.bibsonomy.org/bibtex/23db66df0fc9fa2b5033f096a901f1c36/ngnn}, + interhash = {423c2cdff70ba0cd0bca55ebb164d770}, + intrahash = {3db66df0fc9fa2b5033f096a901f1c36}, + keywords = {imported}, + timestamp = {2014-04-17T08:33:06.000+0200}, + title = {Bitcoin: A peer-to-peer electronic cash system}, + url = {http://www.bitcoin.org/bitcoin.pdf}, + year = 2009 +} + +@misc{But2014:ethereum, + author = {Vitalik Buterin}, + title = {Ethereum: A next-generation smart contract and decentralized application platform}, + year = {2014}, + howpublished = {\url{https://github.com/ethereum/wiki/wiki/White-Paper}}, + note = {Accessed: 2018-07-11}, + url = {https://github.com/ethereum/wiki/wiki/White-Paper}, +} + +@inproceedings{Dem1987:gossip, + author = {Demers, Alan and Greene, Dan and Hauser, Carl and Irish, Wes and Larson, John and Shenker, Scott and Sturgis, Howard and Swinehart, Dan and Terry, Doug}, + title = {Epidemic Algorithms for Replicated Database Maintenance}, + booktitle = {Proceedings of the Sixth Annual ACM Symposium on Principles of Distributed Computing}, + series = {PODC '87}, + year = {1987}, + isbn = {0-89791-239-X}, + location = {Vancouver, British Columbia, Canada}, + pages = {1--12}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/41840.41841}, + doi = {10.1145/41840.41841}, + acmid = {41841}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + +@article{Gue2018:sbft, + author = {Guy Golan{-}Gueta and + Ittai Abraham and + Shelly Grossman and + Dahlia Malkhi and + Benny Pinkas and + Michael K. Reiter and + Dragos{-}Adrian Seredinschi and + Orr Tamir and + Alin Tomescu}, + title = {{SBFT:} a Scalable Decentralized Trust Infrastructure for Blockchains}, + journal = {CoRR}, + volume = {abs/1804.01626}, + year = {2018}, + url = {http://arxiv.org/abs/1804.01626}, + archivePrefix = {arXiv}, + eprint = {1804.01626}, + timestamp = {Tue, 01 May 2018 19:46:29 +0200}, + biburl = {https://dblp.org/rec/bib/journals/corr/abs-1804-01626}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{BLS2001:crypto, + author = {Boneh, Dan and Lynn, Ben and Shacham, Hovav}, + title = {Short Signatures from the Weil Pairing}, + booktitle = {Proceedings of the 7th International Conference on the Theory and Application of Cryptology and Information Security: Advances in Cryptology}, + series = {ASIACRYPT '01}, + year = {2001}, + isbn = {3-540-42987-5}, + pages = {514--532}, + numpages = {19}, + url = {http://dl.acm.org/citation.cfm?id=647097.717005}, + acmid = {717005}, + publisher = {Springer-Verlag}, + address = {Berlin, Heidelberg}, +} + + diff --git a/spec/consensus/consensus-paper/paper.tex b/spec/consensus/consensus-paper/paper.tex new file mode 100644 index 000000000..22f8b405f --- /dev/null +++ b/spec/consensus/consensus-paper/paper.tex @@ -0,0 +1,153 @@ +%\documentclass[conference]{IEEEtran} +\documentclass[conference,onecolumn,draft,a4paper]{IEEEtran} +% Add the compsoc option for Computer Society conferences. +% +% If IEEEtran.cls has not been installed into the LaTeX system files, +% manually specify the path to it like: +% \documentclass[conference]{../sty/IEEEtran} + + + +% *** GRAPHICS RELATED PACKAGES *** +% +\ifCLASSINFOpdf +\else +\fi + +% correct bad hyphenation here +\hyphenation{op-tical net-works semi-conduc-tor} + +%\usepackage[caption=false,font=footnotesize]{subfig} +\usepackage{tikz} +\usetikzlibrary{decorations,shapes,backgrounds,calc} +\tikzstyle{msg}=[->,black,>=latex] +\tikzstyle{rubber}=[|<->|] +\tikzstyle{announce}=[draw=blue,fill=blue,shape=diamond,right,minimum + height=2mm,minimum width=1.6667mm,inner sep=0pt] +\tikzstyle{decide}=[draw=red,fill=red,shape=isosceles triangle,right,minimum + height=2mm,minimum width=1.6667mm,inner sep=0pt,shape border rotate=90] +\tikzstyle{cast}=[draw=green!50!black,fill=green!50!black,shape=circle,left,minimum + height=2mm,minimum width=1.6667mm,inner sep=0pt] + + +\usepackage{multirow} +\usepackage{graphicx} +\usepackage{epstopdf} +\usepackage{amssymb} +\usepackage{rounddiag} +\graphicspath{{../}} + +\usepackage{technote} +\usepackage{homodel} +\usepackage{enumerate} +%%\usepackage{ulem}\normalem + +% to center caption +\usepackage{caption} + +\newcommand{\textstretch}{1.4} +\newcommand{\algostretch}{1} +\newcommand{\eqnstretch}{0.5} + +\newconstruct{\FOREACH}{\textbf{for each}}{\textbf{do}}{\ENDFOREACH}{} + +%\newconstruct{\ON}{\textbf{on}}{\textbf{do}}{\ENDON}{\textbf{end on}} +\newcommand\With{\textbf{while}} +\newcommand\From{\textbf{from}} +\newcommand\Broadcast{\textbf{broadcast}} +\newcommand\PBroadcast{send} +\newcommand\UpCall{\textbf{UpCall}} +\newcommand\DownCall{\textbf{DownCall}} +\newcommand \Call{\textbf{Call}} +\newident{noop} +\newconstruct{\UPON}{\textbf{upon}}{\textbf{do}}{\ENDUPON}{} + + + +\newcommand{\abcast}{\mathsf{to\mbox{\sf-}broadcast}} +\newcommand{\adeliver}{\mathsf{to\mbox{\sf-}deliver}} + +\newcommand{\ABCAgreement}{\emph{TO-Agreement}} +\newcommand{\ABCIntegrity}{\emph{TO-Integrity}} +\newcommand{\ABCValidity}{\emph{TO-Validity}} +\newcommand{\ABCTotalOrder}{\emph{TO-Order}} +\newcommand{\ABCBoundedDelivery}{\emph{TO-Bounded Delivery}} + + +\newcommand{\tabc}{\mathit{atab\mbox{\sf-}cast}} +\newcommand{\anno}{\mathit{atab\mbox{\sf-}announce}} +\newcommand{\abort}{\mathit{atab\mbox{\sf-}abort}} +\newcommand{\tadel}{\mathit{atab\mbox{\sf-}deliver}} + +\newcommand{\ATABAgreement}{\emph{ATAB-Agreement}} +\newcommand{\ATABAbort}{\emph{ATAB-Abort}} +\newcommand{\ATABIntegrity}{\emph{ATAB-Integrity}} +\newcommand{\ATABValidity}{\emph{ATAB-Validity}} +\newcommand{\ATABAnnounce}{\emph{ATAB-Announcement}} +\newcommand{\ATABTermination}{\emph{ATAB-Termination}} +%\newcommand{\ATABFastAnnounce}{\emph{ATAB-Fast-Announcement}} + +%% Command for observations. +\newtheorem{observation}{Observation} + + +%% HO ALGORITHM DEFINITIONS +\newconstruct{\FUNCTION}{\textbf{Function}}{\textbf{:}}{\ENDFUNCTION}{} + +%% Uncomment the following four lines to remove remarks and visible traces of +%% modifications in the document +%%\renewcommand{\sout}[1]{\relaxx} +%%\renewcommand{\uline}[1]{#1} +%% \renewcommand{\uwave}[1]{#1} + \renewcommand{\note}[2][default]{\relax} + + +%% The following commands can be used to generate TR or Conference version of the paper +\newcommand{\tr}[1]{} +\renewcommand{\tr}[1]{#1} +\newcommand{\onlypaper}[1]{#1} +%\renewcommand{\onlypaper}[1]{} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\pagestyle{plain} +%\pagestyle{empty} + +%% IEEE tweaks +%\setlength{\IEEEilabelindent}{.5\parindent} +%\setlength{\IEEEiednormlabelsep}{.5\parindent} + +\begin{document} +% +% paper title +% can use linebreaks \\ within to get better formatting as desired +\title{The latest gossip on BFT consensus\vspace{-0.7\baselineskip}} + + + +\author{\IEEEauthorblockN{\large Ethan Buchman, Jae Kwon and Zarko Milosevic\\} + \IEEEauthorblockN{\large Tendermint}\\ + %\\\vspace{-0.5\baselineskip} + \IEEEauthorblockN{September 24, 2018} +} + +% make the title area +\maketitle +\vspace*{0.5em} + +\begin{abstract} +This paper presents Tendermint, a new protocol for ordering events in a distributed network under adversarial conditions. More commonly known as Byzantine Fault Tolerant (BFT) consensus or atomic broadcast, the problem has attracted significant attention in recent years due to the widespread success of blockchain-based digital currencies, such as Bitcoin and Ethereum, which successfully solved the problem in a public setting without a central authority. Tendermint modernizes classic academic work on the subject and simplifies the design of the BFT algorithm by relying on a peer-to-peer gossip protocol among nodes. +\end{abstract} + +%\noindent \textbf{Keywords:} Blockchain, Byzantine Fault Tolerance, State Machine %Replication + +\input{intro} +\input{definitions} +\input{consensus} +\input{proof} +\input{conclusion} + +\bibliographystyle{IEEEtran} +\bibliography{lit} + +%\appendix + +\end{document} diff --git a/spec/consensus/consensus-paper/proof.tex b/spec/consensus/consensus-paper/proof.tex new file mode 100644 index 000000000..1c84d9b11 --- /dev/null +++ b/spec/consensus/consensus-paper/proof.tex @@ -0,0 +1,280 @@ +\section{Proof of Tendermint consensus algorithm} \label{sec:proof} + +\begin{lemma} \label{lemma:majority-intersection} For all $f\geq 0$, any two +sets of processes with voting power at least equal to $2f+1$ have at least one +correct process in common. \end{lemma} + +\begin{proof} As the total voting power is equal to $n=3f+1$, we have $2(2f+1) + = n+f+1$. This means that the intersection of two sets with the voting + power equal to $2f+1$ contains at least $f+1$ voting power in common, \ie, + at least one correct process (as the total voting power of faulty processes + is $f$). The result follows directly from this. \end{proof} + +\begin{lemma} \label{lemma:locked-decision_value-prevote-v} If $f+1$ correct +processes lock value $v$ in round $r_0$ ($lockedValue = v$ and $lockedRound = +r_0$), then in all rounds $r > r_0$, they send $\Prevote$ for $id(v)$ or +$\nil$. \end{lemma} + +\begin{proof} We prove the result by induction on $r$. + +\emph{Base step $r = r_0 + 1:$} Let's denote with $C$ the set of correct +processes with voting power equal to $f+1$. By the rules at +line~\ref{line:tab:recvProposal} and line~\ref{line:tab:acceptProposal}, the +processes from the set $C$ can't accept $\Proposal$ for any value different +from $v$ in round $r$, and therefore can't send a $\li{\Prevote,height_p, +r,id(v')}$ message, if $v' \neq v$. Therefore, the Lemma holds for the base +step. + +\emph{Induction step from $r_1$ to $r_1+1$:} We assume that no process from the +set $C$ has sent $\Prevote$ for values different than $id(v)$ or $\nil$ until +round $r_1 + 1$. We now prove that the Lemma also holds for round $r_1 + 1$. As +processes from the set $C$ send $\Prevote$ for $id(v)$ or $\nil$ in rounds $r_0 +\le r \le r_1$, by Lemma~\ref{lemma:majority-intersection} there is no value +$v' \neq v$ for which it is possible to receive $2f+1$ $\Prevote$ messages in +those rounds (i). Therefore, we have for all processes from the set $C$, +$lockedValue = v$ and $lockedRound \ge r_0$. Let's assume by a contradiction +that a process $q$ from the set $C$ sends $\Prevote$ in round $r_1 + 1$ for +value $id(v')$, where $v' \neq v$. This is possible only by +line~\ref{line:tab:prevote-higher-proposal}. Note that this implies that $q$ +received $2f+1$ $\li{\Prevote,h_q, r,id(v')}$ messages, where $r > r_0$ and $r +< r_1 +1$ (see line~\ref{line:tab:cond-prevote-higher-proposal}). A +contradiction with (i) and Lemma~\ref{lemma:majority-intersection}. +\end{proof} + +\begin{lemma} \label{lemma:agreement} Algorithm~\ref{alg:tendermint} satisfies +Agreement. \end{lemma} + +\begin{proof} Let round $r_0$ be the first round of height $h$ such that some + correct process $p$ decides $v$. We now prove that if some correct process + $q$ decides $v'$ in some round $r \ge r_0$, then $v = v'$. + +In case $r = r_0$, $q$ has received at least $2f+1$ +$\li{\Precommit,h_p,r_0,id(v')}$ messages at line~\ref{line:tab:onDecideRule}, +while $p$ has received at least $2f+1$ $\li{\Precommit,h_p,r_0,id(v)}$ +messages. By Lemma~\ref{lemma:majority-intersection} two sets of messages of +voting power $2f+1$ intersect in at least one correct process. As a correct +process sends a single $\Precommit$ message in a round, then $v=v'$. + +We prove the case $r > r_0$ by contradiction. By the +rule~\ref{line:tab:onDecideRule}, $p$ has received at least $2f+1$ voting-power +equivalent of $\li{\Precommit,h_p,r_0,id(v)}$ messages, i.e., at least $f+1$ +voting-power equivalent correct processes have locked value $v$ in round $r_0$ and have +sent those messages (i). Let denote this set of messages with $C$. On the +other side, $q$ has received at least $2f+1$ voting power equivalent of +$\li{\Precommit,h_q, r,id(v')}$ messages. As the voting power of all faulty +processes is at most $f$, some correct process $c$ has sent one of those +messages. By the rule at line~\ref{line:tab:recvPrevote}, $c$ has locked value +$v'$ in round $r$ before sending $\li{\Precommit,h_q, r,id(v')}$. Therefore $c$ +has received $2f+1$ $\Prevote$ messages for $id(v')$ in round $r > r_0$ (see +line~\ref{line:tab:recvPrevote}). By Lemma~\ref{lemma:majority-intersection}, a +process from the set $C$ has sent $\Prevote$ message for $id(v')$ in round $r$. +A contradiction with (i) and Lemma~\ref{lemma:locked-decision_value-prevote-v}. +\end{proof} + +\begin{lemma} \label{lemma:agreement} Algorithm~\ref{alg:tendermint} satisfies +Validity. \end{lemma} + +\begin{proof} Trivially follows from the rule at line +\ref{line:tab:validDecisionValue} which ensures that only valid values can be +decided. \end{proof} + +\begin{lemma} \label{lemma:round-synchronisation} If we assume that: +\begin{enumerate} + \item a correct process $p$ is the first correct process to + enter a round $r>0$ at time $t > GST$ (for every correct process + $c$, $round_c \le r$ at time $t$) + \item the proposer of round $r$ is + a correct process $q$ + \item for every correct process $c$, + $lockedRound_c \le validRound_q$ at time $t$ + \item $\timeoutPropose(r) + > 2\Delta + \timeoutPrecommit(r-1)$, $\timeoutPrevote(r) > 2\Delta$ and + $\timeoutPrecommit(r) > 2\Delta$, +\end{enumerate} +then all correct processes decide in round $r$ before $t + 4\Delta + + \timeoutPrecommit(r-1)$. +\end{lemma} + +\begin{proof} As $p$ is the first correct process to enter round $r$, it + executed the line~\ref{line:tab:nextRound} after $\timeoutPrecommit(r-1)$ + expired. Therefore, $p$ received $2f+1$ $\Precommit$ messages in the round + $r-1$ before time $t$. By the \emph{Gossip communication} property, all + correct processes will receive those messages the latest at time $t + + \Delta$. Correct processes that are in rounds $< r-1$ at time $t$ will + enter round $r-1$ (see the rule at line~\ref{line:tab:nextRound2}) and + trigger $\timeoutPrecommit(r-1)$ (see rule~\ref{line:tab:startTimeoutPrecommit}) + by time $t+\Delta$. Therefore, all correct processes will start round $r$ + by time $t+\Delta+\timeoutPrecommit(r-1)$ (i). + +In the worst case, the process $q$ is the last correct process to enter round +$r$, so $q$ starts round $r$ and sends $\Proposal$ message for some value $v$ +at time $t + \Delta + \timeoutPrecommit(r-1)$. Therefore, all correct processes +receive the $\Proposal$ message from $q$ the latest by time $t + 2\Delta + +\timeoutPrecommit(r-1)$. Therefore, if $\timeoutPropose(r) > 2\Delta + +\timeoutPrecommit(r-1)$, all correct processes will receive $\Proposal$ message +before $\timeoutPropose(r)$ expires. + +By (3) and the rules at line~\ref{line:tab:recvProposal} and +\ref{line:tab:acceptProposal}, all correct processes will accept the +$\Proposal$ message for value $v$ and will send a $\Prevote$ message for +$id(v)$ by time $t + 2\Delta + \timeoutPrecommit(r-1)$. Note that by the +\emph{Gossip communication} property, the $\Prevote$ messages needed to trigger +the rule at line~\ref{line:tab:acceptProposal} are received before time $t + +\Delta$. + +By time $t + 3\Delta + \timeoutPrecommit(r-1)$, all correct processes will receive +$\Proposal$ for $v$ and $2f+1$ corresponding $\Prevote$ messages for $id(v)$. +By the rule at line~\ref{line:tab:recvPrevote}, all correct processes will send +a $\Precommit$ message (see line~\ref{line:tab:precommit-v}) for $id(v)$ by +time $t + 3\Delta + \timeoutPrecommit(r-1)$. Therefore, by time $t + 4\Delta + +\timeoutPrecommit(r-1)$, all correct processes will have received the $\Proposal$ +for $v$ and $2f+1$ $\Precommit$ messages for $id(v)$, so they decide at +line~\ref{line:tab:decide} on $v$. + +This scenario holds if every correct process $q$ sends a $\Precommit$ message +before $\timeoutPrevote(r)$ expires, and if $\timeoutPrecommit(r)$ does not expire +before $t + 4\Delta + \timeoutPrecommit(r-1)$. Let's assume that a correct process +$c_1$ is the first correct process to trigger $\timeoutPrevote(r)$ (see the rule +at line~\ref{line:tab:recvAny2/3Prevote}) at time $t_1 > t$. This implies that +before time $t_1$, $c_1$ received a $\Proposal$ ($step_{c_1}$ must be +$\prevote$ by the rule at line~\ref{line:tab:recvAny2/3Prevote}) and a set of +$2f+1$ $\Prevote$ messages. By time $t_1 + \Delta$, all correct processes will +receive those messages. Note that even if some correct process was in the +smaller round before time $t_1$, at time $t_1 + \Delta$ it will start round $r$ +after receiving those messages (see the rule at +line~\ref{line:tab:skipRounds}). Therefore, all correct processes will send +their $\Prevote$ message for $id(v)$ by time $t_1 + \Delta$, and all correct +processes will receive those messages the by time $t_1 + 2\Delta$. Therefore, +as $\timeoutPrevote(r) > 2\Delta$, this ensures that all correct processes receive +$\Prevote$ messages from all correct processes before their respective local +$\timeoutPrevote(r)$ expire. + +On the other hand, $\timeoutPrecommit(r)$ is triggered in a correct process $c_2$ +after it receives any set of $2f+1$ $\Precommit$ messages for the first time. +Let's denote with $t_2 > t$ the earliest point in time $\timeoutPrecommit(r)$ is +triggered in some correct process $c_2$. This implies that $c_2$ has received +at least $f+1$ $\Precommit$ messages for $id(v)$ from correct processes, i.e., +those processes have received $\Proposal$ for $v$ and $2f+1$ $\Prevote$ +messages for $id(v)$ before time $t_2$. By the \emph{Gossip communication} +property, all correct processes will receive those messages by time $t_2 + +\Delta$, and will send $\Precommit$ messages for $id(v)$. Note that even if +some correct processes were at time $t_2$ in a round smaller than $r$, by the +rule at line~\ref{line:tab:skipRounds} they will enter round $r$ by time $t_2 + +\Delta$. Therefore, by time $t_2 + 2\Delta$, all correct processes will +receive $\Proposal$ for $v$ and $2f+1$ $\Precommit$ messages for $id(v)$. So if +$\timeoutPrecommit(r) > 2\Delta$, all correct processes will decide before the +timeout expires. \end{proof} + + +\begin{lemma} \label{lemma:validValue} If a correct process $p$ locks a value + $v$ at time $t_0 > GST$ in some round $r$ ($lockedValue = v$ and + $lockedRound = r$) and $\timeoutPrecommit(r) > 2\Delta$, then all correct + processes set $validValue$ to $v$ and $validRound$ to $r$ before starting + round $r+1$. \end{lemma} + +\begin{proof} In order to prove this Lemma, we need to prove that if the + process $p$ locks a value $v$ at time $t_0$, then no correct process will + leave round $r$ before time $t_0 + \Delta$ (unless it has already set + $validValue$ to $v$ and $validRound$ to $r$). It is sufficient to prove + this, since by the \emph{Gossip communication} property the messages that + $p$ received at time $t_0$ and that triggered rule at + line~\ref{line:tab:recvPrevote} will be received by time $t_0 + \Delta$ by + all correct processes, so all correct processes that are still in round $r$ + will set $validValue$ to $v$ and $validRound$ to $r$ (by the rule at + line~\ref{line:tab:recvPrevote}). To prove this, we need to compute the + earliest point in time a correct process could leave round $r$ without + updating $validValue$ to $v$ and $validRound$ to $r$ (we denote this time + with $t_1$). The Lemma is correct if $t_0 + \Delta < t_1$. + +If the process $p$ locks a value $v$ at time $t_0$, this implies that $p$ +received the valid $\Proposal$ message for $v$ and $2f+1$ +$\li{\Prevote,h,r,id(v)}$ at time $t_0$. At least $f+1$ of those messages are +sent by correct processes. Let's denote this set of correct processes as $C$. By +Lemma~\ref{lemma:majority-intersection} any set of $2f+1$ $\Prevote$ messages +in round $r$ contains at least a single message from the set $C$. + +Let's denote as time $t$ the earliest point in time a correct process, $c_1$, triggered +$\timeoutPrevote(r)$. This implies that $c_1$ received $2f+1$ $\Prevote$ messages +(see the rule at line \ref{line:tab:recvAny2/3Prevote}), where at least one of +those messages was sent by a process $c_2$ from the set $C$. Therefore, process +$c_2$ had received $\Proposal$ message before time $t$. By the \emph{Gossip +communication} property, all correct processes will receive $\Proposal$ and +$2f+1$ $\Prevote$ messages for round $r$ by time $t+\Delta$. The latest point +in time $p$ will trigger $\timeoutPrevote(r)$ is $t+\Delta$\footnote{Note that +even if $p$ was in smaller round at time $t$ it will start round $r$ by time +$t+\Delta$.}. So the latest point in time $p$ can lock the value $v$ in +round $r$ is $t_0 = t+\Delta+\timeoutPrevote(r)$ (as at this point +$\timeoutPrevote(r)$ expires, so a process sends $\Precommit$ $\nil$ and updates +$step$ to $\precommit$, see line \ref{line:tab:onTimeoutPrevote}). + +Note that according to the Algorithm \ref{alg:tendermint}, a correct process +can not send a $\Precommit$ message before receiving $2f+1$ $\Prevote$ +messages. Therefore, no correct process can send a $\Precommit$ message in +round $r$ before time $t$. If a correct process sends a $\Precommit$ message +for $\nil$, it implies that it has waited for the full duration of +$\timeoutPrevote(r)$ (see line +\ref{line:tab:precommit-nil-onTimeout})\footnote{The other case in which a +correct process $\Precommit$ for $\nil$ is after receiving $2f+1$ $Prevote$ for +$\nil$ messages, see the line \ref{line:tab:precommit-v-1}. By +Lemma~\ref{lemma:majority-intersection}, this is not possible in round $r$.}. +Therefore, no correct process can send $\Precommit$ for $\nil$ before time $t + +\timeoutPrevote(r)$ (*). + +A correct process $q$ that enters round $r+1$ must wait (i) $\timeoutPrecommit(r)$ +(see line \ref{line:tab:nextRound}) or (ii) receiving $f+1$ messages from the +round $r+1$ (see the line \ref{line:tab:skipRounds}). In the former case, $q$ +receives $2f+1$ $\Precommit$ messages before starting $\timeoutPrecommit(r)$. If +at least a single $\Precommit$ message from a correct process (at least $f+1$ +voting power equivalent of those messages is sent by correct processes) is for +$\nil$, then $q$ cannot start round $r+1$ before time $t_1 = t + +\timeoutPrevote(r) + \timeoutPrecommit(r)$ (see (*)). Therefore in this case we have: +$t_0 + \Delta < t_1$, i.e., $t+2\Delta+\timeoutPrevote(r) < t + \timeoutPrevote(r) + +\timeoutPrecommit(r)$, and this is true whenever $\timeoutPrecommit(r) > 2\Delta$, so +Lemma holds in this case. + +If in the set of $2f+1$ $\Precommit$ messages $q$ receives, there is at least a +single $\Precommit$ for $id(v)$ message from a correct process $c$, then $q$ +can start the round $r+1$ the earliest at time $t_1 = t+\timeoutPrecommit(r)$. In +this case, by the \emph{Gossip communication} property, all correct processes +will receive $\Proposal$ and $2f+1$ $\Prevote$ messages (that $c$ received +before time $t$) the latest at time $t+\Delta$. Therefore, $q$ will set +$validValue$ to $v$ and $validRound$ to $r$ the latest at time $t+\Delta$. As +$t+\Delta < t+\timeoutPrecommit(r)$, whenever $\timeoutPrecommit(r) > \Delta$, the +Lemma holds also in this case. + +In case (ii), $q$ received at least a single message from a correct process $c$ +from the round $r+1$. The earliest point in time $c$ could have started round +$r+1$ is $t+\timeoutPrecommit(r)$ in case it received a $\Precommit$ message for +$v$ from some correct process in the set of $2f+1$ $\Precommit$ messages it +received. The same reasoning as above holds also in this case, so $q$ set +$validValue$ to $v$ and $validRound$ to $r$ the latest by time $t+\Delta$. As +$t+\Delta < t+\timeoutPrecommit(r)$, whenever $\timeoutPrecommit(r) > \Delta$, the +Lemma holds also in this case. \end{proof} + +\begin{lemma} \label{lemma:agreement} Algorithm~\ref{alg:tendermint} satisfies +Termination. \end{lemma} + +\begin{proof} Lemma~\ref{lemma:round-synchronisation} defines a scenario in + which all correct processes decide. We now prove that within a bounded + duration after GST such a scenario will unfold. Let's assume that at time + $GST$ the highest round started by a correct process is $r_0$, and that + there exists a correct process $p$ such that the following holds: for every + correct process $c$, $lockedRound_c \le validRound_p$. Furthermore, we + assume that $p$ will be the proposer in some round $r_1 > r$ (this is + ensured by the $\coord$ function). + +We have two cases to consider. In the first case, for all rounds $r \ge r_0$ +and $r < r_1$, no correct process locks a value (set $lockedRound$ to $r$). So +in round $r_1$ we have the scenario from the +Lemma~\ref{lemma:round-synchronisation}, so all correct processes decides in +round $r_1$. + +In the second case, a correct process locks a value $v$ in round $r_2$, where +$r_2 \ge r_0$ and $r_2 < r_1$. Let's assume that $r_2$ is the highest round +before $r_1$ in which some correct process $q$ locks a value. By Lemma +\ref{lemma:validValue} at the end of round $r_2$ the following holds for all +correct processes $c$: $validValue_c = lockedValue_q$ and $validRound_c = r_2$. +Then in round $r_1$, the conditions for the +Lemma~\ref{lemma:round-synchronisation} holds, so all correct processes decide. +\end{proof} + diff --git a/spec/consensus/consensus-paper/rounddiag.sty b/spec/consensus/consensus-paper/rounddiag.sty new file mode 100644 index 000000000..a6ca5d883 --- /dev/null +++ b/spec/consensus/consensus-paper/rounddiag.sty @@ -0,0 +1,62 @@ +% ROUNDDIAG STYLE +% for LaTeX version 2e +% by -- 2008 Martin Hutle +% +% This style file is free software; you can redistribute it and/or +% modify it under the terms of the GNU Lesser General Public +% License as published by the Free Software Foundation; either +% version 2 of the License, or (at your option) any later version. +% +% This style file is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +% Lesser General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public +% License along with this style file; if not, write to the +% Free Software Foundation, Inc., 59 Temple Place - Suite 330, +% Boston, MA 02111-1307, USA. +% +\NeedsTeXFormat{LaTeX2e} +\ProvidesPackage{rounddiag} +\typeout{Document Style `rounddiag' - provides simple round diagrams} +% +\RequirePackage{ifthen} +\RequirePackage{calc} +\RequirePackage{tikz} + +\def\rdstretch{3} + +\tikzstyle{msg}=[->,thick,>=latex] +\tikzstyle{rndline}=[dotted] +\tikzstyle{procline}=[dotted] + +\newenvironment{rounddiag}[2]{ +\begin{center} +\begin{tikzpicture} +\foreach \i in {1,...,#1}{ + \draw[procline] (0,#1-\i) node[xshift=-1em]{$p_{\i}$} -- (#2*\rdstretch+1,#1-\i); +} +\foreach \i in {0,...,#2}{ + \draw[rndline] (\i*\rdstretch+0.5,0) -- (\i*\rdstretch+0.5,#1-1); +} +\newcommand{\rdat}[2]{ + (##2*\rdstretch+0.5,#1-##1) +}% +\newcommand{\round}[2]{% + \def\rdround{##1} + \ifthenelse{\equal{##2}{}}{}{ + \node[yshift=-1em] at ({##1*\rdstretch+0.5-0.5*\rdstretch},0) {##2}; + } +}% +\newcommand{\rdmessage}[3]{\draw[msg] + (\rdround*\rdstretch-\rdstretch+0.5,#1-##1) -- node[yshift=1.2ex]{##3} + (\rdround*\rdstretch+0.5,#1-##2);}% +\newcommand{\rdalltoall}{% + \foreach \i in {1,...,#1}{ + \foreach \j in {1,...,#1}{ + { \rdmessage{\i}{\j}{}}}}}% +}{% +\end{tikzpicture} +\end{center} +} diff --git a/spec/consensus/consensus-paper/technote.sty b/spec/consensus/consensus-paper/technote.sty new file mode 100644 index 000000000..5353f13cd --- /dev/null +++ b/spec/consensus/consensus-paper/technote.sty @@ -0,0 +1,118 @@ +\NeedsTeXFormat{LaTeX2e} +\ProvidesPackage{technote}[2007/11/09] +\typeout{Template for quick notes with some useful definitions} + +\RequirePackage{ifthen} +\RequirePackage{calc} +\RequirePackage{amsmath,amssymb,amsthm} +\RequirePackage{epsfig} +\RequirePackage{algorithm} +\RequirePackage[noend]{algorithmicplus} + +\newboolean{technote@noedit} +\setboolean{technote@noedit}{false} +\DeclareOption{noedit}{\setboolean{technote@noedit}{true}} + +\newcounter{technote@lang} +\setcounter{technote@lang}{0} +\DeclareOption{german}{\setcounter{technote@lang}{1}} +\DeclareOption{french}{\setcounter{technote@lang}{2}} + +\DeclareOption{fullpage}{ +\oddsidemargin -10mm % Margin on odd side pages (default=0mm) +\evensidemargin -10mm % Margin on even side pages (default=0mm) +\topmargin -10mm % Top margin space (default=16mm) +\headheight \baselineskip % Height of headers (default=0mm) +\headsep \baselineskip % Separation spc btw header and text (d=0mm) +\footskip 30pt % Separation spc btw text and footer (d=30pt) +\textheight 230mm % Total text height (default=200mm) +\textwidth 180mm % Total text width (default=160mm) +} + +\renewcommand{\algorithmiccomment}[1]{\hfill/* #1 */} +\renewcommand{\algorithmiclnosize}{\scriptsize} + +\newboolean{technote@truenumbers} +\setboolean{technote@truenumbers}{false} +\DeclareOption{truenumbers}{\setboolean{technote@truenumbers}{true}} + +\ProcessOptions + +\newcommand{\N}{\ifthenelse{\boolean{technote@truenumbers}}% + {\mbox{\rm I\hspace{-.5em}N}}% + {\mathbb{N}}} + +\newcommand{\R}{\ifthenelse{\boolean{technote@truenumbers}}% + {\mbox{\rm I\hspace{-.2em}R}}% + {\mathbb{R}}} + +\newcommand{\Z}{\mathbb{Z}} + +\newcommand{\set}[1]{\left\{#1\right\}} +\newcommand{\mathsc}[1]{\mbox{\sc #1}} +\newcommand{\li}[1]{\langle#1\rangle} +\newcommand{\st}{\;s.t.\;} +\newcommand{\Real}{\R} +\newcommand{\Natural}{\N} +\newcommand{\Integer}{\Z} + +% edit commands +\newcommand{\newedit}[2]{ + \newcommand{#1}[2][default]{% + \ifthenelse{\boolean{technote@noedit}}{}{ + \par\vspace{2mm} + \noindent + \begin{tabular}{|l|}\hline + \parbox{\linewidth-\tabcolsep*2}{{\bf #2:}\hfill\ifthenelse{\equal{##1}{default}}{}{##1}}\\\hline + \parbox{\linewidth-\tabcolsep*2}{\rule{0pt}{5mm}##2\rule[-2mm]{0pt}{2mm}}\\\hline + \end{tabular} + \par\vspace{2mm} + } + } +} + +\newedit{\note}{Note} +\newedit{\comment}{Comment} +\newedit{\question}{Question} +\newedit{\content}{Content} +\newedit{\problem}{Problem} + +\newcommand{\mnote}[1]{\marginpar{\scriptsize\it + \begin{minipage}[t]{0.8 in} + \raggedright #1 + \end{minipage}}} + +\newcommand{\Insert}[1]{\underline{#1}\marginpar{$|$}} + +\newcommand{\Delete}[1]{\marginpar{$|$} +} + +% lemma, theorem, etc. +\newtheorem{lemma}{Lemma} +\newtheorem{proposition}{Proposition} +\newtheorem{theorem}{Theorem} +\newtheorem{corollary}{Corollary} +\newtheorem{assumption}{Assumption} +\newtheorem{definition}{Definition} + +\gdef\op|{\,|\;} +\gdef\op:{\,:\;} +\newcommand{\assign}{\leftarrow} +\newcommand{\inc}[1]{#1 \assign #1 + 1} +\newcommand{\isdef}{:=} + +\newcommand{\ident}[1]{\mathit{#1}} +\def\newident#1{\expandafter\def\csname #1\endcsname{\ident{#1}}} + +\newcommand{\eg}{{\it e.g.}} +\newcommand{\ie}{{\it i.e.}} +\newcommand{\apriori}{{\it apriori}} +\newcommand{\etal}{{\it et al.}} + +\newcommand\ps@technote{% + \renewcommand\@oddhead{\theheader}% + \let\@evenhead\@oddhead + \renewcommand\@evenfoot + {\hfil\normalfont\textrm{\thepage}\hfil}% + \let\@oddfoot\@evenfoot +} diff --git a/spec/consensus/consensus.md b/spec/consensus/consensus.md new file mode 100644 index 000000000..5d1526a17 --- /dev/null +++ b/spec/consensus/consensus.md @@ -0,0 +1,352 @@ +--- +order: 1 +--- +# Byzantine Consensus Algorithm + +## Terms + +- The network is composed of optionally connected _nodes_. Nodes + directly connected to a particular node are called _peers_. +- The consensus process in deciding the next block (at some _height_ + `H`) is composed of one or many _rounds_. +- `NewHeight`, `Propose`, `Prevote`, `Precommit`, and `Commit` + represent state machine states of a round. (aka `RoundStep` or + just "step"). +- A node is said to be _at_ a given height, round, and step, or at + `(H,R,S)`, or at `(H,R)` in short to omit the step. +- To _prevote_ or _precommit_ something means to broadcast a [prevote + vote](https://godoc.org/github.com/tendermint/tendermint/types#Vote) + or [first precommit + vote](https://godoc.org/github.com/tendermint/tendermint/types#FirstPrecommit) + for something. +- A vote _at_ `(H,R)` is a vote signed with the bytes for `H` and `R` + included in its [sign-bytes](../core/data_structures.md#vote). +- _+2/3_ is short for "more than 2/3" +- _1/3+_ is short for "1/3 or more" +- A set of +2/3 of prevotes for a particular block or `` at + `(H,R)` is called a _proof-of-lock-change_ or _PoLC_ for short. + +## State Machine Overview + +At each height of the blockchain a round-based protocol is run to +determine the next block. Each round is composed of three _steps_ +(`Propose`, `Prevote`, and `Precommit`), along with two special steps +`Commit` and `NewHeight`. + +In the optimal scenario, the order of steps is: + +```md +NewHeight -> (Propose -> Prevote -> Precommit)+ -> Commit -> NewHeight ->... +``` + +The sequence `(Propose -> Prevote -> Precommit)` is called a _round_. +There may be more than one round required to commit a block at a given +height. Examples for why more rounds may be required include: + +- The designated proposer was not online. +- The block proposed by the designated proposer was not valid. +- The block proposed by the designated proposer did not propagate + in time. +- The block proposed was valid, but +2/3 of prevotes for the proposed + block were not received in time for enough validator nodes by the + time they reached the `Precommit` step. Even though +2/3 of prevotes + are necessary to progress to the next step, at least one validator + may have voted `` or maliciously voted for something else. +- The block proposed was valid, and +2/3 of prevotes were received for + enough nodes, but +2/3 of precommits for the proposed block were not + received for enough validator nodes. + +Some of these problems are resolved by moving onto the next round & +proposer. Others are resolved by increasing certain round timeout +parameters over each successive round. + +## State Machine Diagram + +```md + +-------------------------------------+ + v |(Wait til `CommmitTime+timeoutCommit`) + +-----------+ +-----+-----+ + +----------> | Propose +--------------+ | NewHeight | + | +-----------+ | +-----------+ + | | ^ + |(Else, after timeoutPrecommit) v | ++-----+-----+ +-----------+ | +| Precommit | <------------------------+ Prevote | | ++-----+-----+ +-----------+ | + |(When +2/3 Precommits for block found) | + v | ++--------------------------------------------------------------------+ +| Commit | +| | +| * Set CommitTime = now; | +| * Wait for block, then stage/save/commit block; | ++--------------------------------------------------------------------+ +``` + +# Background Gossip + +A node may not have a corresponding validator private key, but it +nevertheless plays an active role in the consensus process by relaying +relevant meta-data, proposals, blocks, and votes to its peers. A node +that has the private keys of an active validator and is engaged in +signing votes is called a _validator-node_. All nodes (not just +validator-nodes) have an associated state (the current height, round, +and step) and work to make progress. + +Between two nodes there exists a `Connection`, and multiplexed on top of +this connection are fairly throttled `Channel`s of information. An +epidemic gossip protocol is implemented among some of these channels to +bring peers up to speed on the most recent state of consensus. For +example, + +- Nodes gossip `PartSet` parts of the current round's proposer's + proposed block. A LibSwift inspired algorithm is used to quickly + broadcast blocks across the gossip network. +- Nodes gossip prevote/precommit votes. A node `NODE_A` that is ahead + of `NODE_B` can send `NODE_B` prevotes or precommits for `NODE_B`'s + current (or future) round to enable it to progress forward. +- Nodes gossip prevotes for the proposed PoLC (proof-of-lock-change) + round if one is proposed. +- Nodes gossip to nodes lagging in blockchain height with block + [commits](https://godoc.org/github.com/tendermint/tendermint/types#Commit) + for older blocks. +- Nodes opportunistically gossip `ReceivedVote` messages to hint peers what + votes it already has. +- Nodes broadcast their current state to all neighboring peers. (but + is not gossiped further) + +There's more, but let's not get ahead of ourselves here. + +## Proposals + +A proposal is signed and published by the designated proposer at each +round. The proposer is chosen by a deterministic and non-choking round +robin selection algorithm that selects proposers in proportion to their +voting power (see +[implementation](https://github.com/tendermint/tendermint/blob/master/types/validator_set.go)). + +A proposal at `(H,R)` is composed of a block and an optional latest +`PoLC-Round < R` which is included iff the proposer knows of one. This +hints the network to allow nodes to unlock (when safe) to ensure the +liveness property. + +## State Machine Spec + +### Propose Step (height:H,round:R) + +Upon entering `Propose`: + +- The designated proposer proposes a block at `(H,R)`. + +The `Propose` step ends: + +- After `timeoutProposeR` after entering `Propose`. --> goto + `Prevote(H,R)` +- After receiving proposal block and all prevotes at `PoLC-Round`. --> + goto `Prevote(H,R)` +- After [common exit conditions](#common-exit-conditions) + +### Prevote Step (height:H,round:R) + +Upon entering `Prevote`, each validator broadcasts its prevote vote. + +- First, if the validator is locked on a block since `LastLockRound` + but now has a PoLC for something else at round `PoLC-Round` where + `LastLockRound < PoLC-Round < R`, then it unlocks. +- If the validator is still locked on a block, it prevotes that. +- Else, if the proposed block from `Propose(H,R)` is good, it + prevotes that. +- Else, if the proposal is invalid or wasn't received on time, it + prevotes ``. + +The `Prevote` step ends: + +- After +2/3 prevotes for a particular block or ``. -->; goto + `Precommit(H,R)` +- After `timeoutPrevote` after receiving any +2/3 prevotes. --> goto + `Precommit(H,R)` +- After [common exit conditions](#common-exit-conditions) + +### Precommit Step (height:H,round:R) + +Upon entering `Precommit`, each validator broadcasts its precommit vote. + +- If the validator has a PoLC at `(H,R)` for a particular block `B`, it + (re)locks (or changes lock to) and precommits `B` and sets + `LastLockRound = R`. +- Else, if the validator has a PoLC at `(H,R)` for ``, it unlocks + and precommits ``. +- Else, it keeps the lock unchanged and precommits ``. + +A precommit for `` means "I didn’t see a PoLC for this round, but I +did get +2/3 prevotes and waited a bit". + +The Precommit step ends: + +- After +2/3 precommits for ``. --> goto `Propose(H,R+1)` +- After `timeoutPrecommit` after receiving any +2/3 precommits. --> goto + `Propose(H,R+1)` +- After [common exit conditions](#common-exit-conditions) + +### Common exit conditions + +- After +2/3 precommits for a particular block. --> goto + `Commit(H)` +- After any +2/3 prevotes received at `(H,R+x)`. --> goto + `Prevote(H,R+x)` +- After any +2/3 precommits received at `(H,R+x)`. --> goto + `Precommit(H,R+x)` + +### Commit Step (height:H) + +- Set `CommitTime = now()` +- Wait until block is received. --> goto `NewHeight(H+1)` + +### NewHeight Step (height:H) + +- Move `Precommits` to `LastCommit` and increment height. +- Set `StartTime = CommitTime+timeoutCommit` +- Wait until `StartTime` to receive straggler commits. --> goto + `Propose(H,0)` + +## Proofs + +### Proof of Safety + +Assume that at most -1/3 of the voting power of validators is byzantine. +If a validator commits block `B` at round `R`, it's because it saw +2/3 +of precommits at round `R`. This implies that 1/3+ of honest nodes are +still locked at round `R' > R`. These locked validators will remain +locked until they see a PoLC at `R' > R`, but this won't happen because +1/3+ are locked and honest, so at most -2/3 are available to vote for +anything other than `B`. + +### Proof of Liveness + +If 1/3+ honest validators are locked on two different blocks from +different rounds, a proposers' `PoLC-Round` will eventually cause nodes +locked from the earlier round to unlock. Eventually, the designated +proposer will be one that is aware of a PoLC at the later round. Also, +`timeoutProposalR` increments with round `R`, while the size of a +proposal are capped, so eventually the network is able to "fully gossip" +the whole proposal (e.g. the block & PoLC). + +### Proof of Fork Accountability + +Define the JSet (justification-vote-set) at height `H` of a validator +`V1` to be all the votes signed by the validator at `H` along with +justification PoLC prevotes for each lock change. For example, if `V1` +signed the following precommits: `Precommit(B1 @ round 0)`, +`Precommit( @ round 1)`, `Precommit(B2 @ round 4)` (note that no +precommits were signed for rounds 2 and 3, and that's ok), +`Precommit(B1 @ round 0)` must be justified by a PoLC at round 0, and +`Precommit(B2 @ round 4)` must be justified by a PoLC at round 4; but +the precommit for `` at round 1 is not a lock-change by definition +so the JSet for `V1` need not include any prevotes at round 1, 2, or 3 +(unless `V1` happened to have prevoted for those rounds). + +Further, define the JSet at height `H` of a set of validators `VSet` to +be the union of the JSets for each validator in `VSet`. For a given +commit by honest validators at round `R` for block `B` we can construct +a JSet to justify the commit for `B` at `R`. We say that a JSet +_justifies_ a commit at `(H,R)` if all the committers (validators in the +commit-set) are each justified in the JSet with no duplicitous vote +signatures (by the committers). + +- **Lemma**: When a fork is detected by the existence of two + conflicting [commits](../core/data_structures.md#commit), the + union of the JSets for both commits (if they can be compiled) must + include double-signing by at least 1/3+ of the validator set. + **Proof**: The commit cannot be at the same round, because that + would immediately imply double-signing by 1/3+. Take the union of + the JSets of both commits. If there is no double-signing by at least + 1/3+ of the validator set in the union, then no honest validator + could have precommitted any different block after the first commit. + Yet, +2/3 did. Reductio ad absurdum. + +As a corollary, when there is a fork, an external process can determine +the blame by requiring each validator to justify all of its round votes. +Either we will find 1/3+ who cannot justify at least one of their votes, +and/or, we will find 1/3+ who had double-signed. + +### Alternative algorithm + +Alternatively, we can take the JSet of a commit to be the "full commit". +That is, if light clients and validators do not consider a block to be +committed unless the JSet of the commit is also known, then we get the +desirable property that if there ever is a fork (e.g. there are two +conflicting "full commits"), then 1/3+ of the validators are immediately +punishable for double-signing. + +There are many ways to ensure that the gossip network efficiently share +the JSet of a commit. One solution is to add a new message type that +tells peers that this node has (or does not have) a +2/3 majority for B +(or) at (H,R), and a bitarray of which votes contributed towards that +majority. Peers can react by responding with appropriate votes. + +We will implement such an algorithm for the next iteration of the +Tendermint consensus protocol. + +Other potential improvements include adding more data in votes such as +the last known PoLC round that caused a lock change, and the last voted +round/step (or, we may require that validators not skip any votes). This +may make JSet verification/gossip logic easier to implement. + +### Censorship Attacks + +Due to the definition of a block +[commit](https://github.com/tendermint/tendermint/blob/master/docs/tendermint-core/validators.md), any 1/3+ coalition of +validators can halt the blockchain by not broadcasting their votes. Such +a coalition can also censor particular transactions by rejecting blocks +that include these transactions, though this would result in a +significant proportion of block proposals to be rejected, which would +slow down the rate of block commits of the blockchain, reducing its +utility and value. The malicious coalition might also broadcast votes in +a trickle so as to grind blockchain block commits to a near halt, or +engage in any combination of these attacks. + +If a global active adversary were also involved, it can partition the +network in such a way that it may appear that the wrong subset of +validators were responsible for the slowdown. This is not just a +limitation of Tendermint, but rather a limitation of all consensus +protocols whose network is potentially controlled by an active +adversary. + +### Overcoming Forks and Censorship Attacks + +For these types of attacks, a subset of the validators through external +means should coordinate to sign a reorg-proposal that chooses a fork +(and any evidence thereof) and the initial subset of validators with +their signatures. Validators who sign such a reorg-proposal forego its +collateral on all other forks. Clients should verify the signatures on +the reorg-proposal, verify any evidence, and make a judgement or prompt +the end-user for a decision. For example, a phone wallet app may prompt +the user with a security warning, while a refrigerator may accept any +reorg-proposal signed by +1/2 of the original validators. + +No non-synchronous Byzantine fault-tolerant algorithm can come to +consensus when 1/3+ of validators are dishonest, yet a fork assumes that +1/3+ of validators have already been dishonest by double-signing or +lock-changing without justification. So, signing the reorg-proposal is a +coordination problem that cannot be solved by any non-synchronous +protocol (i.e. automatically, and without making assumptions about the +reliability of the underlying network). It must be provided by means +external to the weakly-synchronous Tendermint consensus algorithm. For +now, we leave the problem of reorg-proposal coordination to human +coordination via internet media. Validators must take care to ensure +that there are no significant network partitions, to avoid situations +where two conflicting reorg-proposals are signed. + +Assuming that the external coordination medium and protocol is robust, +it follows that forks are less of a concern than [censorship +attacks](#censorship-attacks). + +### Canonical vs subjective commit + +We distinguish between "canonical" and "subjective" commits. A subjective commit is what +each validator sees locally when they decide to commit a block. The canonical commit is +what is included by the proposer of the next block in the `LastCommit` field of +the block. This is what makes it canonical and ensures every validator agrees on the canonical commit, +even if it is different from the +2/3 votes a validator has seen, which caused the validator to +commit the respective block. Each block contains a canonical +2/3 commit for the previous +block. diff --git a/spec/consensus/creating-proposal.md b/spec/consensus/creating-proposal.md new file mode 100644 index 000000000..cb43c8ebb --- /dev/null +++ b/spec/consensus/creating-proposal.md @@ -0,0 +1,43 @@ +--- +order: 2 +--- +# Creating a proposal + +A block consists of a header, transactions, votes (the commit), +and a list of evidence of malfeasance (ie. signing conflicting votes). + +We include no more than 1/10th of the maximum block size +(`ConsensusParams.Block.MaxBytes`) of evidence with each block. + +## Reaping transactions from the mempool + +When we reap transactions from the mempool, we calculate maximum data +size by subtracting maximum header size (`MaxHeaderBytes`), the maximum +amino overhead for a block (`MaxAminoOverheadForBlock`), the size of +the last commit (if present) and evidence (if present). While reaping +we account for amino overhead for each transaction. + +```go +func MaxDataBytes(maxBytes int64, valsCount, evidenceCount int) int64 { + return maxBytes - + MaxOverheadForBlock - + MaxHeaderBytes - + int64(valsCount)*MaxVoteBytes - + int64(evidenceCount)*MaxEvidenceBytes +} +``` + +## Validating transactions in the mempool + +Before we accept a transaction in the mempool, we check if it's size is no more +than {MaxDataSize}. {MaxDataSize} is calculated using the same formula as +above, except we subtract the max number of evidence, {MaxNum} by the maximum size of evidence + +```go +func MaxDataBytesUnknownEvidence(maxBytes int64, valsCount int) int64 { + return maxBytes - + MaxOverheadForBlock - + MaxHeaderBytes - + (maxNumEvidence * MaxEvidenceBytes) +} +``` diff --git a/spec/consensus/evidence.md b/spec/consensus/evidence.md new file mode 100644 index 000000000..84d51a01b --- /dev/null +++ b/spec/consensus/evidence.md @@ -0,0 +1,199 @@ +# Evidence + +Evidence is an important component of Tendermint's security model. Whilst the core +consensus protocol provides correctness gaurantees for state machine replication +that can tolerate less than 1/3 failures, the evidence system looks to detect and +gossip byzantine faults whose combined power is greater than or equal to 1/3. It is worth noting that +the evidence system is designed purely to detect possible attacks, gossip them, +commit them on chain and inform the application running on top of Tendermint. +Evidence in itself does not punish "bad actors", this is left to the discretion +of the application. A common form of punishment is slashing where the validators +that were caught violating the protocol have all or a portion of their voting +power removed. Evidence, given the assumption that 1/3+ of the network is still +byzantine, is susceptible to censorship and should therefore be considered added +security on a "best effort" basis. + +This document walks through the various forms of evidence, how they are detected, +gossiped, verified and committed. + +> NOTE: Evidence here is internal to tendermint and should not be confused with +> application evidence + +## Detection + +### Equivocation + +Equivocation is the most fundamental of byzantine faults. Simply put, to prevent +replication of state across all nodes, a validator tries to convince some subset +of nodes to commit one block whilst convincing another subset to commit a +different block. This is achieved by double voting (hence +`DuplicateVoteEvidence`). A successful duplicate vote attack requires greater +than 1/3 voting power and a (temporary) network partition between the aforementioned +subsets. This is because in consensus, votes are gossiped around. When a node +observes two conflicting votes from the same peer, it will use the two votes of +evidence and begin gossiping this evidence to other nodes. [Verification](#duplicatevoteevidence) is addressed further down. + +```go +type DuplicateVoteEvidence struct { + VoteA Vote + VoteB Vote + + // and abci specific fields +} +``` + +### Light Client Attacks + +Light clients also comply with the 1/3+ security model, however, by using a +different, more lightweight verification method they are subject to a +different kind of 1/3+ attack whereby the byzantine validators could sign an +alternative light block that the light client will think is valid. Detection, +explained in greater detail +[here](../light-client/detection/detection_003_reviewed.md), involves comparison +with multiple other nodes in the hope that at least one is "honest". An "honest" +node will return a challenging light block for the light client to validate. If +this challenging light block also meets the +[validation criteria](../light-client/verification/verification_001_published.md) +then the light client sends the "forged" light block to the node. +[Verification](#lightclientattackevidence) is addressed further down. + +```go +type LightClientAttackEvidence struct { + ConflictingBlock LightBlock + CommonHeight int64 + + // and abci specific fields +} +``` + +## Verification + +If a node receives evidence, it will first try to verify it, then persist it. +Evidence of byzantine behavior should only be committed once (uniqueness) and +should be committed within a certain period from the point that it occurred +(timely). Timelines is defined by the `EvidenceParams`: `MaxAgeNumBlocks` and +`MaxAgeDuration`. In Proof of Stake chains where validators are bonded, evidence +age should be less than the unbonding period so validators still can be +punished. Given these two propoerties the following initial checks are made. + +1. Has the evidence expired? This is done by taking the height of the `Vote` + within `DuplicateVoteEvidence` or `CommonHeight` within + `LightClientAttakEvidence`. The evidence height is then used to retrieve the + header and thus the time of the block that corresponds to the evidence. If + `CurrentHeight - MaxAgeNumBlocks > EvidenceHeight` && `CurrentTime - + MaxAgeDuration > EvidenceTime`, the evidence is considered expired and + ignored. + +2. Has the evidence already been committed? The evidence pool tracks the hash of + all committed evidence and uses this to determine uniqueness. If a new + evidence has the same hash as a committed one, the new evidence will be + ignored. + +### DuplicateVoteEvidence + +Valid `DuplicateVoteEvidence` must adhere to the following rules: + +- Validator Address, Height, Round and Type must be the same for both votes + +- BlockID must be different for both votes (BlockID can be for a nil block) + +- Validator must have been in the validator set at that height + +- Vote signature must be correctly signed. This also uses `ChainID` so we know + that the fault occurred on this chain + +### LightClientAttackEvidence + +Valid Light Client Attack Evidence must adhere to the following rules: + +- If the header of the light block is invalid, thus indicating a lunatic attack, + the node must check that they can use `verifySkipping` from their header at + the common height to the conflicting header + +- If the header is valid, then the validator sets are the same and this is + either a form of equivocation or amnesia. We therefore check that 2/3 of the + validator set also signed the conflicting header. + +- The nodes own header at the same height as the conflicting header must have a + different hash to the conflicting header. + +- If the nodes latest header is less in height to the conflicting header, then + the node must check that the conflicting block has a time that is less than + this latest header (This is a forward lunatic attack). + +## Gossiping + +If a node verifies evidence it then broadcasts it to all peers, continously sending +the same evidence once every 10 seconds until the evidence is seen on chain or +expires. + +## Commiting on Chain + +Evidence takes strict priority over regular transactions, thus a block is filled +with evidence first and transactions take up the remainder of the space. To +mitigate the threat of an already punished node from spamming the network with +more evidence, the size of the evidence in a block can be capped by +`EvidenceParams.MaxBytes`. Nodes receiving blocks with evidence will validate +the evidence before sending `Prevote` and `Precommit` votes. The evidence pool +will usually cache verifications so that this process is much quicker. + +## Sending Evidence to the Application + +After evidence is committed, the block is then processed by the block executor +which delivers the evidence to the application via `EndBlock`. Evidence is +stripped of the actual proof, split up per faulty validator and only the +validator, height, time and evidence type is sent. + +```proto +enum EvidenceType { + UNKNOWN = 0; + DUPLICATE_VOTE = 1; + LIGHT_CLIENT_ATTACK = 2; +} + +message Evidence { + EvidenceType type = 1; + // The offending validator + Validator validator = 2 [(gogoproto.nullable) = false]; + // The height when the offense occurred + int64 height = 3; + // The corresponding time where the offense occurred + google.protobuf.Timestamp time = 4 [ + (gogoproto.nullable) = false, (gogoproto.stdtime) = true]; + // Total voting power of the validator set in case the ABCI application does + // not store historical validators. + // https://github.com/tendermint/tendermint/issues/4581 + int64 total_voting_power = 5; +} +``` + +`DuplicateVoteEvidence` and `LightClientAttackEvidence` are self-contained in +the sense that the evidence can be used to derive the `abci.Evidence` that is +sent to the application. Because of this, extra fields are necessary: + +```go +type DuplicateVoteEvidence struct { + VoteA *Vote + VoteB *Vote + + // abci specific information + TotalVotingPower int64 + ValidatorPower int64 + Timestamp time.Time +} + +type LightClientAttackEvidence struct { + ConflictingBlock *LightBlock + CommonHeight int64 + + // abci specific information + ByzantineValidators []*Validator + TotalVotingPower int64 + Timestamp time.Time +} +``` + +These ABCI specific fields don't affect validity of the evidence itself but must +be consistent amongst nodes and agreed upon on chain. If evidence with the +incorrect abci information is sent, a node will create new evidence from it and +replace the ABCI fields with the correct information. diff --git a/spec/consensus/light-client/README.md b/spec/consensus/light-client/README.md new file mode 100644 index 000000000..44b9e0c76 --- /dev/null +++ b/spec/consensus/light-client/README.md @@ -0,0 +1,9 @@ +--- +order: 1 +parent: + title: Light Client + order: false +--- +# Tendermint Light Client Protocol + +Deprecated, please see [light-client](../../light-client/README.md). diff --git a/spec/consensus/light-client/accountability.md b/spec/consensus/light-client/accountability.md new file mode 100644 index 000000000..5cf46b0b4 --- /dev/null +++ b/spec/consensus/light-client/accountability.md @@ -0,0 +1,3 @@ +# Fork accountability + +Deprecated, please see [light-client/accountability](../../light-client/accountability.md). diff --git a/spec/consensus/light-client/assets/light-node-image.png b/spec/consensus/light-client/assets/light-node-image.png new file mode 100644 index 000000000..f0b93c6e4 Binary files /dev/null and b/spec/consensus/light-client/assets/light-node-image.png differ diff --git a/spec/consensus/light-client/detection.md b/spec/consensus/light-client/detection.md new file mode 100644 index 000000000..5c87562ba --- /dev/null +++ b/spec/consensus/light-client/detection.md @@ -0,0 +1,3 @@ +# Detection + +Deprecated, please see [light-client/detection](../../light-client/detection.md). diff --git a/spec/consensus/light-client/verification.md b/spec/consensus/light-client/verification.md new file mode 100644 index 000000000..1f0104a40 --- /dev/null +++ b/spec/consensus/light-client/verification.md @@ -0,0 +1,3 @@ +# Core Verification + +Deprecated, please see [light-client/accountability](../../light-client/verification.md). diff --git a/spec/consensus/proposer-based-timestamp/README.md b/spec/consensus/proposer-based-timestamp/README.md new file mode 100644 index 000000000..82421d99d --- /dev/null +++ b/spec/consensus/proposer-based-timestamp/README.md @@ -0,0 +1,20 @@ +# Proposer-Based Timestamps + +This section describes a version of the Tendermint consensus protocol, +which uses proposer-based timestamps. + +## Contents + +- [Proposer-Based Time][main] (entry point) +- [Part I - System Model and Properties][sysmodel] +- [Part II - Protocol Specification][algorithm] +- [TLA+ Specification][proposertla] + + +[algorithm]: ./pbts-algorithm_001_draft.md + +[sysmodel]: ./pbts-sysmodel_001_draft.md + +[main]: ./pbts_001_draft.md + +[proposertla]: ./tla/TendermintPBT_001_draft.tla diff --git a/spec/consensus/proposer-based-timestamp/pbts-algorithm_001_draft.md b/spec/consensus/proposer-based-timestamp/pbts-algorithm_001_draft.md new file mode 100644 index 000000000..9791ff86b --- /dev/null +++ b/spec/consensus/proposer-based-timestamp/pbts-algorithm_001_draft.md @@ -0,0 +1,163 @@ +# Proposer-Based Time - Part II + +## Updated Consensus Algorithm + +### Outline + +The algorithm in the [arXiv paper][arXiv] evaluates rules of the received messages without making explicit how these messages are received. In our solution, we will make some message filtering explicit. We will assume that there are message reception steps (where messages are received and possibly stored locally for later evaluation of rules) and processing steps (the latter roughly as described in a way similar to the pseudo code of the arXiv paper). + +In contrast to the original algorithm the field `proposal` in the `PROPOSE` message is a pair `(v, time)`, of the proposed consensus value `v` and the proposed time `time`. + +#### **[PBTS-RECEPTION-STEP.0]** + +In the reception step at process `p` at local time `now_p`, upon receiving a message `m`: + +- if the message `m` is of type `PROPOSE` and satisfies `now_p - PRECISION < m.time < now_p + PRECISION + MSGDELAY`, then mark the message as `timely` + +> if `m` does not satisfy the constraint consider it `untimely` + + +#### **[PBTS-PROCESSING-STEP.0]** + +In the processing step, based on the messages stored, the rules of the algorithms are +executed. Note that the processing step only operates on messages +for the current height. The consensus algorithm rules are defined by the following updates to arXiv paper. + +#### New `StartRound` + +There are two additions + +- in case the proposer's local time is smaller than the time of the previous block, the proposer waits until this is not the case anymore (to ensure the block time is monotonically increasing) +- the proposer sends its time `now_p` as part of its proposal + +We update the timeout for the `PROPOSE` step according to the following reasoning: + +- If a correct proposer needs to wait to make sure its proposed time is larger than the `blockTime` of the previous block, then it sends by realtime `blockTime + ACCURACY` (By this time, its local clock must exceed `blockTime`) +- the receiver will receive a `PROPOSE` message by `blockTime + ACCURACY + MSGDELAY` +- the receiver's local clock will be `<= blockTime + 2 * ACCURACY + MSGDELAY` +- thus when the receiver `p` enters this round it can set its timeout to a value `waitingTime => blockTime + 2 * ACCURACY + MSGDELAY - now_p` + +So we should set the timeout to `max(timeoutPropose(round_p), waitingTime)`. + +> If, in the future, a block delay parameter `BLOCKDELAY` is introduced, this means +that the proposer should wait for `now_p > blockTime + BLOCKDELAY` before sending a `PROPOSE` message. +Also, `BLOCKDELAY` needs to be added to `waitingTime`. + +#### **[PBTS-ALG-STARTROUND.0]** + +```go +function StartRound(round) { + blockTime ← block time of block h_p - 1 + waitingTime ← blockTime + 2 * ACCURACY + MSGDELAY - now_p + round_p ← round + step_p ← propose + if proposer(h_p, round_p) = p { + wait until now_p > blockTime // new wait condition + if validValue_p != nil { + proposal ← (validValue_p, now_p) // added "now_p" + } + else { + proposal ← (getValue(), now_p) // added "now_p" + } + broadcast ⟨PROPOSAL, h_p, round_p, proposal, validRound_p⟩ + } + else { + schedule OnTimeoutPropose(h_p,round_p) to be executed after max(timeoutPropose(round_p), waitingTime) + } +} +``` + +#### New Rule Replacing Lines 22 - 27 + +- a validator prevotes for the consensus value `v` **and** the time `t` +- the code changes as the `PROPOSAL` message carries time (while `lockedValue` does not) + +#### **[PBTS-ALG-UPON-PROP.0]** + +```go +upon timely(⟨PROPOSAL, h_p, round_p, (v,t), −1⟩) from proposer(h_p, round_p) while step_p = propose do { + if valid(v) ∧ (lockedRound_p = −1 ∨ lockedValue_p = v) { + broadcast ⟨PREVOTE, h_p, round_p, id(v,t)⟩ + } + else { + broadcast ⟨PREVOTE, h_p, round_p, nil⟩ + } + step_p ← prevote +} +``` + +#### New Rule Replacing Lines 28 - 33 + +In case consensus is not reached in round 1, in `StartRound` the proposer of future rounds may propose the same value but with a different time. +Thus, the time `tprop` in the `PROPOSAL` message need not match the time `tvote` in the (old) `PREVOTE` messages. +A validator may send `PREVOTE` for the current round as long as the value `v` matches. +This gives the following rule: + +#### **[PBTS-ALG-OLD-PREVOTE.0]** + +```go +upon timely(⟨PROPOSAL, h_p, round_p, (v, tprop), vr⟩) from proposer(h_p, round_p) AND 2f + 1 ⟨PREVOTE, h_p, vr, id((v, tvote)⟩ +while step_p = propose ∧ (vr ≥ 0 ∧ vr < round_p) do { + if valid(v) ∧ (lockedRound_p ≤ vr ∨ lockedValue_p = v) { + broadcast ⟨PREVOTE, h_p, roundp, id(v, tprop)⟩ + } + else { + broadcast ⟨PREVOTE, hp, roundp, nil⟩ + } + step_p ← prevote +} +``` + +#### New Rule Replacing Lines 36 - 43 + +- As above, in the following `(v,t)` is part of the message rather than `v` +- the stored values (i.e., `lockedValue`, `validValue`) do not contain the time + +#### **[PBTS-ALG-NEW-PREVOTE.0]** + +```go +upon timely(⟨PROPOSAL, h_p, round_p, (v,t), ∗⟩) from proposer(h_p, round_p) AND 2f + 1 ⟨PREVOTE, h_p, round_p, id(v,t)⟩ while valid(v) ∧ step_p ≥ prevote for the first time do { + if step_p = prevote { + lockedValue_p ← v + lockedRound_p ← round_p + broadcast ⟨PRECOMMIT, h_p, round_p, id(v,t))⟩ + step_p ← precommit + } + validValue_p ← v + validRound_p ← round_p +} +``` + +#### New Rule Replacing Lines 49 - 54 + +- we decide on `v` as well as on the time from the proposal message +- here we do not care whether the proposal was received timely. + +> In particular we need to take care of the case where the proposer is untimely to one correct validator only. We need to ensure that this validator decides if all decide. + +#### **[PBTS-ALG-DECIDE.0]** + +```go +upon ⟨PROPOSAL, h_p, r, (v,t), ∗⟩ from proposer(h_p, r) AND 2f + 1 ⟨PRECOMMIT, h_p, r, id(v,t)⟩ while decisionp[h_p] = nil do { + if valid(v) { + decision_p [h_p] = (v,t) // decide on time too + h_p ← h_p + 1 + reset lockedRound_p , lockedValue_p, validRound_p and validValue_p to initial values and empty message log + StartRound(0) + } +} +``` + +**All other rules remains unchanged.** + +Back to [main document][main]. + +[main]: ./pbts_001_draft.md + +[arXiv]: https://arxiv.org/abs/1807.04938 + +[tlatender]: https://github.com/tendermint/spec/blob/master/rust-spec/tendermint-accountability/README.md + +[bfttime]: https://github.com/tendermint/spec/blob/439a5bcacb5ef6ef1118566d7b0cd68fff3553d4/spec/consensus/bft-time.md + +[lcspec]: https://github.com/tendermint/spec/blob/439a5bcacb5ef6ef1118566d7b0cd68fff3553d4/rust-spec/lightclient/README.md diff --git a/spec/consensus/proposer-based-timestamp/pbts-sysmodel_001_draft.md b/spec/consensus/proposer-based-timestamp/pbts-sysmodel_001_draft.md new file mode 100644 index 000000000..2eb32bc7d --- /dev/null +++ b/spec/consensus/proposer-based-timestamp/pbts-sysmodel_001_draft.md @@ -0,0 +1,198 @@ +# Proposer-Based Time - Part I + +## System Model + +### Time and Clocks + +#### **[PBTS-CLOCK-NEWTON.0]** + +There is a reference Newtonian real-time `t` (UTC). + +Every correct validator `V` maintains a synchronized clock `C_V` that ensures: + +#### **[PBTS-CLOCK-PRECISION.0]** + +There exists a system parameter `PRECISION` such that for any two correct validators `V` and `W`, and at any real-time `t`, +`|C_V(t) - C_W(t)| < PRECISION` + + +### Message Delays + +We do not want to interfere with the Tendermint timing assumptions. We will postulate a timing restriction, which, if satisfied, ensures that liveness is preserved. + +In general the local clock may drift from the global time. (It may progress faster, e.g., one second of clock time might take 1.005 seconds of real-time). As a result the local clock and the global clock may be measured in different time units. Usually, the message delay is measured in global clock time units. To estimate the correct local timeout precisely, we would need to estimate the clock time duration of a message delay taking into account the clock drift. For simplicity we ignore this, and directly postulate the message delay assumption in terms of local time. + + +#### **[PBTS-MSG-D.0]** + +There exists a system parameter `MSGDELAY` for message end-to-end delays **counted in clock-time**. + +> Observe that [PBTS-MSG-D.0] imposes constraints on message delays as well as on the clock. + +#### **[PBTS-MSG-FAIR.0]** + +The message end-to-end delay between a correct proposer and a correct validator (for `PROPOSE` messages) is less than `MSGDELAY`. + + +## Problem Statement + +In this section we define the properties of Tendermint consensus (cf. the [arXiv paper][arXiv]) in this new system model. + +#### **[PBTS-PROPOSE.0]** + +A proposer proposes a pair `(v,t)` of consensus value `v` and time `t`. + +> We then restrict the allowed decisions along the following lines: + +#### **[PBTS-INV-AGREEMENT.0]** + +[Agreement] No two correct validators decide on different values `v`. + +#### **[PBTS-INV-TIME-VAL.0]** + +[Time-Validity] If a correct validator decides on `t` then `t` is "OK" (we will formalize this below), even if up to `2f` validators are faulty. + +However, the properties of Tendermint consensus are of more interest with respect to the blocks, that is, what is written into a block and when. We therefore, in the following, will give the safety and liveness properties from this block-centric viewpoint. +For this, observe that the time `t` decided at consensus height `k` will be written in the block of height `k+1`, and will be supported by `2f + 1` `PRECOMMIT` messages of the same consensus round `r`. The time written in the block, we will denote by `b.time` (to distinguish it from the term `bfttime` used for median-based time). For this, it is important to have the following consensus algorithm property: + +#### **[PBTS-INV-TIME-AGR.0]** + +[Time-Agreement] If two correct validators decide in the same round, then they decide on the same `t`. + +#### **[PBTS-DECISION-ROUND.0]** + +Note that the relation between consensus decisions, on the one hand, and blocks, on the other hand, is not immediate; in particular if we consider time: In the proposed solution, +as validators may decide in different rounds, they may decide on different times. +The proposer of the next block, may pick a commit (at least `2f + 1` `PRECOMMIT` messages from one round), and thus it picks a decision round that is going to become "canonic". +As a result, the proposer implicitly has a choice of one of the times that belong to rounds in which validators decided. Observe that this choice was implicitly the case already in the median-based `bfttime`. +However, as most consensus instances terminate within one round on the Cosmos hub, this is hardly ever observed in practice. + + + +Finally, observe that the agreement ([Agreement] and [Time-Agreement]) properties are based on the Tendermint security model [TMBC-FM-2THIRDS.0] of more than 2/3 correct validators, while [Time-Validity] is based on more than 1/3 correct validators. + +### SAFETY + +Here we will provide specifications that relate local time to block time. However, since we do not assume (by now) that local time is linked to real-time, these specifications also do not provide a relation between block time and real-time. Such properties are given [later](#REAL-TIME-SAFETY). + +For a correct validator `V`, let `beginConsensus(V,k)` be the local time when it sets its height to `k`, and let `endConsensus(V,k)` be the time when it sets its height to `k + 1`. + +Let + +- `beginConsensus(k)` be the minimum over `beginConsensus(V,k)`, and +- `last-beginConsensus(k)` be the maximum over `beginConsensus(V,k)`, and +- `endConsensus(k)` the maximum over `endConsensus(V,k)` + +for all correct validators `V`. + +> Observe that `beginConsensus(k) <= last-beginConsensus(k)` and if local clocks are monotonic, then `last-beginConsensus(k) <= endConsensus(k)`. + +#### **[PBTS-CLOCK-GROW.0]** + +We assume that during one consensus instance, local clocks are not set back, in particular for each correct validator `V` and each height `k`, we have `beginConsensus(V,k) < endConsensus(V,k)`. + + +#### **[PBTS-CONSENSUS-TIME-VALID.0]** + +If + +- there is a valid commit `c` for height `k`, and +- `c` contains a `PRECOMMIT` message by at least one correct validator, + +then the time `b.time` in the block `b` that is signed by `c` satisfies + +- `beginConsensus(k) - PRECISION <= b.time < endConsensus(k) + PRECISION + MSGDELAY`. + + +> [PBTS-CONSENSUS-TIME-VALID.0] is based on an analysis where the proposer is faulty (and does does not count towards `beginConsensus(k)` and `endConsensus(k)`), and we estimate the times at which correct validators receive and `accept` the `propose` message. If the proposer is correct we obtain + +#### **[PBTS-CONSENSUS-LIVE-VALID-CORR-PROP.0]** + +If the proposer of round 1 is correct, and + +- [TMBC-FM-2THIRDS.0] holds for a block of height `k - 1`, and +- [PBTS-MSG-FAIR.0], and +- [PBTS-CLOCK-PRECISION.0], and +- [PBTS-CLOCK-GROW.0] (**TODO:** is that enough?) + +then eventually (within bounded time) every correct validator decides in round 1. + +#### **[PBTS-CONSENSUS-SAFE-VALID-CORR-PROP.0]** + +If the proposer of round 1 is correct, and + +- [TMBC-FM-2THIRDS.0] holds for a block of height `k - 1`, and +- [PBTS-MSG-FAIR.0], and +- [PBTS-CLOCK-PRECISION.0], and +- [PBTS-CLOCK-GROW.0] (**TODO:** is that enough?) + +then `beginConsensus_k <= b.time <= last-beginConsensus_k`. + + +> For the above two properties we will assume that a correct proposer `v` sends its `PROPOSAL` at its local time `beginConsensus(v,k)`. + +### LIVENESS + +If + +- [TMBC-FM-2THIRDS.0] holds for a block of height `k - 1`, and +- [PBTS-MSG-FAIR.0], +- [PBTS-CLOCK.0], and +- [PBTS-CLOCK-GROW.0] (**TODO:** is that enough?) + +then eventually there is a valid commit `c` for height `k`. + + +### REAL-TIME SAFETY + +> We want to give a property that can be exploited from the outside, that is, given a block with some time stored in it, what is the estimate at which real-time the block was generated. To do so, we need to link clock-time to real-time; which is not the case with [PBTS-CLOCK.0]. For this, we introduce the following assumption on the clocks: + +#### **[PBTS-CLOCKSYNC-EXTERNAL.0]** + +There is a system parameter `ACCURACY`, such that for all real-times `t` and all correct validators `V`, + +- `| C_V(t) - t | < ACCURACY`. + +> `ACCURACY` is not necessarily visible at the code level. The properties below just show that the smaller +its value, the closer the block time will be to real-time + +#### **[PBTS-CONSENSUS-PTIME.0]** + +LET `m` be a propose message. We consider the following two real-times `proposalTime(m)` and `propRecvTime(m)`: + +- if the proposer is correct and sends `m` at time `t`, we write `proposalTime(m)` for real-time `t`. +- if first correct validator receives `m` at time `t`, we write `propRecvTime(m)` for real-time `t`. + + +#### **[PBTS-CONSENSUS-REALTIME-VALID.0]** + +Let `b` be a block with a valid commit that contains at least one `precommit` message by a correct validator (and `proposalTime` is the time for the height/round `propose` message `m` that triggered the `precommit`). Then: + +`propRecvTime(m) - ACCURACY - PRECISION < b.time < propRecvTime(m) + ACCURACY + PRECISION + MSGDELAY` + + +#### **[PBTS-CONSENSUS-REALTIME-VALID-CORR.0]** + +Let `b` be a block with a valid commit that contains at least one `precommit` message by a correct validator (and `proposalTime` is the time for the height/round `propose` message `m` that triggered the `precommit`). Then, if the proposer is correct: + +`proposalTime(m) - ACCURACY < b.time < proposalTime(m) + ACCURACY` + +> by the algorithm at time `proposalTime(m)` the proposer fixes `m.time <- now_p(proposalTime(m))` + +> "triggered the `PRECOMMIT`" implies that the data in `m` and `b` are "matching", that is, `m` proposed the values that are actually stored in `b`. + +Back to [main document][main]. + +[main]: ./pbts_001_draft.md + +[arXiv]: https://arxiv.org/abs/1807.04938 + +[tlatender]: https://github.com/tendermint/spec/blob/master/rust-spec/tendermint-accountability/README.md + +[bfttime]: https://github.com/tendermint/spec/blob/439a5bcacb5ef6ef1118566d7b0cd68fff3553d4/spec/consensus/bft-time.md + +[lcspec]: https://github.com/tendermint/spec/blob/439a5bcacb5ef6ef1118566d7b0cd68fff3553d4/rust-spec/lightclient/README.md + +[algorithm]: ./pbts-algorithm_001_draft.md + +[sysmodel]: ./pbts-sysmodel_001_draft.md diff --git a/spec/consensus/proposer-based-timestamp/pbts_001_draft.md b/spec/consensus/proposer-based-timestamp/pbts_001_draft.md new file mode 100644 index 000000000..a4d876b28 --- /dev/null +++ b/spec/consensus/proposer-based-timestamp/pbts_001_draft.md @@ -0,0 +1,270 @@ +# Proposer-Based Time + +## Current BFTTime + +### Description + +In Tendermint consensus, the first version of how time is computed and stored in a block works as follows: + +- validators send their current local time as part of `precommit` messages +- upon collecting the `precommit` messages that the proposer uses to build a commit to be put in the next block, the proposer computes the `time` of the next block as the median (weighted over voting power) of the times in the `precommit` messages. + +### Analysis + +1. **Fault tolerance.** The computed median time is called [`bfttime`][bfttime] as it is indeed fault-tolerant: if **less than a third** of the validators is faulty (counted in voting power), it is guaranteed that the computed time lies between the minimum and the maximum times sent by correct validators. +1. **Effect of faulty validators.** If more than `1/2` of the voting power (which is in fact more than one third and less than two thirds of the voting power) is held by faulty validators, then the time is under total control of the faulty validators. (This is particularly challenging in the context of [lightclient][lcspec] security.) +1. **Proposer influence on block time.** The proposer of the next block has a degree of freedom in choosing the `bfttime`, since it computes the median time based on the timestamps from `precommit` messages sent by + `2f + 1` correct validators. + 1. If there are `n` different timestamps in the `precommit` messages, the proposer can use any subset of timestamps that add up to `2f + 1` + of the voting power in order to compute the median. + 1. If the validators decide in different rounds, the proposer can decide on which round the median computation is based. +1. **Liveness.** The liveness of the protocol: + 1. does not depend on clock synchronization, + 1. depends on bounded message delays. +1. **Relation to real time.** There is no clock synchronizaton, which implies that there is **no relation** between the computed block `time` and real time. +1. **Aggregate signatures.** As the `precommit` messages contain the local times, all these `precommit` messages typically differ in the time field, which **prevents** the use of aggregate signatures. + +## Suggested Proposer-Based Time + +### Outline + +An alternative approach to time has been discussed: Rather than having the validators send the time in the `precommit` messages, the proposer in the consensus algorithm sends its time in the `propose` message, and the validators locally check whether the time is OK (by comparing to their local clock). + +This proposed solution adds the requirement of having synchronized clocks, and other implicit assumptions. + +### Comparison of the Suggested Method to the Old One + +1. **Fault tolerance.** Maintained in the suggested protocol. +1. **Effect of faulty validators.** Eliminated in the suggested protocol, + that is, the block `time` can be corrupted only in the extreme case when + `>2/3` of the validators are faulty. +1. **Proposer influence on block time.** The proposer of the next block + has less freedom when choosing the block time. + 1. This scenario is eliminated in the suggested protocol, provided that there are `<1/3` faulty validators. + 1. This scenario is still there. +1. **Liveness.** The liveness of the suggested protocol: + 1. depends on the introduced assumptions on synchronized clocks (see below), + 1. still depends on the message delays (unavoidable). +1. **Relation to real time.** We formalize clock synchronization, and obtain a **well-defined relation** between the block `time` and real time. +1. **Aggregate signatures.** The `precommit` messages free of time, which **allows** for aggregate signatures. + +### Protocol Overview + +#### Proposed Time + +We assume that the field `proposal` in the `PROPOSE` message is a pair `(v, time)`, of the proposed consensus value `v` and the proposed time `time`. + +#### Reception Step + +In the reception step at node `p` at local time `now_p`, upon receiving a message `m`: + +- **if** the message `m` is of type `PROPOSE` and satisfies `now_p - PRECISION < m.time < now_p + PRECISION + MSGDELAY`, then mark the message as `timely`. +(`PRECISION` and `MSGDELAY` being system parameters, see [below](#safety-and-liveness)) + +> after the presentation in the dev session, we realized that different semantics for the reception step is closer aligned to the implementation. Instead of dropping propose messages, we keep all of them, and mark timely ones. + +#### Processing Step + +- Start round + + + + + + + + + + + + +
arXiv paperProposer-based time
+ +```go +function StartRound(round) { + round_p ← round + step_p ← propose + if proposer(h_p, round_p) = p { + + + if validValue_p != nil { + + proposal ← validValue_p + } else { + + proposal ← getValue() + } + broadcast ⟨PROPOSAL, h_p, round_p, proposal, validRound_p⟩ + } else { + schedule OnTimeoutPropose(h_p,round_p) to + be executed after timeoutPropose(round_p) + } +} +``` + + + +```go +function StartRound(round) { + round_p ← round + step_p ← propose + if proposer(h_p, round_p) = p { + // new wait condition + wait until now_p > block time of block h_p - 1 + if validValue_p != nil { + // add "now_p" + proposal ← (validValue_p, now_p) + } else { + // add "now_p" + proposal ← (getValue(), now_p) + } + broadcast ⟨PROPOSAL, h_p, round_p, proposal, validRound_p⟩ + } else { + schedule OnTimeoutPropose(h_p,round_p) to + be executed after timeoutPropose(round_p) + } +} +``` + +
+ +- Rule on lines 28-35 + + + + + + + + + + + + +
arXiv paperProposer-based time
+ +```go +upon timely(⟨PROPOSAL, h_p, round_p, v, vr⟩) + from proposer(h_p, round_p) + AND 2f + 1 ⟨PREVOTE, h_p, vr, id(v)⟩ +while step_p = propose ∧ (vr ≥ 0 ∧ vr < round_p) do { + if valid(v) ∧ (lockedRound_p ≤ vr ∨ lockedValue_p = v) { + + broadcast ⟨PREVOTE, h_p, round_p, id(v)⟩ + } else { + broadcast ⟨PREVOTE, hp, round_p, nil⟩ + } +} +``` + + + +```go +upon timely(⟨PROPOSAL, h_p, round_p, (v, tprop), vr⟩) + from proposer(h_p, round_p) + AND 2f + 1 ⟨PREVOTE, h_p, vr, id(v, tvote)⟩ + while step_p = propose ∧ (vr ≥ 0 ∧ vr < round_p) do { + if valid(v) ∧ (lockedRound_p ≤ vr ∨ lockedValue_p = v) { + // send hash of v and tprop in PREVOTE message + broadcast ⟨PREVOTE, h_p, round_p, id(v, tprop)⟩ + } else { + broadcast ⟨PREVOTE, hp, round_p, nil⟩ + } + } +``` + +
+ +- Rule on lines 49-54 + + + + + + + + + + + + +
arXiv paperProposer-based time
+ +```go +upon ⟨PROPOSAL, h_p, r, v, ∗⟩ from proposer(h_p, r) + AND 2f + 1 ⟨PRECOMMIT, h_p, r, id(v)⟩ + while decisionp[h_p] = nil do { + if valid(v) { + + decision_p [h_p] = v + h_p ← h_p + 1 + reset lockedRound_p , lockedValue_p, validRound_p and + validValue_p to initial values and empty message log + StartRound(0) + } + } +``` + + + +```go +upon ⟨PROPOSAL, h_p, r, (v,t), ∗⟩ from proposer(h_p, r) + AND 2f + 1 ⟨PRECOMMIT, h_p, r, id(v,t)⟩ + while decisionp[h_p] = nil do { + if valid(v) { + // decide on time too + decision_p [h_p] = (v,t) + h_p ← h_p + 1 + reset lockedRound_p , lockedValue_p, validRound_p and + validValue_p to initial values and empty message log + StartRound(0) + } + } +``` + +
+ +- Other rules are extended in a similar way, or remain unchanged + +### Property Overview + +#### Safety and Liveness + +For safety (Point 1, Point 2, Point 3i) and liveness (Point 4) we need +the following assumptions: + +- There exists a system parameter `PRECISION` such that for any two correct validators `V` and `W`, and at any real-time `t`, their local times `C_V(t)` and `C_W(t)` differ by less than `PRECISION` time units, +i.e., `|C_V(t) - C_W(t)| < PRECISION` +- The message end-to-end delay between a correct proposer and a correct validator (for `PROPOSE` messages) is less than `MSGDELAY`. + +#### Relation to Real-Time + +For analyzing real-time safety (Point 5), we use a system parameter `ACCURACY`, such that for all real-times `t` and all correct validators `V`, we have `| C_V(t) - t | < ACCURACY`. + +> `ACCURACY` is not necessarily visible at the code level. We might even view `ACCURACY` as variable over time. The smaller it is during a consensus instance, the closer the block time will be to real-time. +> +> Note that `PRECISION` and `MSGDELAY` show up in the code. + +### Detailed Specification + +This specification describes the changes needed to be done to the Tendermint consensus algorithm as described in the [arXiv paper][arXiv] and the simplified specification in [TLA+][tlatender], and makes precise the underlying assumptions and the required properties. + +- [Part I - System Model and Properties][sysmodel] +- [Part II - Protocol specification][algorithm] +- [TLA+ Specification][proposertla] + +[arXiv]: https://arxiv.org/abs/1807.04938 + +[tlatender]: https://github.com/tendermint/spec/blob/master/rust-spec/tendermint-accountability/README.md + +[bfttime]: https://github.com/tendermint/spec/blob/439a5bcacb5ef6ef1118566d7b0cd68fff3553d4/spec/consensus/bft-time.md + +[lcspec]: https://github.com/tendermint/spec/blob/439a5bcacb5ef6ef1118566d7b0cd68fff3553d4/rust-spec/lightclient/README.md + +[algorithm]: ./pbts-algorithm_001_draft.md + +[sysmodel]: ./pbts-sysmodel_001_draft.md + +[main]: ./pbts_001_draft.md + +[proposertla]: ./tla/TendermintPBT_001_draft.tla diff --git a/spec/consensus/proposer-based-timestamp/tla/TendermintPBT_001_draft.tla b/spec/consensus/proposer-based-timestamp/tla/TendermintPBT_001_draft.tla new file mode 100644 index 000000000..2bcdecb27 --- /dev/null +++ b/spec/consensus/proposer-based-timestamp/tla/TendermintPBT_001_draft.tla @@ -0,0 +1,597 @@ +-------------------- MODULE TendermintPBT_001_draft --------------------------- +(* + A TLA+ specification of a simplified Tendermint consensus, with added clocks + and proposer-based timestamps. This TLA+ specification extends and modifies + the Tendermint TLA+ specification for fork accountability: + https://github.com/tendermint/spec/blob/master/spec/light-client/accountability/TendermintAcc_004_draft.tla + + * Version 1. A preliminary specification. + + Zarko Milosevic, Igor Konnov, Informal Systems, 2019-2020. + Ilina Stoilkovska, Josef Widder, Informal Systems, 2021. + *) + +EXTENDS Integers, FiniteSets + +(********************* PROTOCOL PARAMETERS **********************************) +CONSTANTS + Corr, \* the set of correct processes + Faulty, \* the set of Byzantine processes, may be empty + N, \* the total number of processes: correct, defective, and Byzantine + T, \* an upper bound on the number of Byzantine processes + ValidValues, \* the set of valid values, proposed both by correct and faulty + InvalidValues, \* the set of invalid values, never proposed by the correct ones + MaxRound, \* the maximal round number + MaxTimestamp, \* the maximal value of the clock tick + Delay, \* message delay + Precision, \* clock precision: the maximal difference between two local clocks + Accuracy, \* clock accuracy: the maximal difference between a local clock and the real time + Proposer, \* the proposer function from 0..NRounds to 1..N + ClockDrift \* is there clock drift between the local clocks and the global clock + +ASSUME(N = Cardinality(Corr \union Faulty)) + +(*************************** DEFINITIONS ************************************) +AllProcs == Corr \union Faulty \* the set of all processes +Rounds == 0..MaxRound \* the set of potential rounds +Timestamps == 0..MaxTimestamp \* the set of clock ticks +NilRound == -1 \* a special value to denote a nil round, outside of Rounds +NilTimestamp == -1 \* a special value to denote a nil timestamp, outside of Ticks +RoundsOrNil == Rounds \union {NilRound} +Values == ValidValues \union InvalidValues \* the set of all values +NilValue == "None" \* a special value for a nil round, outside of Values +Proposals == Values \X Timestamps +NilProposal == <> +ValuesOrNil == Values \union {NilValue} +Decisions == Values \X Timestamps \X Rounds +NilDecision == <> + + +\* a value hash is modeled as identity +Id(v) == v + +\* The validity predicate +IsValid(v) == v \in ValidValues + +\* the two thresholds that are used in the algorithm +THRESHOLD1 == T + 1 \* at least one process is not faulty +THRESHOLD2 == 2 * T + 1 \* a quorum when having N > 3 * T + +Min(S) == CHOOSE x \in S : \A y \in S : x <= y + +Max(S) == CHOOSE x \in S : \A y \in S : y <= x + +(********************* TYPE ANNOTATIONS FOR APALACHE **************************) +\* the operator for type annotations +a <: b == a + +\* the type of message records +MT == [type |-> STRING, src |-> STRING, round |-> Int, + proposal |-> <>, validRound |-> Int, id |-> <>] + +RP == <> + +\* a type annotation for a message +AsMsg(m) == m <: MT +\* a type annotation for a set of messages +SetOfMsgs(S) == S <: {MT} +\* a type annotation for an empty set of messages +EmptyMsgSet == SetOfMsgs({}) + +SetOfRcvProp(S) == S <: {RP} +EmptyRcvProp == SetOfRcvProp({}) + +SetOfProc(S) == S <: {STRING} +EmptyProcSet == SetOfProc({}) + +(********************* PROTOCOL STATE VARIABLES ******************************) +VARIABLES + round, \* a process round number: Corr -> Rounds + localClock, \* a process local clock: Corr -> Ticks + realTime, \* a reference Newtonian real time + step, \* a process step: Corr -> { "PROPOSE", "PREVOTE", "PRECOMMIT", "DECIDED" } + decision, \* process decision: Corr -> ValuesOrNil + lockedValue, \* a locked value: Corr -> ValuesOrNil + lockedRound, \* a locked round: Corr -> RoundsOrNil + validValue, \* a valid value: Corr -> ValuesOrNil + validRound \* a valid round: Corr -> RoundsOrNil + +\* book-keeping variables +VARIABLES + msgsPropose, \* PROPOSE messages broadcast in the system, Rounds -> Messages + msgsPrevote, \* PREVOTE messages broadcast in the system, Rounds -> Messages + msgsPrecommit, \* PRECOMMIT messages broadcast in the system, Rounds -> Messages + receivedTimelyProposal, \* used to keep track when a process receives a timely PROPOSAL message, {<>} + inspectedProposal, \* used to keep track when a process tries to receive a message, [Rounds -> <>] + evidence, \* the messages that were used by the correct processes to make transitions + action, \* we use this variable to see which action was taken + beginConsensus, \* the minimum of the local clocks in the initial state, Int + endConsensus, \* the local time when a decision is made, [Corr -> Int] + lastBeginConsensus, \* the maximum of the local clocks in the initial state, Int + proposalTime, \* the real time when a proposer proposes in a round, [Rounds -> Int] + proposalReceivedTime \* the real time when a correct process first receives a proposal message in a round, [Rounds -> Int] + +(* to see a type invariant, check TendermintAccInv3 *) + +\* a handy definition used in UNCHANGED +vars == <> + +(********************* PROTOCOL INITIALIZATION ******************************) +FaultyProposals(r) == + SetOfMsgs([type: {"PROPOSAL"}, src: Faulty, + round: {r}, proposal: Proposals, validRound: RoundsOrNil]) + +AllFaultyProposals == + SetOfMsgs([type: {"PROPOSAL"}, src: Faulty, + round: Rounds, proposal: Proposals, validRound: RoundsOrNil]) + +FaultyPrevotes(r) == + SetOfMsgs([type: {"PREVOTE"}, src: Faulty, round: {r}, id: Proposals]) + +AllFaultyPrevotes == + SetOfMsgs([type: {"PREVOTE"}, src: Faulty, round: Rounds, id: Proposals]) + +FaultyPrecommits(r) == + SetOfMsgs([type: {"PRECOMMIT"}, src: Faulty, round: {r}, id: Proposals]) + +AllFaultyPrecommits == + SetOfMsgs([type: {"PRECOMMIT"}, src: Faulty, round: Rounds, id: Proposals]) + +AllProposals == + SetOfMsgs([type: {"PROPOSAL"}, src: AllProcs, + round: Rounds, proposal: Proposals, validRound: RoundsOrNil]) + +RoundProposals(r) == + SetOfMsgs([type: {"PROPOSAL"}, src: AllProcs, + round: {r}, proposal: Proposals, validRound: RoundsOrNil]) + +BenignRoundsInMessages(msgfun) == + \* the message function never contains a message for a wrong round + \A r \in Rounds: + \A m \in msgfun[r]: + r = m.round + +\* The initial states of the protocol. Some faults can be in the system already. +Init == + /\ round = [p \in Corr |-> 0] + /\ \/ /\ ~ClockDrift + /\ localClock \in [Corr -> 0..Accuracy] + \/ /\ ClockDrift + /\ localClock = [p \in Corr |-> 0] + /\ realTime = 0 + /\ step = [p \in Corr |-> "PROPOSE"] + /\ decision = [p \in Corr |-> NilDecision] + /\ lockedValue = [p \in Corr |-> NilValue] + /\ lockedRound = [p \in Corr |-> NilRound] + /\ validValue = [p \in Corr |-> NilValue] + /\ validRound = [p \in Corr |-> NilRound] + /\ msgsPropose \in [Rounds -> SUBSET AllFaultyProposals] + /\ msgsPrevote \in [Rounds -> SUBSET AllFaultyPrevotes] + /\ msgsPrecommit \in [Rounds -> SUBSET AllFaultyPrecommits] + /\ receivedTimelyProposal = EmptyRcvProp + /\ inspectedProposal = [r \in Rounds |-> EmptyProcSet] + /\ BenignRoundsInMessages(msgsPropose) + /\ BenignRoundsInMessages(msgsPrevote) + /\ BenignRoundsInMessages(msgsPrecommit) + /\ evidence = EmptyMsgSet + /\ action' = "Init" + /\ beginConsensus = Min({localClock[p] : p \in Corr}) + /\ endConsensus = [p \in Corr |-> NilTimestamp] + /\ lastBeginConsensus = Max({localClock[p] : p \in Corr}) + /\ proposalTime = [r \in Rounds |-> NilTimestamp] + /\ proposalReceivedTime = [r \in Rounds |-> NilTimestamp] + +(************************ MESSAGE PASSING ********************************) +BroadcastProposal(pSrc, pRound, pProposal, pValidRound) == + LET newMsg == + AsMsg([type |-> "PROPOSAL", src |-> pSrc, round |-> pRound, + proposal |-> pProposal, validRound |-> pValidRound]) + IN + msgsPropose' = [msgsPropose EXCEPT ![pRound] = msgsPropose[pRound] \union {newMsg}] + +BroadcastPrevote(pSrc, pRound, pId) == + LET newMsg == AsMsg([type |-> "PREVOTE", + src |-> pSrc, round |-> pRound, id |-> pId]) + IN + msgsPrevote' = [msgsPrevote EXCEPT ![pRound] = msgsPrevote[pRound] \union {newMsg}] + +BroadcastPrecommit(pSrc, pRound, pId) == + LET newMsg == AsMsg([type |-> "PRECOMMIT", + src |-> pSrc, round |-> pRound, id |-> pId]) + IN + msgsPrecommit' = [msgsPrecommit EXCEPT ![pRound] = msgsPrecommit[pRound] \union {newMsg}] + + +(***************************** TIME **************************************) + +\* [PBTS-CLOCK-PRECISION.0] +SynchronizedLocalClocks == + \A p \in Corr : \A q \in Corr : + p /= q => + \/ /\ localClock[p] >= localClock[q] + /\ localClock[p] - localClock[q] < Precision + \/ /\ localClock[p] < localClock[q] + /\ localClock[q] - localClock[p] < Precision + +\* [PBTS-PROPOSE.0] +Proposal(v, t) == + <> + +\* [PBTS-DECISION-ROUND.0] +Decision(v, t, r) == + <> + +(**************** MESSAGE PROCESSING TRANSITIONS *************************) +\* lines 12-13 +StartRound(p, r) == + /\ step[p] /= "DECIDED" \* a decided process does not participate in consensus + /\ round' = [round EXCEPT ![p] = r] + /\ step' = [step EXCEPT ![p] = "PROPOSE"] + +\* lines 14-19, a proposal may be sent later +InsertProposal(p) == + LET r == round[p] IN + /\ p = Proposer[r] + /\ step[p] = "PROPOSE" + \* if the proposer is sending a proposal, then there are no other proposals + \* by the correct processes for the same round + /\ \A m \in msgsPropose[r]: m.src /= p + /\ \E v \in ValidValues: + LET proposal == IF validValue[p] /= NilValue + THEN Proposal(validValue[p], localClock[p]) + ELSE Proposal(v, localClock[p]) IN + + /\ BroadcastProposal(p, round[p], proposal, validRound[p]) + /\ proposalTime' = [proposalTime EXCEPT ![r] = realTime] + /\ UNCHANGED <> + /\ action' = "InsertProposal" + +\* a new action used to filter messages that are not on time +\* [PBTS-RECEPTION-STEP.0] +ReceiveProposal(p) == + \E v \in Values, t \in Timestamps: + /\ LET r == round[p] IN + LET msg == + AsMsg([type |-> "PROPOSAL", src |-> Proposer[round[p]], + round |-> round[p], proposal |-> Proposal(v, t), validRound |-> NilRound]) IN + /\ msg \in msgsPropose[round[p]] + /\ p \notin inspectedProposal[r] + /\ <> \notin receivedTimelyProposal + /\ inspectedProposal' = [inspectedProposal EXCEPT ![r] = @ \union {p}] + /\ \/ /\ localClock[p] - Precision < t + /\ t < localClock[p] + Precision + Delay + /\ receivedTimelyProposal' = receivedTimelyProposal \union {<>} + /\ \/ /\ proposalReceivedTime[r] = NilTimestamp + /\ proposalReceivedTime' = [proposalReceivedTime EXCEPT ![r] = realTime] + \/ /\ proposalReceivedTime[r] /= NilTimestamp + /\ UNCHANGED proposalReceivedTime + \/ /\ \/ localClock[p] - Precision >= t + \/ t >= localClock[p] + Precision + Delay + /\ UNCHANGED <> + /\ UNCHANGED <> + /\ action' = "ReceiveProposal" + +\* lines 22-27 +UponProposalInPropose(p) == + \E v \in Values, t \in Timestamps: + /\ step[p] = "PROPOSE" (* line 22 *) + /\ LET msg == + AsMsg([type |-> "PROPOSAL", src |-> Proposer[round[p]], + round |-> round[p], proposal |-> Proposal(v, t), validRound |-> NilRound]) IN + /\ <> \in receivedTimelyProposal \* updated line 22 + /\ evidence' = {msg} \union evidence + /\ LET mid == (* line 23 *) + IF IsValid(v) /\ (lockedRound[p] = NilRound \/ lockedValue[p] = v) + THEN Id(Proposal(v, t)) + ELSE NilProposal + IN + BroadcastPrevote(p, round[p], mid) \* lines 24-26 + /\ step' = [step EXCEPT ![p] = "PREVOTE"] + /\ UNCHANGED <> + /\ action' = "UponProposalInPropose" + +\* lines 28-33 +\* [PBTS-ALG-OLD-PREVOTE.0] +UponProposalInProposeAndPrevote(p) == + \E v \in Values, t1 \in Timestamps, t2 \in Timestamps, vr \in Rounds: + /\ step[p] = "PROPOSE" /\ 0 <= vr /\ vr < round[p] \* line 28, the while part + /\ LET msg == + AsMsg([type |-> "PROPOSAL", src |-> Proposer[round[p]], + round |-> round[p], proposal |-> Proposal(v, t1), validRound |-> vr]) + IN + /\ <> \in receivedTimelyProposal \* updated line 28 + /\ LET PV == { m \in msgsPrevote[vr]: m.id = Id(Proposal(v, t2)) } IN + /\ Cardinality(PV) >= THRESHOLD2 \* line 28 + /\ evidence' = PV \union {msg} \union evidence + /\ LET mid == (* line 29 *) + IF IsValid(v) /\ (lockedRound[p] <= vr \/ lockedValue[p] = v) + THEN Id(Proposal(v, t1)) + ELSE NilProposal + IN + BroadcastPrevote(p, round[p], mid) \* lines 24-26 + /\ step' = [step EXCEPT ![p] = "PREVOTE"] + /\ UNCHANGED <> + /\ action' = "UponProposalInProposeAndPrevote" + + \* lines 34-35 + lines 61-64 (onTimeoutPrevote) +UponQuorumOfPrevotesAny(p) == + /\ step[p] = "PREVOTE" \* line 34 and 61 + /\ \E MyEvidence \in SUBSET msgsPrevote[round[p]]: + \* find the unique voters in the evidence + LET Voters == { m.src: m \in MyEvidence } IN + \* compare the number of the unique voters against the threshold + /\ Cardinality(Voters) >= THRESHOLD2 \* line 34 + /\ evidence' = MyEvidence \union evidence + /\ BroadcastPrecommit(p, round[p], NilProposal) + /\ step' = [step EXCEPT ![p] = "PRECOMMIT"] + /\ UNCHANGED <> + /\ action' = "UponQuorumOfPrevotesAny" + +\* lines 36-46 +\* [PBTS-ALG-NEW-PREVOTE.0] +UponProposalInPrevoteOrCommitAndPrevote(p) == + \E v \in ValidValues, t \in Timestamps, vr \in RoundsOrNil: + /\ step[p] \in {"PREVOTE", "PRECOMMIT"} \* line 36 + /\ LET msg == + AsMsg([type |-> "PROPOSAL", src |-> Proposer[round[p]], + round |-> round[p], proposal |-> Proposal(v, t), validRound |-> vr]) IN + /\ <> \in receivedTimelyProposal \* updated line 36 + /\ LET PV == { m \in msgsPrevote[round[p]]: m.id = Id(Proposal(v, t)) } IN + /\ Cardinality(PV) >= THRESHOLD2 \* line 36 + /\ evidence' = PV \union {msg} \union evidence + /\ IF step[p] = "PREVOTE" + THEN \* lines 38-41: + /\ lockedValue' = [lockedValue EXCEPT ![p] = v] + /\ lockedRound' = [lockedRound EXCEPT ![p] = round[p]] + /\ BroadcastPrecommit(p, round[p], Id(Proposal(v, t))) + /\ step' = [step EXCEPT ![p] = "PRECOMMIT"] + ELSE + UNCHANGED <> + \* lines 42-43 + /\ validValue' = [validValue EXCEPT ![p] = v] + /\ validRound' = [validRound EXCEPT ![p] = round[p]] + /\ UNCHANGED <> + /\ action' = "UponProposalInPrevoteOrCommitAndPrevote" + +\* lines 47-48 + 65-67 (onTimeoutPrecommit) +UponQuorumOfPrecommitsAny(p) == + /\ \E MyEvidence \in SUBSET msgsPrecommit[round[p]]: + \* find the unique committers in the evidence + LET Committers == { m.src: m \in MyEvidence } IN + \* compare the number of the unique committers against the threshold + /\ Cardinality(Committers) >= THRESHOLD2 \* line 47 + /\ evidence' = MyEvidence \union evidence + /\ round[p] + 1 \in Rounds + /\ StartRound(p, round[p] + 1) + /\ UNCHANGED <> + /\ action' = "UponQuorumOfPrecommitsAny" + +\* lines 49-54 +\* [PBTS-ALG-DECIDE.0] +UponProposalInPrecommitNoDecision(p) == + /\ decision[p] = NilDecision \* line 49 + /\ \E v \in ValidValues, t \in Timestamps (* line 50*) , r \in Rounds, vr \in RoundsOrNil: + /\ LET msg == AsMsg([type |-> "PROPOSAL", src |-> Proposer[r], + round |-> r, proposal |-> Proposal(v, t), validRound |-> vr]) IN + /\ msg \in msgsPropose[r] \* line 49 + /\ p \in inspectedProposal[r] + /\ LET PV == { m \in msgsPrecommit[r]: m.id = Id(Proposal(v, t)) } IN + /\ Cardinality(PV) >= THRESHOLD2 \* line 49 + /\ evidence' = PV \union {msg} \union evidence + /\ decision' = [decision EXCEPT ![p] = Decision(v, t, round[p])] \* update the decision, line 51 + \* The original algorithm does not have 'DECIDED', but it increments the height. + \* We introduced 'DECIDED' here to prevent the process from changing its decision. + /\ endConsensus' = [endConsensus EXCEPT ![p] = localClock[p]] + /\ step' = [step EXCEPT ![p] = "DECIDED"] + /\ UNCHANGED <> + /\ action' = "UponProposalInPrecommitNoDecision" + +\* the actions below are not essential for safety, but added for completeness + +\* lines 20-21 + 57-60 +OnTimeoutPropose(p) == + /\ step[p] = "PROPOSE" + /\ p /= Proposer[round[p]] + /\ BroadcastPrevote(p, round[p], NilProposal) + /\ step' = [step EXCEPT ![p] = "PREVOTE"] + /\ UNCHANGED <> + /\ action' = "OnTimeoutPropose" + +\* lines 44-46 +OnQuorumOfNilPrevotes(p) == + /\ step[p] = "PREVOTE" + /\ LET PV == { m \in msgsPrevote[round[p]]: m.id = Id(NilProposal) } IN + /\ Cardinality(PV) >= THRESHOLD2 \* line 36 + /\ evidence' = PV \union evidence + /\ BroadcastPrecommit(p, round[p], Id(NilProposal)) + /\ step' = [step EXCEPT ![p] = "PRECOMMIT"] + /\ UNCHANGED <> + /\ action' = "OnQuorumOfNilPrevotes" + +\* lines 55-56 +OnRoundCatchup(p) == + \E r \in {rr \in Rounds: rr > round[p]}: + LET RoundMsgs == msgsPropose[r] \union msgsPrevote[r] \union msgsPrecommit[r] IN + \E MyEvidence \in SUBSET RoundMsgs: + LET Faster == { m.src: m \in MyEvidence } IN + /\ Cardinality(Faster) >= THRESHOLD1 + /\ evidence' = MyEvidence \union evidence + /\ StartRound(p, r) + /\ UNCHANGED <> + /\ action' = "OnRoundCatchup" + + +(********************* PROTOCOL TRANSITIONS ******************************) +\* advance the global clock +AdvanceRealTime == + /\ realTime < MaxTimestamp + /\ realTime' = realTime + 1 + /\ \/ /\ ~ClockDrift + /\ localClock' = [p \in Corr |-> localClock[p] + 1] + \/ /\ ClockDrift + /\ UNCHANGED localClock + /\ UNCHANGED <> + /\ action' = "AdvanceRealTime" + +\* advance the local clock of node p +AdvanceLocalClock(p) == + /\ localClock[p] < MaxTimestamp + /\ localClock' = [localClock EXCEPT ![p] = @ + 1] + /\ UNCHANGED <> + /\ action' = "AdvanceLocalClock" + +\* process timely messages +MessageProcessing(p) == + \* start round + \/ InsertProposal(p) + \* reception step + \/ ReceiveProposal(p) + \* processing step + \/ UponProposalInPropose(p) + \/ UponProposalInProposeAndPrevote(p) + \/ UponQuorumOfPrevotesAny(p) + \/ UponProposalInPrevoteOrCommitAndPrevote(p) + \/ UponQuorumOfPrecommitsAny(p) + \/ UponProposalInPrecommitNoDecision(p) + \* the actions below are not essential for safety, but added for completeness + \/ OnTimeoutPropose(p) + \/ OnQuorumOfNilPrevotes(p) + \/ OnRoundCatchup(p) + +(* + * A system transition. In this specificatiom, the system may eventually deadlock, + * e.g., when all processes decide. This is expected behavior, as we focus on safety. + *) +Next == + \/ AdvanceRealTime + \/ /\ ClockDrift + /\ \E p \in Corr: AdvanceLocalClock(p) + \/ /\ SynchronizedLocalClocks + /\ \E p \in Corr: MessageProcessing(p) + +----------------------------------------------------------------------------- + +(*************************** INVARIANTS *************************************) + +\* [PBTS-INV-AGREEMENT.0] +AgreementOnValue == + \A p, q \in Corr: + /\ decision[p] /= NilDecision + /\ decision[q] /= NilDecision + => \E v \in ValidValues, t1 \in Timestamps, t2 \in Timestamps, r1 \in Rounds, r2 \in Rounds : + /\ decision[p] = Decision(v, t1, r1) + /\ decision[q] = Decision(v, t2, r2) + +\* [PBTS-INV-TIME-AGR.0] +AgreementOnTime == + \A p, q \in Corr: + \A v1 \in ValidValues, v2 \in ValidValues, t1 \in Timestamps, t2 \in Timestamps, r \in Rounds : + /\ decision[p] = Decision(v1, t1, r) + /\ decision[q] = Decision(v2, t2, r) + => t1 = t2 + +\* [PBTS-CONSENSUS-TIME-VALID.0] +ConsensusTimeValid == + \A p \in Corr, t \in Timestamps : + \* if a process decides on v and t + (\E v \in ValidValues, r \in Rounds : decision[p] = Decision(v, t, r)) + \* then + => /\ beginConsensus - Precision <= t + /\ t < endConsensus[p] + Precision + Delay + +\* [PBTS-CONSENSUS-SAFE-VALID-CORR-PROP.0] +ConsensusSafeValidCorrProp == + \A v \in ValidValues, t \in Timestamps : + \* if the proposer in the first round is correct + (/\ Proposer[0] \in Corr + \* and there exists a process that decided on v, t + /\ \E p \in Corr, r \in Rounds : decision[p] = Decision(v, t, r)) + \* then t is between the minimal and maximal initial local time + => /\ beginConsensus <= t + /\ t <= lastBeginConsensus + +\* [PBTS-CONSENSUS-REALTIME-VALID-CORR.0] +ConsensusRealTimeValidCorr == + \A t \in Timestamps, r \in Rounds : + (/\ \E p \in Corr, v \in ValidValues : decision[p] = Decision(v, t, r) + /\ proposalTime[r] /= NilTimestamp) + => /\ proposalTime[r] - Accuracy < t + /\ t < proposalTime[r] + Accuracy + +\* [PBTS-CONSENSUS-REALTIME-VALID.0] +ConsensusRealTimeValid == + \A t \in Timestamps, r \in Rounds : + (\E p \in Corr, v \in ValidValues : decision[p] = Decision(v, t, r)) + => /\ proposalReceivedTime[r] - Accuracy - Precision < t + /\ t < proposalReceivedTime[r] + Accuracy + Precision + Delay + +\* [PBTS-MSG-FAIR.0] +BoundedDelay == + \A r \in Rounds : + (/\ proposalTime[r] /= NilTimestamp + /\ proposalTime[r] + Delay < realTime) + => inspectedProposal[r] = Corr + +\* [PBTS-CONSENSUS-TIME-LIVE.0] +ConsensusTimeLive == + \A r \in Rounds, p \in Corr : + (/\ proposalTime[r] /= NilTimestamp + /\ proposalTime[r] + Delay < realTime + /\ Proposer[r] \in Corr + /\ round[p] <= r) + => \E msg \in RoundProposals(r) : <> \in receivedTimelyProposal + +\* a conjunction of all invariants +Inv == + /\ AgreementOnValue + /\ AgreementOnTime + /\ ConsensusTimeValid + /\ ConsensusSafeValidCorrProp + /\ ConsensusRealTimeValid + /\ ConsensusRealTimeValidCorr + /\ BoundedDelay + +Liveness == + ConsensusTimeLive + +============================================================================= diff --git a/spec/consensus/proposer-selection.md b/spec/consensus/proposer-selection.md new file mode 100644 index 000000000..3cea3d5cd --- /dev/null +++ b/spec/consensus/proposer-selection.md @@ -0,0 +1,323 @@ +--- +order: 3 +--- + +# Proposer Selection Procedure + +This document specifies the Proposer Selection Procedure that is used in Tendermint to choose a round proposer. +As Tendermint is “leader-based protocol”, the proposer selection is critical for its correct functioning. + +At a given block height, the proposer selection algorithm runs with the same validator set at each round . +Between heights, an updated validator set may be specified by the application as part of the ABCIResponses' EndBlock. + +## Requirements for Proposer Selection + +This sections covers the requirements with Rx being mandatory and Ox optional requirements. +The following requirements must be met by the Proposer Selection procedure: + +### R1: Determinism + +Given a validator set `V`, and two honest validators `p` and `q`, for each height `h` and each round `r` the following must hold: + + `proposer_p(h,r) = proposer_q(h,r)` + +where `proposer_p(h,r)` is the proposer returned by the Proposer Selection Procedure at process `p`, at height `h` and round `r`. + +### R2: Fairness + +Given a validator set with total voting power P and a sequence S of elections. In any sub-sequence of S with length C*P, a validator v must be elected as proposer P/VP(v) times, i.e. with frequency: + + f(v) ~ VP(v) / P + +where C is a tolerance factor for validator set changes with following values: + +- C == 1 if there are no validator set changes +- C ~ k when there are validator changes + +*[this needs more work]* + +## Basic Algorithm + +At its core, the proposer selection procedure uses a weighted round-robin algorithm. + +A model that gives a good intuition on how/ why the selection algorithm works and it is fair is that of a priority queue. The validators move ahead in this queue according to their voting power (the higher the voting power the faster a validator moves towards the head of the queue). When the algorithm runs the following happens: + +- all validators move "ahead" according to their powers: for each validator, increase the priority by the voting power +- first in the queue becomes the proposer: select the validator with highest priority +- move the proposer back in the queue: decrease the proposer's priority by the total voting power + +Notation: + +- vset - the validator set +- n - the number of validators +- VP(i) - voting power of validator i +- A(i) - accumulated priority for validator i +- P - total voting power of set +- avg - average of all validator priorities +- prop - proposer + +Simple view at the Selection Algorithm: + +```md + def ProposerSelection (vset): + + // compute priorities and elect proposer + for each validator i in vset: + A(i) += VP(i) + prop = max(A) + A(prop) -= P +``` + +## Stable Set + +Consider the validator set: + +Validator | p1 | p2 +----------|----|--- +VP | 1 | 3 + +Assuming no validator changes, the following table shows the proposer priority computation over a few runs. Four runs of the selection procedure are shown, starting with the 5th the same values are computed. +Each row shows the priority queue and the process place in it. The proposer is the closest to the head, the rightmost validator. As priorities are updated, the validators move right in the queue. The proposer moves left as its priority is reduced after election. + +| Priority Run | -2 | -1 | 0 | 1 | 2 | 3 | 4 | 5 | Alg step | +|----------------|----|----|-------|----|-------|----|----|----|------------------| +| | | | p1,p2 | | | | | | Initialized to 0 | +| run 1 | | | | p1 | | p2 | | | A(i)+=VP(i) | +| | | p2 | | p1 | | | | | A(p2)-= P | +| run 2 | | | | | p1,p2 | | | | A(i)+=VP(i) | +| | p1 | | | | p2 | | | | A(p1)-= P | +| run 3 | | p1 | | | | | | p2 | A(i)+=VP(i) | +| | | p1 | | p2 | | | | | A(p2)-= P | +| run 4 | | | p1 | | | | p2 | | A(i)+=VP(i) | +| | | | p1,p2 | | | | | | A(p2)-= P | + +It can be shown that: + +- At the end of each run k+1 the sum of the priorities is the same as at end of run k. If a new set's priorities are initialized to 0 then the sum of priorities will be 0 at each run while there are no changes. +- The max distance between priorites is (n-1) *P.*[formal proof not finished]* + +## Validator Set Changes + +Between proposer selection runs the validator set may change. Some changes have implications on the proposer election. + +### Voting Power Change + +Consider again the earlier example and assume that the voting power of p1 is changed to 4: + +Validator | p1 | p2 +----------|----|--- +VP | 4 | 3 + +Let's also assume that before this change the proposer priorites were as shown in first row (last run). As it can be seen, the selection could run again, without changes, as before. + +| Priority Run | -2 | -1 | 0 | 1 | 2 | Comment | +|----------------|----|----|---|----|----|-------------------| +| last run | | p2 | | p1 | | __update VP(p1)__ | +| next run | | | | | p2 | A(i)+=VP(i) | +| | p1 | | | | p2 | A(p1)-= P | + +However, when a validator changes power from a high to a low value, some other validator remain far back in the queue for a long time. This scenario is considered again in the Proposer Priority Range section. + +As before: + +- At the end of each run k+1 the sum of the priorities is the same as at run k. +- The max distance between priorites is (n-1) * P. + +### Validator Removal + +Consider a new example with set: + +Validator | p1 | p2 | p3 +----------|----|----|--- +VP | 1 | 2 | 3 + +Let's assume that after the last run the proposer priorities were as shown in first row with their sum being 0. After p2 is removed, at the end of next proposer selection run (penultimate row) the sum of priorities is -2 (minus the priority of the removed process). + +The procedure could continue without modifications. However, after a sufficiently large number of modifications in validator set, the priority values would migrate towards maximum or minimum allowed values causing truncations due to overflow detection. +For this reason, the selection procedure adds another __new step__ that centers the current priority values such that the priority sum remains close to 0. + +| Priority Run | -3 | -2 | -1 | 0 | 1 | 2 | 4 | Comment | +|----------------|----|----|----|---|----|----|---|-----------------------| +| last run | p3 | | | | p1 | p2 | | __remove p2__ | +| nextrun | | | | | | | | | +| __new step__ | | p3 | | | | p1 | | A(i) -= avg, avg = -1 | +| | | | | | p3 | p1 | | A(i)+=VP(i) | +| | | | p1 | | p3 | | | A(p1)-= P | + +The modified selection algorithm is: + +```md + def ProposerSelection (vset): + + // center priorities around zero + avg = sum(A(i) for i in vset)/len(vset) + for each validator i in vset: + A(i) -= avg + + // compute priorities and elect proposer + for each validator i in vset: + A(i) += VP(i) + prop = max(A) + A(prop) -= P +``` + +Observations: + +- The sum of priorities is now close to 0. Due to integer division the sum is an integer in (-n, n), where n is the number of validators. + +### New Validator + +When a new validator is added, same problem as the one described for removal appears, the sum of priorities in the new set is not zero. This is fixed with the centering step introduced above. + +One other issue that needs to be addressed is the following. A validator V that has just been elected is moved to the end of the queue. If the validator set is large and/ or other validators have significantly higher power, V will have to wait many runs to be elected. If V removes and re-adds itself to the set, it would make a significant (albeit unfair) "jump" ahead in the queue. + +In order to prevent this, when a new validator is added, its initial priority is set to: + +```md + A(V) = -1.125 * P +``` + +where P is the total voting power of the set including V. + +Curent implementation uses the penalty factor of 1.125 because it provides a small punishment that is efficient to calculate. See [here](https://github.com/tendermint/tendermint/pull/2785#discussion_r235038971) for more details. + +If we consider the validator set where p3 has just been added: + +Validator | p1 | p2 | p3 +----------|----|----|--- +VP | 1 | 3 | 8 + +then p3 will start with proposer priority: + +```md + A(p3) = -1.125 * (1 + 3 + 8) ~ -13 +``` + +Note that since current computation uses integer division there is penalty loss when sum of the voting power is less than 8. + +In the next run, p3 will still be ahead in the queue, elected as proposer and moved back in the queue. + +| Priority Run | -13 | -9 | -5 | -2 | -1 | 0 | 1 | 2 | 5 | 6 | 7 | Alg step | +|----------------|-----|----|----|----|----|---|---|----|----|----|----|-----------------------| +| last run | | | | p2 | | | | p1 | | | | __add p3__ | +| | p3 | | | p2 | | | | p1 | | | | A(p3) = -4 | +| next run | | p3 | | | | | | p2 | | p1 | | A(i) -= avg, avg = -4 | +| | | | | | p3 | | | | p2 | | p1 | A(i)+=VP(i) | +| | | | p1 | | p3 | | | | p2 | | | A(p1)-=P | + +## Proposer Priority Range + +With the introduction of centering, some interesting cases occur. Low power validators that bind early in a set that includes high power validator(s) benefit from subsequent additions to the set. This is because these early validators run through more right shift operations during centering, operations that increase their priority. + +As an example, consider the set where p2 is added after p1, with priority -1.125 * 80k = -90k. After the selection procedure runs once: + +Validator | p1 | p2 | Comment +----------|------|------|------------------ +VP | 80k | 10 | +A | 0 | -90k | __added p2__ +A | -45k | 45k | __run selection__ + +Then execute the following steps: + +1. Add a new validator p3: + + Validator | p1 | p2 | p3 + ----------|-----|----|--- + VP | 80k | 10 | 10 + +2. Run selection once. The notation '..p'/'p..' means very small deviations compared to column priority. + + | Priority Run | -90k.. | -60k | -45k | -15k | 0 | 45k | 75k | 155k | Comment | + |---------------|--------|------|------|------|---|-----|-----|------|--------------| + | last run | p3 | | p2 | | | p1 | | | __added p3__ | + | next run + | *right_shift*| | p3 | | p2 | | | p1 | | A(i) -= avg,avg=-30k + | | | ..p3| | ..p2| | | | p1 | A(i)+=VP(i) + | | | ..p3| | ..p2| | | p1.. | | A(p1)-=P, P=80k+20 + +3. Remove p1 and run selection once: + + Validator | p3 | p2 | Comment + ----------|--------|-------|------------------ + VP | 10 | 10 | + A | -60k | -15k | + A | -22.5k | 22.5k | __run selection__ + +At this point, while the total voting power is 20, the distance between priorities is 45k. It will take 4500 runs for p3 to catch up with p2. + +In order to prevent these types of scenarios, the selection algorithm performs scaling of priorities such that the difference between min and max values is smaller than two times the total voting power. + +The modified selection algorithm is: + +```md + def ProposerSelection (vset): + + // scale the priority values + diff = max(A)-min(A) + threshold = 2 * P + if diff > threshold: + scale = diff/threshold + for each validator i in vset: + A(i) = A(i)/scale + + // center priorities around zero + avg = sum(A(i) for i in vset)/len(vset) + for each validator i in vset: + A(i) -= avg + + // compute priorities and elect proposer + for each validator i in vset: + A(i) += VP(i) + prop = max(A) + A(prop) -= P +``` + +Observations: + +- With this modification, the maximum distance between priorites becomes 2 * P. + +Note also that even during steady state the priority range may increase beyond 2 * P. The scaling introduced here helps to keep the range bounded. + +## Wrinkles + +### Validator Power Overflow Conditions + +The validator voting power is a positive number stored as an int64. When a validator is added the `1.125 * P` computation must not overflow. As a consequence the code handling validator updates (add and update) checks for overflow conditions making sure the total voting power is never larger than the largest int64 `MAX`, with the property that `1.125 * MAX` is still in the bounds of int64. Fatal error is return when overflow condition is detected. + +### Proposer Priority Overflow/ Underflow Handling + +The proposer priority is stored as an int64. The selection algorithm performs additions and subtractions to these values and in the case of overflows and underflows it limits the values to: + +```go + MaxInt64 = 1 << 63 - 1 + MinInt64 = -1 << 63 +``` + +## Requirement Fulfillment Claims + +__[R1]__ + +The proposer algorithm is deterministic giving consistent results across executions with same transactions and validator set modifications. +[WIP - needs more detail] + +__[R2]__ + +Given a set of processes with the total voting power P, during a sequence of elections of length P, the number of times any process is selected as proposer is equal to its voting power. The sequence of the P proposers then repeats. If we consider the validator set: + +Validator | p1 | p2 +----------|----|--- +VP | 1 | 3 + +With no other changes to the validator set, the current implementation of proposer selection generates the sequence: +`p2, p1, p2, p2, p2, p1, p2, p2,...` or [`p2, p1, p2, p2`]* +A sequence that starts with any circular permutation of the [`p2, p1, p2, p2`] sub-sequence would also provide the same degree of fairness. In fact these circular permutations show in the sliding window (over the generated sequence) of size equal to the length of the sub-sequence. + +Assigning priorities to each validator based on the voting power and updating them at each run ensures the fairness of the proposer selection. In addition, every time a validator is elected as proposer its priority is decreased with the total voting power. + +Intuitively, a process v jumps ahead in the queue at most (max(A) - min(A))/VP(v) times until it reaches the head and is elected. The frequency is then: + +```md + f(v) ~ VP(v)/(max(A)-min(A)) = 1/k * VP(v)/P +``` + +For current implementation, this means v should be proposer at least VP(v) times out of k * P runs, with scaling factor k=2. diff --git a/spec/consensus/readme.md b/spec/consensus/readme.md new file mode 100644 index 000000000..aa79ba192 --- /dev/null +++ b/spec/consensus/readme.md @@ -0,0 +1,32 @@ +--- +order: 1 +parent: + title: Consensus + order: 4 +--- + +# Consensus + +Specification of the Tendermint consensus protocol. + +## Contents + +- [Consensus Paper](./consensus-paper) - Latex paper on + [arxiv](https://arxiv.org/abs/1807.04938) describing the + core Tendermint consensus state machine with proofs of safety and termination. +- [BFT Time](./bft-time.md) - How the timestamp in a Tendermint + block header is computed in a Byzantine Fault Tolerant manner +- [Creating Proposal](./creating-proposal.md) - How a proposer + creates a block proposal for consensus +- [Light Client Protocol](./light-client) - A protocol for light weight consensus + verification and syncing to the latest state +- [Signing](./signing.md) - Rules for cryptographic signatures + produced by validators. +- [Write Ahead Log](./wal.md) - Write ahead log used by the + consensus state machine to recover from crashes. + +The protocol used to gossip consensus messages between peers, which is critical +for liveness, is described in the [reactors section](../reactors/consensus/consensus.md). + +There is also a [stale markdown description](consensus.md) of the consensus state machine +(TODO update this). diff --git a/spec/consensus/signing.md b/spec/consensus/signing.md new file mode 100644 index 000000000..907a5a01a --- /dev/null +++ b/spec/consensus/signing.md @@ -0,0 +1,229 @@ +# Validator Signing + +Here we specify the rules for validating a proposal and vote before signing. +First we include some general notes on validating data structures common to both types. +We then provide specific validation rules for each. Finally, we include validation rules to prevent double-sigining. + +## SignedMsgType + +The `SignedMsgType` is a single byte that refers to the type of the message +being signed. It is defined in Go as follows: + +```go +// SignedMsgType is a type of signed message in the consensus. +type SignedMsgType byte + +const ( + // Votes + PrevoteType SignedMsgType = 0x01 + PrecommitType SignedMsgType = 0x02 + + // Proposals + ProposalType SignedMsgType = 0x20 +) +``` + +All signed messages must correspond to one of these types. + +## Timestamp + +Timestamp validation is subtle and there are currently no bounds placed on the +timestamp included in a proposal or vote. It is expected that validators will honestly +report their local clock time. The median of all timestamps +included in a commit is used as the timestamp for the next block height. + +Timestamps are expected to be strictly monotonic for a given validator, though +this is not currently enforced. + +## ChainID + +ChainID is an unstructured string with a max length of 50-bytes. +In the future, the ChainID may become structured, and may take on longer lengths. +For now, it is recommended that signers be configured for a particular ChainID, +and to only sign votes and proposals corresponding to that ChainID. + +## BlockID + +BlockID is the structure used to represent the block: + +```go +type BlockID struct { + Hash []byte + PartsHeader PartSetHeader +} + +type PartSetHeader struct { + Hash []byte + Total int +} +``` + +To be included in a valid vote or proposal, BlockID must either represent a `nil` block, or a complete one. +We introduce two methods, `BlockID.IsZero()` and `BlockID.IsComplete()` for these cases, respectively. + +`BlockID.IsZero()` returns true for BlockID `b` if each of the following +are true: + +```go +b.Hash == nil +b.PartsHeader.Total == 0 +b.PartsHeader.Hash == nil +``` + +`BlockID.IsComplete()` returns true for BlockID `b` if each of the following +are true: + +```go +len(b.Hash) == 32 +b.PartsHeader.Total > 0 +len(b.PartsHeader.Hash) == 32 +``` + +## Proposals + +The structure of a proposal for signing looks like: + +```go +type CanonicalProposal struct { + Type SignedMsgType // type alias for byte + Height int64 `binary:"fixed64"` + Round int64 `binary:"fixed64"` + POLRound int64 `binary:"fixed64"` + BlockID BlockID + Timestamp time.Time + ChainID string +} +``` + +A proposal is valid if each of the following lines evaluates to true for proposal `p`: + +```go +p.Type == 0x20 +p.Height > 0 +p.Round >= 0 +p.POLRound >= -1 +p.BlockID.IsComplete() +``` + +In other words, a proposal is valid for signing if it contains the type of a Proposal +(0x20), has a positive, non-zero height, a +non-negative round, a POLRound not less than -1, and a complete BlockID. + +## Votes + +The structure of a vote for signing looks like: + +```go +type CanonicalVote struct { + Type SignedMsgType // type alias for byte + Height int64 `binary:"fixed64"` + Round int64 `binary:"fixed64"` + BlockID BlockID + Timestamp time.Time + ChainID string +} +``` + +A vote is valid if each of the following lines evaluates to true for vote `v`: + +```go +v.Type == 0x1 || v.Type == 0x2 +v.Height > 0 +v.Round >= 0 +v.BlockID.IsZero() || v.BlockID.IsComplete() +``` + +In other words, a vote is valid for signing if it contains the type of a Prevote +or Precommit (0x1 or 0x2, respectively), has a positive, non-zero height, a +non-negative round, and an empty or valid BlockID. + +## Invalid Votes and Proposals + +Votes and proposals which do not satisfy the above rules are considered invalid. +Peers gossipping invalid votes and proposals may be disconnected from other peers on the network. +Note, however, that there is not currently any explicit mechanism to punish validators signing votes or proposals that fail +these basic validation rules. + +## Double Signing + +Signers must be careful not to sign conflicting messages, also known as "double signing" or "equivocating". +Tendermint has mechanisms to publish evidence of validators that signed conflicting votes, so they can be punished +by the application. Note Tendermint does not currently handle evidence of conflciting proposals, though it may in the future. + +### State + +To prevent such double signing, signers must track the height, round, and type of the last message signed. +Assume the signer keeps the following state, `s`: + +```go +type LastSigned struct { + Height int64 + Round int64 + Type SignedMsgType // byte +} +``` + +After signing a vote or proposal `m`, the signer sets: + +```go +s.Height = m.Height +s.Round = m.Round +s.Type = m.Type +``` + +### Proposals + +A signer should only sign a proposal `p` if any of the following lines are true: + +```go +p.Height > s.Height +p.Height == s.Height && p.Round > s.Round +``` + +In other words, a proposal should only be signed if it's at a higher height, or a higher round for the same height. +Once a proposal or vote has been signed for a given height and round, a proposal should never be signed for the same height and round. + +### Votes + +A signer should only sign a vote `v` if any of the following lines are true: + +```go +v.Height > s.Height +v.Height == s.Height && v.Round > s.Round +v.Height == s.Height && v.Round == s.Round && v.Step == 0x1 && s.Step == 0x20 +v.Height == s.Height && v.Round == s.Round && v.Step == 0x2 && s.Step != 0x2 +``` + +In other words, a vote should only be signed if it's: + +- at a higher height +- at a higher round for the same height +- a prevote for the same height and round where we haven't signed a prevote or precommit (but have signed a proposal) +- a precommit for the same height and round where we haven't signed a precommit (but have signed a proposal and/or a prevote) + +This means that once a validator signs a prevote for a given height and round, the only other message it can sign for that height and round is a precommit. +And once a validator signs a precommit for a given height and round, it must not sign any other message for that same height and round. + +Note this includes votes for `nil`, ie. where `BlockID.IsZero()` is true. If a +signer has already signed a vote where `BlockID.IsZero()` is true, it cannot +sign another vote with the same type for the same height and round where +`BlockID.IsComplete()` is true. Thus only a single vote of a particular type +(ie. 0x01 or 0x02) can be signed for the same height and round. + +### Other Rules + +According to the rules of Tendermint consensus, once a validator precommits for +a block, they become "locked" on that block, which means they can't prevote for +another block unless they see sufficient justification (ie. a polka from a +higher round). For more details, see the [consensus +spec](https://arxiv.org/abs/1807.04938). + +Violating this rule is known as "amnesia". In contrast to equivocation, +which is easy to detect, amnesia is difficult to detect without access to votes +from all the validators, as this is what constitutes the justification for +"unlocking". Hence, amnesia is not punished within the protocol, and cannot +easily be prevented by a signer. If enough validators simultaneously commit an +amnesia attack, they may cause a fork of the blockchain, at which point an +off-chain protocol must be engaged to collect votes from all the validators and +determine who misbehaved. For more details, see [fork +detection](https://github.com/tendermint/tendermint/pull/3978). diff --git a/spec/consensus/wal.md b/spec/consensus/wal.md new file mode 100644 index 000000000..95d1bad12 --- /dev/null +++ b/spec/consensus/wal.md @@ -0,0 +1,32 @@ +# WAL + +Consensus module writes every message to the WAL (write-ahead log). + +It also issues fsync syscall through +[File#Sync](https://golang.org/pkg/os/#File.Sync) for messages signed by this +node (to prevent double signing). + +Under the hood, it uses +[autofile.Group](https://godoc.org/github.com/tendermint/tmlibs/autofile#Group), +which rotates files when those get too big (> 10MB). + +The total maximum size is 1GB. We only need the latest block and the block before it, +but if the former is dragging on across many rounds, we want all those rounds. + +## Replay + +Consensus module will replay all the messages of the last height written to WAL +before a crash (if such occurs). + +The private validator may try to sign messages during replay because it runs +somewhat autonomously and does not know about replay process. + +For example, if we got all the way to precommit in the WAL and then crash, +after we replay the proposal message, the private validator will try to sign a +prevote. But it will fail. That's ok because we’ll see the prevote later in the +WAL. Then it will go to precommit, and that time it will work because the +private validator contains the `LastSignBytes` and then we’ll replay the +precommit from the WAL. + +Make sure to read about [WAL corruption](https://github.com/tendermint/tendermint/blob/master/docs/tendermint-core/running-in-production.md#wal-corruption) +and recovery strategies. diff --git a/spec/core/data_structures.md b/spec/core/data_structures.md new file mode 100644 index 000000000..620f567fe --- /dev/null +++ b/spec/core/data_structures.md @@ -0,0 +1,456 @@ +# Data Structures + +Here we describe the data structures in the Tendermint blockchain and the rules for validating them. + +The Tendermint blockchains consists of a short list of data types: + +- [Data Structures](#data-structures) + - [Block](#block) + - [Execution](#execution) + - [Header](#header) + - [Version](#version) + - [BlockID](#blockid) + - [PartSetHeader](#partsetheader) + - [Part](#part) + - [Time](#time) + - [Data](#data) + - [Commit](#commit) + - [CommitSig](#commitsig) + - [BlockIDFlag](#blockidflag) + - [Vote](#vote) + - [CanonicalVote](#canonicalvote) + - [Proposal](#proposal) + - [SignedMsgType](#signedmsgtype) + - [Signature](#signature) + - [EvidenceList](#evidencelist) + - [Evidence](#evidence) + - [DuplicateVoteEvidence](#duplicatevoteevidence) + - [LightClientAttackEvidence](#lightclientattackevidence) + - [LightBlock](#lightblock) + - [SignedHeader](#signedheader) + - [ValidatorSet](#validatorset) + - [Validator](#validator) + - [Address](#address) + - [ConsensusParams](#consensusparams) + - [BlockParams](#blockparams) + - [EvidenceParams](#evidenceparams) + - [ValidatorParams](#validatorparams) + - [VersionParams](#versionparams) + - [Proof](#proof) + + +## Block + +A block consists of a header, transactions, votes (the commit), +and a list of evidence of malfeasance (ie. signing conflicting votes). + +| Name | Type | Description | Validation | +|--------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------| +| Header | [Header](#header) | Header corresponding to the block. This field contains information used throughout consensus and other areas of the protocol. To find out what it contains, visit [header] (#header) | Must adhere to the validation rules of [header](#header) | +| Data | [Data](#data) | Data contains a list of transactions. The contents of the transaction is unknown to Tendermint. | This field can be empty or populated, but no validation is performed. Applications can perform validation on individual transactions prior to block creation using [checkTx](../abci/abci.md#checktx). +| Evidence | [EvidenceList](#evidence_list) | Evidence contains a list of infractions committed by validators. | Can be empty, but when populated the validations rules from [evidenceList](#evidence_list) apply | +| LastCommit | [Commit](#commit) | `LastCommit` includes one vote for every validator. All votes must either be for the previous block, nil or absent. If a vote is for the previous block it must have a valid signature from the corresponding validator. The sum of the voting power of the validators that voted must be greater than 2/3 of the total voting power of the complete validator set. The number of votes in a commit is limited to 10000 (see `types.MaxVotesCount`). | Must be empty for the initial height and must adhere to the validation rules of [commit](#commit). | + +## Execution + +Once a block is validated, it can be executed against the state. + +The state follows this recursive equation: + +```go +state(initialHeight) = InitialState +state(h+1) <- Execute(state(h), ABCIApp, block(h)) +``` + +where `InitialState` includes the initial consensus parameters and validator set, +and `ABCIApp` is an ABCI application that can return results and changes to the validator +set (TODO). Execute is defined as: + +```go +func Execute(s State, app ABCIApp, block Block) State { + // Fuction ApplyBlock executes block of transactions against the app and returns the new root hash of the app state, + // modifications to the validator set and the changes of the consensus parameters. + AppHash, ValidatorChanges, ConsensusParamChanges := app.ApplyBlock(block) + + nextConsensusParams := UpdateConsensusParams(state.ConsensusParams, ConsensusParamChanges) + return State{ + ChainID: state.ChainID, + InitialHeight: state.InitialHeight, + LastResults: abciResponses.DeliverTxResults, + AppHash: AppHash, + InitialHeight: state.InitialHeight, + LastValidators: state.Validators, + Validators: state.NextValidators, + NextValidators: UpdateValidators(state.NextValidators, ValidatorChanges), + ConsensusParams: nextConsensusParams, + Version: { + Consensus: { + AppVersion: nextConsensusParams.Version.AppVersion, + }, + }, + } +} +``` + +Validating a new block is first done prior to the `prevote`, `precommit` & `finalizeCommit` stages. + +The steps to validate a new block are: + +- Check the validity rules of the block and its fields. +- Check the versions (Block & App) are the same as in local state. +- Check the chainID's match. +- Check the height is correct. +- Check the `LastBlockID` corresponds to BlockID currently in state. +- Check the hashes in the header match those in state. +- Verify the LastCommit against state, this step is skipped for the initial height. + - This is where checking the signatures correspond to the correct block will be made. +- Make sure the proposer is part of the validator set. +- Validate bock time. + - Make sure the new blocks time is after the previous blocks time. + - Calculate the medianTime and check it against the blocks time. + - If the blocks height is the initial height then check if it matches the genesis time. +- Validate the evidence in the block. Note: Evidence can be empty + +## Header + +A block header contains metadata about the block and about the consensus, as well as commitments to +the data in the current block, the previous block, and the results returned by the application: + +| Name | Type | Description | Validation | +|-------------------|---------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Version | [Version](#version) | Version defines the application and protocol version being used. | Must adhere to the validation rules of [Version](#version) | +| ChainID | String | ChainID is the ID of the chain. This must be unique to your chain. | ChainID must be less than 50 bytes. | +| Height | uint64 | Height is the height for this header. | Must be > 0, >= initialHeight, and == previous Height+1 | +| Time | [Time](#time) | The timestamp is equal to the weighted median of validators present in the last commit. Read more on time in the [BFT-time section](../consensus/bft-time.md). Note: the timestamp of a vote must be greater by at least one millisecond than that of the block being voted on. | Time must be >= previous header timestamp + consensus parameters TimeIotaMs. The timestamp of the first block must be equal to the genesis time (since there's no votes to compute the median). | +| LastBlockID | [BlockID](#blockid) | BlockID of the previous block. | Must adhere to the validation rules of [blockID](#blockid). The first block has `block.Header.LastBlockID == BlockID{}`. | +| LastCommitHash | slice of bytes (`[]byte`) | MerkleRoot of the lastCommit's signatures. The signatures represent the validators that committed to the last block. The first block has an empty slices of bytes for the hash. | Must be of length 32 | +| DataHash | slice of bytes (`[]byte`) | MerkleRoot of the hash of transactions. **Note**: The transactions are hashed before being included in the merkle tree, the leaves of the Merkle tree are the hashes, not the transactions themselves. | Must be of length 32 | +| ValidatorHash | slice of bytes (`[]byte`) | MerkleRoot of the current validator set. The validators are first sorted by voting power (descending), then by address (ascending) prior to computing the MerkleRoot. | Must be of length 32 | +| NextValidatorHash | slice of bytes (`[]byte`) | MerkleRoot of the next validator set. The validators are first sorted by voting power (descending), then by address (ascending) prior to computing the MerkleRoot. | Must be of length 32 | +| ConsensusHash | slice of bytes (`[]byte`) | Hash of the protobuf encoded consensus parameters. | Must be of length 32 | +| AppHash | slice of bytes (`[]byte`) | Arbitrary byte array returned by the application after executing and commiting the previous block. It serves as the basis for validating any merkle proofs that comes from the ABCI application and represents the state of the actual application rather than the state of the blockchain itself. The first block's `block.Header.AppHash` is given by `ResponseInitChain.app_hash`. | This hash is determined by the application, Tendermint can not perform validation on it. | +| LastResultHash | slice of bytes (`[]byte`) | `LastResultsHash` is the root hash of a Merkle tree built from `ResponseDeliverTx` responses (only `Code` and `Data` are included. All other fields are ignored). | Must be of length 32. The first block has `block.Header.ResultsHash == MerkleRoot(nil)`, i.e. the hash of an empty input, for RFC-6962 conformance. | +| EvidenceHash | slice of bytes (`[]byte`) | MerkleRoot of the evidence of Byzantine behavior included in this block. | Must be of length 32 | +| ProposerAddress | slice of bytes (`[]byte`) | Address of the original proposer of the block. Validator must be in the current validatorSet. | Must be of length 20 | + +## Version + +NOTE: that this is more specifically the consensus version and doesn't include information like the +P2P Version. (TODO: we should write a comprehensive document about +versioning that this can refer to) + +| Name | type | Description | Validation | +|-------|--------|-----------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------| +| Block | uint64 | This number represents the version of the block protocol and must be the same throughout an operational network | Must be equal to protocol version being used in a network (`block.Version.Block == state.Version.Consensus.Block`) | +| App | uint64 | App version is decided on by the application. Read [here](../abci/abci.md#info) | `block.Version.App == state.Version.Consensus.App` | + +## BlockID + +The `BlockID` contains two distinct Merkle roots of the block. The `BlockID` includes these two hashes, as well as the number of parts (ie. `len(MakeParts(block))`) + +| Name | Type | Description | Validation | +|---------------|---------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------| +| Hash | slice of bytes (`[]byte`) | MerkleRoot of all the fields in the header (ie. `MerkleRoot(header)`. | hash must be of length 32 | +| PartSetHeader | [PartSetHeader](#PartSetHeader) | Used for secure gossiping of the block during consensus, is the MerkleRoot of the complete serialized block cut into parts (ie. `MerkleRoot(MakeParts(block))`). | Must adhere to the validation rules of [PartSetHeader](#PartSetHeader) | + +See [MerkleRoot](./encoding.md#MerkleRoot) for details. + +## PartSetHeader + +| Name | Type | Description | Validation | +|-------|---------------------------|-----------------------------------|----------------------| +| Total | int32 | Total amount of parts for a block | Must be > 0 | +| Hash | slice of bytes (`[]byte`) | MerkleRoot of a serialized block | Must be of length 32 | + +## Part + +Part defines a part of a block. In Tendermint blocks are broken into `parts` for gossip. + +| Name | Type | Description | Validation | +|-------|-----------------|-----------------------------------|----------------------| +| index | int32 | Total amount of parts for a block | Must be > 0 | +| bytes | bytes | MerkleRoot of a serialized block | Must be of length 32 | +| proof | [Proof](#proof) | MerkleRoot of a serialized block | Must be of length 32 | + +## Time + +Tendermint uses the [Google.Protobuf.Timestamp](https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#google.protobuf.Timestamp) +format, which uses two integers, one 64 bit integer for Seconds and a 32 bit integer for Nanoseconds. + +## Data + +Data is just a wrapper for a list of transactions, where transactions are arbitrary byte arrays: + +| Name | Type | Description | Validation | +|------|----------------------------|------------------------|-----------------------------------------------------------------------------| +| Txs | Matrix of bytes ([][]byte) | Slice of transactions. | Validation does not occur on this field, this data is unknown to Tendermint | + +## Commit + +Commit is a simple wrapper for a list of signatures, with one for each validator. It also contains the relevant BlockID, height and round: + +| Name | Type | Description | Validation | +|------------|----------------------------------|----------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------| +| Height | uint64 | Height at which this commit was created. | Must be > 0 | +| Round | int32 | Round that the commit corresponds to. | Must be > 0 | +| BlockID | [BlockID](#blockid) | The blockID of the corresponding block. | Must adhere to the validation rules of [BlockID](#blockid). | +| Signatures | Array of [CommitSig](#commitsig) | Array of commit signatures that correspond to current validator set. | Length of signatures must be > 0 and adhere to the validation of each individual [Commitsig](#commitsig) | + +## CommitSig + +`CommitSig` represents a signature of a validator, who has voted either for nil, +a particular `BlockID` or was absent. It's a part of the `Commit` and can be used +to reconstruct the vote set given the validator set. + +| Name | Type | Description | Validation | +|------------------|-----------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------| +| BlockIDFlag | [BlockIDFlag](#blockidflag) | Represents the validators participation in consensus: Either voted for the block that received the majority, voted for another block, voted nil or did not vote | Must be one of the fields in the [BlockIDFlag](#blockidflag) enum | +| ValidatorAddress | [Address](#address) | Address of the validator | Must be of length 20 | +| Timestamp | [Time](#time) | This field will vary from `CommitSig` to `CommitSig`. It represents the timestamp of the validator. | [Time](#time) | +| Signature | [Signature](#signature) | Signature corresponding to the validators participation in consensus. | The length of the signature must be > 0 and < than 64 | + +NOTE: `ValidatorAddress` and `Timestamp` fields may be removed in the future +(see [ADR-25](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-025-commit.md)). + +## BlockIDFlag + +BlockIDFlag represents which BlockID the [signature](#commitsig) is for. + +```go +enum BlockIDFlag { + BLOCK_ID_FLAG_UNKNOWN = 0; + BLOCK_ID_FLAG_ABSENT = 1; // signatures for other blocks are also considered absent + BLOCK_ID_FLAG_COMMIT = 2; + BLOCK_ID_FLAG_NIL = 3; +} +``` + +## Vote + +A vote is a signed message from a validator for a particular block. +The vote includes information about the validator signing it. When stored in the blockchain or propagated over the network, votes are encoded in Protobuf. + +| Name | Type | Description | Validation | +|------------------|---------------------------------|---------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| +| Type | [SignedMsgType](#signedmsgtype) | Either prevote or precommit. [SignedMsgType](#signedmsgtype) | A Vote is valid if its corresponding fields are included in the enum [signedMsgType](#signedmsgtype) | +| Height | uint64 | Height for which this vote was created for | Must be > 0 | +| Round | int32 | Round that the commit corresponds to. | Must be > 0 | +| BlockID | [BlockID](#blockid) | The blockID of the corresponding block. | [BlockID](#blockid) | +| Timestamp | [Time](#Time) | Timestamp represents the time at which a validator signed. | [Time](#time) | +| ValidatorAddress | slice of bytes (`[]byte`) | Address of the validator | Length must be equal to 20 | +| ValidatorIndex | int32 | Index at a specific block height that corresponds to the Index of the validator in the set. | must be > 0 | +| Signature | slice of bytes (`[]byte`) | Signature by the validator if they participated in consensus for the associated bock. | Length of signature must be > 0 and < 64 | + +## CanonicalVote + +CanonicalVote is for validator signing. This type will not be present in a block. Votes are represented via `CanonicalVote` and also encoded using protobuf via `type.SignBytes` which includes the `ChainID`, and uses a different ordering of +the fields. + +```proto +message CanonicalVote { + SignedMsgType type = 1; + fixed64 height = 2; + sfixed64 round = 3; + CanonicalBlockID block_id = 4; + google.protobuf.Timestamp timestamp = 5; + string chain_id = 6; +} +``` + +For signing, votes are represented via [`CanonicalVote`](#canonicalvote) and also encoded using protobuf via +`type.SignBytes` which includes the `ChainID`, and uses a different ordering of +the fields. + +We define a method `Verify` that returns `true` if the signature verifies against the pubkey for the `SignBytes` +using the given ChainID: + +```go +func (vote *Vote) Verify(chainID string, pubKey crypto.PubKey) error { + if !bytes.Equal(pubKey.Address(), vote.ValidatorAddress) { + return ErrVoteInvalidValidatorAddress + } + + if !pubKey.VerifyBytes(types.VoteSignBytes(chainID), vote.Signature) { + return ErrVoteInvalidSignature + } + return nil +} +``` + +## Proposal + +Proposal contains height and round for which this proposal is made, BlockID as a unique identifier +of proposed block, timestamp, and POLRound (a so-called Proof-of-Lock (POL) round) that is needed for +termination of the consensus. If POLRound >= 0, then BlockID corresponds to the block that +is locked in POLRound. The message is signed by the validator private key. + +| Name | Type | Description | Validation | +|-----------|---------------------------------|---------------------------------------------------------------------------------------|---------------------------------------------------------| +| Type | [SignedMsgType](#signedmsgtype) | Represents a Proposal [SignedMsgType](#signedmsgtype) | Must be `ProposalType` [signedMsgType](#signedmsgtype) | +| Height | uint64 | Height for which this vote was created for | Must be > 0 | +| Round | int32 | Round that the commit corresponds to. | Must be > 0 | +| POLRound | int64 | Proof of lock | Must be > 0 | +| BlockID | [BlockID](#blockid) | The blockID of the corresponding block. | [BlockID](#blockid) | +| Timestamp | [Time](#Time) | Timestamp represents the time at which a validator signed. | [Time](#time) | +| Signature | slice of bytes (`[]byte`) | Signature by the validator if they participated in consensus for the associated bock. | Length of signature must be > 0 and < 64 | + +## SignedMsgType + +Signed message type represents a signed messages in consensus. + +```proto +enum SignedMsgType { + + SIGNED_MSG_TYPE_UNKNOWN = 0; + // Votes + SIGNED_MSG_TYPE_PREVOTE = 1; + SIGNED_MSG_TYPE_PRECOMMIT = 2; + + // Proposal + SIGNED_MSG_TYPE_PROPOSAL = 32; +} +``` + +## Signature + +Signatures in Tendermint are raw bytes representing the underlying signature. + +See the [signature spec](./encoding.md#key-types) for more. + +## EvidenceList + +EvidenceList is a simple wrapper for a list of evidence: + +| Name | Type | Description | Validation | +|----------|--------------------------------|----------------------------------------|-----------------------------------------------------------------| +| Evidence | Array of [Evidence](#evidence) | List of verified [evidence](#evidence) | Validation adheres to individual types of [Evidence](#evidence) | + +## Evidence + +Evidence in Tendermint is used to indicate breaches in the consensus by a validator. + +More information on how evidence works in Tendermint can be found [here](../consensus/evidence.md) + +### DuplicateVoteEvidence + +`DuplicateVoteEvidence` represents a validator that has voted for two different blocks +in the same round of the same height. Votes are lexicographically sorted on `BlockID`. + +| Name | Type | Description | Validation | +|------------------|---------------|--------------------------------------------------------------------|-----------------------------------------------------| +| VoteA | [Vote](#vote) | One of the votes submitted by a validator when they equivocated | VoteA must adhere to [Vote](#vote) validation rules | +| VoteB | [Vote](#vote) | The second vote submitted by a validator when they equivocated | VoteB must adhere to [Vote](#vote) validation rules | +| TotalVotingPower | int64 | The total power of the validator set at the height of equivocation | Must be equal to nodes own copy of the data | +| ValidatorPower | int64 | Power of the equivocating validator at the height | Must be equal to the nodes own copy of the data | +| Timestamp | [Time](#Time) | Time of the block where the equivocation occurred | Must be equal to the nodes own copy of the data | + +### LightClientAttackEvidence + +`LightClientAttackEvidence` is a generalized evidence that captures all forms of known attacks on +a light client such that a full node can verify, propose and commit the evidence on-chain for +punishment of the malicious validators. There are three forms of attacks: Lunatic, Equivocation +and Amnesia. These attacks are exhaustive. You can find a more detailed overview of this [here](../light-client/accountability#the_misbehavior_of_faulty_validators) + +| Name | Type | Description | Validation | +|----------------------|------------------------------------|----------------------------------------------------------------------|------------------------------------------------------------------| +| ConflictingBlock | [LightBlock](#LightBlock) | Read Below | Must adhere to the validation rules of [lightBlock](#lightblock) | +| CommonHeight | int64 | Read Below | must be > 0 | +| Byzantine Validators | Array of [Validators](#Validators) | validators that acted maliciously | Read Below | +| TotalVotingPower | int64 | The total power of the validator set at the height of the infraction | Must be equal to the nodes own copy of the data | +| Timestamp | [Time](#Time) | Time of the block where the infraction occurred | Must be equal to the nodes own copy of the data | + +## LightBlock + +LightBlock is the core data structure of the [light client](../light-client/README.md). It combines two data structures needed for verification ([signedHeader](#signedheader) & [validatorSet](#validatorset)). + +| Name | Type | Description | Validation | +|--------------|-------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------| +| SignedHeader | [SignedHeader](#signedheader) | The header and commit, these are used for verification purposes. To find out more visit [light client docs](../light-client/README.md) | Must not be nil and adhere to the validation rules of [signedHeader](#signedheader) | +| ValidatorSet | [ValidatorSet](#validatorset) | The validatorSet is used to help with verify that the validators in that committed the infraction were truly in the validator set. | Must not be nil and adhere to the validation rules of [validatorSet](#validatorset) | + +The `SignedHeader` and `ValidatorSet` are linked by the hash of the validator set(`SignedHeader.ValidatorsHash == ValidatorSet.Hash()`. + +## SignedHeader + +The SignedhHeader is the [header](#header) accompanied by the commit to prove it. + +| Name | Type | Description | Validation | +|--------|-------------------|-------------------|-----------------------------------------------------------------------------------| +| Header | [Header](#Header) | [Header](#header) | Header cannot be nil and must adhere to the [Header](#Header) validation criteria | +| Commit | [Commit](#commit) | [Commit](#commit) | Commit cannot be nil and must adhere to the [Commit](#commit) criteria | + +## ValidatorSet + +| Name | Type | Description | Validation | +|------------|----------------------------------|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------| +| Validators | Array of [validator](#validator) | List of the active validators at a specific height | The list of validators can not be empty or nil and must adhere to the validation rules of [validator](#validator) | +| Proposer | [validator](#validator) | The block proposer for the corresponding block | The proposer cannot be nil and must adhere to the validation rules of [validator](#validator) | + +## Validator + +| Name | Type | Description | Validation | +|------------------|---------------------------|---------------------------------------------------------------------------------------------------|---------------------------------------------------| +| Address | [Address](#address) | Validators Address | Length must be of size 20 | +| Pubkey | slice of bytes (`[]byte`) | Validators Public Key | must be a length greater than 0 | +| VotingPower | int64 | Validators voting power | cannot be < 0 | +| ProposerPriority | int64 | Validators proposer priority. This is used to gauge when a validator is up next to propose blocks | No validation, value can be negative and positive | + +## Address + +Address is a type alias of a slice of bytes. The address is calculated by hashing the public key using sha256 and truncating it to only use the first 20 bytes of the slice. + +```go +const ( + TruncatedSize = 20 +) + +func SumTruncated(bz []byte) []byte { + hash := sha256.Sum256(bz) + return hash[:TruncatedSize] +} +``` + +## ConsensusParams + +| Name | Type | Description | Field Number | +|-----------|-------------------------------------|------------------------------------------------------------------------------|--------------| +| block | [BlockParams](#blockparams) | Parameters limiting the size of a block and time between consecutive blocks. | 1 | +| evidence | [EvidenceParams](#evidenceparams) | Parameters limiting the validity of evidence of byzantine behavior. | 2 | +| validator | [ValidatorParams](#validatorparams) | Parameters limiting the types of public keys validators can use. | 3 | +| version | [BlockParams](#blockparams) | The ABCI application version. | 4 | + +### BlockParams + +| Name | Type | Description | Field Number | +|--------------|-------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| +| max_bytes | int64 | Max size of a block, in bytes. | 1 | +| max_gas | int64 | Max sum of `GasWanted` in a proposed block. NOTE: blocks that violate this may be committed if there are Byzantine proposers. It's the application's responsibility to handle this when processing a block! | 2 | + +### EvidenceParams + +| Name | Type | Description | Field Number | +|--------------------|------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| +| max_age_num_blocks | int64 | Max age of evidence, in blocks. | 1 | +| max_age_duration | [google.protobuf.Duration](https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#google.protobuf.Duration) | Max age of evidence, in time. It should correspond with an app's "unbonding period" or other similar mechanism for handling [Nothing-At-Stake attacks](https://github.com/ethereum/wiki/wiki/Proof-of-Stake-FAQ#what-is-the-nothing-at-stake-problem-and-how-can-it-be-fixed). | 2 | +| max_bytes | int64 | maximum size in bytes of total evidence allowed to be entered into a block | 3 | + +### ValidatorParams + +| Name | Type | Description | Field Number | +|---------------|-----------------|-----------------------------------------------------------------------|--------------| +| pub_key_types | repeated string | List of accepted public key types. Uses same naming as `PubKey.Type`. | 1 | + +### VersionParams + +| Name | Type | Description | Field Number | +|-------------|--------|-------------------------------|--------------| +| app_version | uint64 | The ABCI application version. | 1 | + +## Proof + +| Name | Type | Description | Field Number | +|-----------|----------------|-----------------------------------------------|--------------| +| total | int64 | Total number of items. | 1 | +| index | int64 | Index item to prove. | 2 | +| leaf_hash | bytes | Hash of item value. | 3 | +| aunts | repeated bytes | Hashes from leaf's sibling to a root's child. | 4 | diff --git a/spec/core/encoding.md b/spec/core/encoding.md new file mode 100644 index 000000000..c137575d7 --- /dev/null +++ b/spec/core/encoding.md @@ -0,0 +1,300 @@ +# Encoding + +## Protocol Buffers + +Tendermint uses [Protocol Buffers](https://developers.google.com/protocol-buffers), specifically proto3, for all data structures. + +Please see the [Proto3 language guide](https://developers.google.com/protocol-buffers/docs/proto3) for more details. + +## Byte Arrays + +The encoding of a byte array is simply the raw-bytes prefixed with the length of +the array as a `UVarint` (what proto calls a `Varint`). + +For details on varints, see the [protobuf +spec](https://developers.google.com/protocol-buffers/docs/encoding#varints). + +For example, the byte-array `[0xA, 0xB]` would be encoded as `0x020A0B`, +while a byte-array containing 300 entires beginning with `[0xA, 0xB, ...]` would +be encoded as `0xAC020A0B...` where `0xAC02` is the UVarint encoding of 300. + +## Hashing + +Tendermint uses `SHA256` as its hash function. +Objects are always serialized before being hashed. +So `SHA256(obj)` is short for `SHA256(ProtoEncoding(obj))`. + +## Public Key Cryptography + +Tendermint uses Protobuf [Oneof](https://developers.google.com/protocol-buffers/docs/proto3#oneof) +to distinguish between different types public keys, and signatures. +Additionally, for each public key, Tendermint +defines an Address function that can be used as a more compact identifier in +place of the public key. Here we list the concrete types, their names, +and prefix bytes for public keys and signatures, as well as the address schemes +for each PubKey. Note for brevity we don't +include details of the private keys beyond their type and name. + +### Key Types + +Each type specifies it's own pubkey, address, and signature format. + +#### Ed25519 + +The address is the first 20-bytes of the SHA256 hash of the raw 32-byte public key: + +```go +address = SHA256(pubkey)[:20] +``` + +The signature is the raw 64-byte ED25519 signature. + +Tendermint adopted [zip215](https://zips.z.cash/zip-0215) for verification of ed25519 signatures. + +> Note: This change will be released in the next major release of Tendermint-Go (0.35). + +#### Secp256k1 + +The address is the first 20-bytes of the SHA256 hash of the raw 32-byte public key: + +```go +address = SHA256(pubkey)[:20] +``` + +## Other Common Types + +### BitArray + +The BitArray is used in some consensus messages to represent votes received from +validators, or parts received in a block. It is represented +with a struct containing the number of bits (`Bits`) and the bit-array itself +encoded in base64 (`Elems`). + +| Name | Type | +|-------|----------------------------| +| bits | int64 | +| elems | slice of int64 (`[]int64`) | + +Note BitArray receives a special JSON encoding in the form of `x` and `_` +representing `1` and `0`. Ie. the BitArray `10110` would be JSON encoded as +`"x_xx_"` + +### Part + +Part is used to break up blocks into pieces that can be gossiped in parallel +and securely verified using a Merkle tree of the parts. + +Part contains the index of the part (`Index`), the actual +underlying data of the part (`Bytes`), and a Merkle proof that the part is contained in +the set (`Proof`). + +| Name | Type | +|-------|---------------------------| +| index | uint32 | +| bytes | slice of bytes (`[]byte`) | +| proof | [proof](#merkle-proof) | + +See details of SimpleProof, below. + +### MakeParts + +Encode an object using Protobuf and slice it into parts. +Tendermint uses a part size of 65536 bytes, and allows a maximum of 1601 parts +(see `types.MaxBlockPartsCount`). This corresponds to the hard-coded block size +limit of 100MB. + +```go +func MakeParts(block Block) []Part +``` + +## Merkle Trees + +For an overview of Merkle trees, see +[wikipedia](https://en.wikipedia.org/wiki/Merkle_tree) + +We use the RFC 6962 specification of a merkle tree, with sha256 as the hash function. +Merkle trees are used throughout Tendermint to compute a cryptographic digest of a data structure. +The differences between RFC 6962 and the simplest form a merkle tree are that: + +1. leaf nodes and inner nodes have different hashes. + This is for "second pre-image resistance", to prevent the proof to an inner node being valid as the proof of a leaf. + The leaf nodes are `SHA256(0x00 || leaf_data)`, and inner nodes are `SHA256(0x01 || left_hash || right_hash)`. + +2. When the number of items isn't a power of two, the left half of the tree is as big as it could be. + (The largest power of two less than the number of items) This allows new leaves to be added with less + recomputation. For example: + +```md + Simple Tree with 6 items Simple Tree with 7 items + + * * + / \ / \ + / \ / \ + / \ / \ + / \ / \ + * * * * + / \ / \ / \ / \ + / \ / \ / \ / \ + / \ / \ / \ / \ + * * h4 h5 * * * h6 + / \ / \ / \ / \ / \ +h0 h1 h2 h3 h0 h1 h2 h3 h4 h5 +``` + +### MerkleRoot + +The function `MerkleRoot` is a simple recursive function defined as follows: + +```go +// SHA256([]byte{}) +func emptyHash() []byte { + return tmhash.Sum([]byte{}) +} + +// SHA256(0x00 || leaf) +func leafHash(leaf []byte) []byte { + return tmhash.Sum(append(0x00, leaf...)) +} + +// SHA256(0x01 || left || right) +func innerHash(left []byte, right []byte) []byte { + return tmhash.Sum(append(0x01, append(left, right...)...)) +} + +// largest power of 2 less than k +func getSplitPoint(k int) { ... } + +func MerkleRoot(items [][]byte) []byte{ + switch len(items) { + case 0: + return empthHash() + case 1: + return leafHash(items[0]) + default: + k := getSplitPoint(len(items)) + left := MerkleRoot(items[:k]) + right := MerkleRoot(items[k:]) + return innerHash(left, right) + } +} +``` + +Note: `MerkleRoot` operates on items which are arbitrary byte arrays, not +necessarily hashes. For items which need to be hashed first, we introduce the +`Hashes` function: + +```go +func Hashes(items [][]byte) [][]byte { + return SHA256 of each item +} +``` + +Note: we will abuse notion and invoke `MerkleRoot` with arguments of type `struct` or type `[]struct`. +For `struct` arguments, we compute a `[][]byte` containing the protobuf encoding of each +field in the struct, in the same order the fields appear in the struct. +For `[]struct` arguments, we compute a `[][]byte` by protobuf encoding the individual `struct` elements. + +### Merkle Proof + +Proof that a leaf is in a Merkle tree is composed as follows: + +| Name | Type | +|----------|----------------------------| +| total | int64 | +| index | int64 | +| leafHash | slice of bytes (`[]byte`) | +| aunts | Matrix of bytes ([][]byte) | + +Which is verified as follows: + +```golang +func (proof Proof) Verify(rootHash []byte, leaf []byte) bool { + assert(proof.LeafHash, leafHash(leaf) + + computedHash := computeHashFromAunts(proof.Index, proof.Total, proof.LeafHash, proof.Aunts) + return computedHash == rootHash +} + +func computeHashFromAunts(index, total int, leafHash []byte, innerHashes [][]byte) []byte{ + assert(index < total && index >= 0 && total > 0) + + if total == 1{ + assert(len(proof.Aunts) == 0) + return leafHash + } + + assert(len(innerHashes) > 0) + + numLeft := getSplitPoint(total) // largest power of 2 less than total + if index < numLeft { + leftHash := computeHashFromAunts(index, numLeft, leafHash, innerHashes[:len(innerHashes)-1]) + assert(leftHash != nil) + return innerHash(leftHash, innerHashes[len(innerHashes)-1]) + } + rightHash := computeHashFromAunts(index-numLeft, total-numLeft, leafHash, innerHashes[:len(innerHashes)-1]) + assert(rightHash != nil) + return innerHash(innerHashes[len(innerHashes)-1], rightHash) +} +``` + +The number of aunts is limited to 100 (`MaxAunts`) to protect the node against DOS attacks. +This limits the tree size to 2^100 leaves, which should be sufficient for any +conceivable purpose. + +### IAVL+ Tree + +Because Tendermint only uses a Simple Merkle Tree, application developers are expect to use their own Merkle tree in their applications. For example, the IAVL+ Tree - an immutable self-balancing binary tree for persisting application state is used by the [Cosmos SDK](https://github.com/cosmos/cosmos-sdk/blob/ae77f0080a724b159233bd9b289b2e91c0de21b5/docs/interfaces/lite/specification.md) + +## JSON + +Tendermint has its own JSON encoding in order to keep backwards compatibility with the previous RPC layer. + +Registered types are encoded as: + +```json +{ + "type": "", + "value": +} +``` + +For instance, an ED25519 PubKey would look like: + +```json +{ + "type": "tendermint/PubKeyEd25519", + "value": "uZ4h63OFWuQ36ZZ4Bd6NF+/w9fWUwrOncrQsackrsTk=" +} +``` + +Where the `"value"` is the base64 encoding of the raw pubkey bytes, and the +`"type"` is the type name for Ed25519 pubkeys. + +### Signed Messages + +Signed messages (eg. votes, proposals) in the consensus are encoded using protobuf. + +When signing, the elements of a message are re-ordered so the fixed-length fields +are first, making it easy to quickly check the type, height, and round. +The `ChainID` is also appended to the end. +We call this encoding the SignBytes. For instance, SignBytes for a vote is the protobuf encoding of the following struct: + +```protobuf +message CanonicalVote { + SignedMsgType type = 1; + sfixed64 height = 2; // canonicalization requires fixed size encoding here + sfixed64 round = 3; // canonicalization requires fixed size encoding here + CanonicalBlockID block_id = 4; + google.protobuf.Timestamp timestamp = 5; + string chain_id = 6; +} +``` + +The field ordering and the fixed sized encoding for the first three fields is optimized to ease parsing of SignBytes +in HSMs. It creates fixed offsets for relevant fields that need to be read in this context. + +> Note: All canonical messages are length prefixed. + +For more details, see the [signing spec](../consensus/signing.md). +Also, see the motivating discussion in +[#1622](https://github.com/tendermint/tendermint/issues/1622). diff --git a/spec/core/genesis.md b/spec/core/genesis.md new file mode 100644 index 000000000..1dc019e77 --- /dev/null +++ b/spec/core/genesis.md @@ -0,0 +1,34 @@ +# Genesis + +The genesis file is the starting point of a chain. An application will populate the `app_state` field in the genesis with their required fields. Tendermint is not able to validate this section because it is unaware what application state consists of. + +## Genesis Fields + +- `genesis_time`: The genesis time is the time the blockchain started or will start. If nodes are started before this time they will sit idle until the time specified. +- `chain_id`: The chainid is the chain identifier. Every chain should have a unique identifier. When conducting a fork based upgrade, we recommend changing the chainid to avoid network or consensus errors. +- `initial_height`: This field is the starting height of the blockchain. When conducting a chain restart to avoid restarting at height 1, the network is able to start at a specified height. +- `consensus_params` + - `block` + - `max_bytes`: The max amount of bytes a block can be. + - `max_gas`: The maximum amount of gas that a block can have. + - `time_iota_ms`: This parameter has no value anymore in Tendermint-core. + +- `evidence` + - `max_age_num_blocks`: After this preset amount of blocks has passed a single piece of evidence is considered invalid + - `max_age_duration`: After this preset amount of time has passed a single piece of evidence is considered invalid. + - `max_bytes`: The max amount of bytes of all evidence included in a block. + +> Note: For evidence to be considered invalid, evidence must be older than both `max_age_num_blocks` and `max_age_duration` + +- `validator` + - `pub_key_types`: Defines which curves are to be accepted as a valid validator consensus key. Tendermint supports ed25519, sr25519 and secp256k1. + +- `version` + - `app_version`: The version of the application. This is set by the application and is used to identify which version of the app a user should be using in order to operate a node. + +- `validators` + - This is an array of validators. This validator set is used as the starting validator set of the chain. This field can be empty, if the application sets the validator set in `InitChain`. + +- `app_hash`: The applications state root hash. This field does not need to be populated at the start of the chain, the application may provide the needed information via `Initchain`. + +- `app_state`: This section is filled in by the application and is unknown to Tendermint. diff --git a/spec/core/readme.md b/spec/core/readme.md new file mode 100644 index 000000000..46f95f1b7 --- /dev/null +++ b/spec/core/readme.md @@ -0,0 +1,13 @@ +--- +order: 1 +parent: + title: Core + order: 3 +--- + +This section describes the core types and functionality of the Tendermint protocol implementation. + +- [Core Data Structures](./data_structures.md) +- [Encoding](./encoding.md) +- [Genesis](./genesis.md) +- [State](./state.md) diff --git a/spec/core/state.md b/spec/core/state.md new file mode 100644 index 000000000..5138c0950 --- /dev/null +++ b/spec/core/state.md @@ -0,0 +1,121 @@ +# State + +The state contains information whose cryptographic digest is included in block headers, and thus is +necessary for validating new blocks. For instance, the validators set and the results of +transactions are never included in blocks, but their Merkle roots are: +the state keeps track of them. + +The `State` object itself is an implementation detail, since it is never +included in a block or gossiped over the network, and we never compute +its hash. The persistence or query interface of the `State` object +is an implementation detail and not included in the specification. +However, the types in the `State` object are part of the specification, since +the Merkle roots of the `State` objects are included in blocks and values are used during +validation. + +```go +type State struct { + ChainID string + InitialHeight int64 + + LastBlockHeight int64 + LastBlockID types.BlockID + LastBlockTime time.Time + + Version Version + LastResults []Result + AppHash []byte + + LastValidators ValidatorSet + Validators ValidatorSet + NextValidators ValidatorSet + + ConsensusParams ConsensusParams +} +``` + +The chain ID and initial height are taken from the genesis file, and not changed again. The +initial height will be `1` in the typical case, `0` is an invalid value. + +Note there is a hard-coded limit of 10000 validators. This is inherited from the +limit on the number of votes in a commit. + +Further information on [`Validator`'s](./data_structures.md#validator), +[`ValidatorSet`'s](./data_structures.md#validatorset) and +[`ConsensusParams`'s](./data_structures.md#consensusparams) can +be found in [data structures](./data_structures.md) + +## Execution + +State gets updated at the end of executing a block. Of specific interest is `ResponseEndBlock` and +`ResponseCommit` + +```go +type ResponseEndBlock struct { + ValidatorUpdates []ValidatorUpdate `protobuf:"bytes,1,rep,name=validator_updates,json=validatorUpdates,proto3" json:"validator_updates"` + ConsensusParamUpdates *types1.ConsensusParams `protobuf:"bytes,2,opt,name=consensus_param_updates,json=consensusParamUpdates,proto3" json:"consensus_param_updates,omitempty"` + Events []Event `protobuf:"bytes,3,rep,name=events,proto3" json:"events,omitempty"` +} +``` + +where + +```go +type ValidatorUpdate struct { + PubKey crypto.PublicKey `protobuf:"bytes,1,opt,name=pub_key,json=pubKey,proto3" json:"pub_key"` + Power int64 `protobuf:"varint,2,opt,name=power,proto3" json:"power,omitempty"` +} +``` + +and + +```go +type ResponseCommit struct { + // reserve 1 + Data []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` + RetainHeight int64 `protobuf:"varint,3,opt,name=retain_height,json=retainHeight,proto3" json:"retain_height,omitempty"` +} +``` + +`ValidatorUpdates` are used to add and remove validators to the current set as well as update +validator power. Setting validator power to 0 in `ValidatorUpdate` will cause the validator to be +removed. `ConsensusParams` are safely copied across (i.e. if a field is nil it gets ignored) and the +`Data` from the `ResponseCommit` is used as the `AppHash` + +## Version + +```go +type Version struct { + consensus Consensus + software string +} +``` + +[`Consensus`](./data_structures.md#version) contains the protocol version for the blockchain and the +application. + +## Block + +The total size of a block is limited in bytes by the `ConsensusParams.Block.MaxBytes`. +Proposed blocks must be less than this size, and will be considered invalid +otherwise. + +Blocks should additionally be limited by the amount of "gas" consumed by the +transactions in the block, though this is not yet implemented. + +## Evidence + +For evidence in a block to be valid, it must satisfy: + +```go +block.Header.Time-evidence.Time < ConsensusParams.Evidence.MaxAgeDuration && + block.Header.Height-evidence.Height < ConsensusParams.Evidence.MaxAgeNumBlocks +``` + +A block must not contain more than `ConsensusParams.Evidence.MaxBytes` of evidence. This is +implemented to mitigate spam attacks. + +## Validator + +Validators from genesis file and `ResponseEndBlock` must have pubkeys of type ∈ +`ConsensusParams.Validator.PubKeyTypes`. diff --git a/spec/ivy-proofs/Dockerfile b/spec/ivy-proofs/Dockerfile new file mode 100644 index 000000000..be60151fd --- /dev/null +++ b/spec/ivy-proofs/Dockerfile @@ -0,0 +1,37 @@ +# we need python2 support, which was dropped after buster: +FROM debian:buster + +RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections +RUN apt-get update +RUN apt-get install -y apt-utils + +# Install and configure locale `en_US.UTF-8` +RUN apt-get install -y locales && \ + sed -i -e "s/# $en_US.*/en_US.UTF-8 UTF-8/" /etc/locale.gen && \ + dpkg-reconfigure --frontend=noninteractive locales && \ + update-locale LANG=en_US.UTF-8 +ENV LANG=en_US.UTF-8 + +RUN apt-get update +RUN apt-get install -y git python2 python-pip g++ cmake python-ply python-tk tix pkg-config libssl-dev python-setuptools + +# create a user: +RUN useradd -ms /bin/bash user +USER user +WORKDIR /home/user + +RUN git clone --recurse-submodules https://github.com/kenmcmil/ivy.git +WORKDIR /home/user/ivy/ +RUN git checkout 271ee38980699115508eb90a0dd01deeb750a94b + +RUN python2.7 build_submodules.py +RUN mkdir -p "/home/user/python/lib/python2.7/site-packages" +ENV PYTHONPATH="/home/user/python/lib/python2.7/site-packages" +# need to install pyparsing manually because otherwise wrong version found +RUN pip install pyparsing +RUN python2.7 setup.py install --prefix="/home/user/python/" +ENV PATH=$PATH:"/home/user/python/bin/" +WORKDIR /home/user/tendermint-proof/ + +ENTRYPOINT ["/home/user/tendermint-proof/check_proofs.sh"] + diff --git a/spec/ivy-proofs/README.md b/spec/ivy-proofs/README.md new file mode 100644 index 000000000..00a4bed25 --- /dev/null +++ b/spec/ivy-proofs/README.md @@ -0,0 +1,33 @@ +# Ivy Proofs + +```copyright +Copyright (c) 2020 Galois, Inc. +SPDX-License-Identifier: Apache-2.0 +``` + +## Contents + +This folder contains: + +* `tendermint.ivy`, a specification of Tendermint algorithm as described in *The latest gossip on BFT consensus* by E. Buchman, J. Kwon, Z. Milosevic. +* `abstract_tendermint.ivy`, a more abstract specification of Tendermint that is more verification-friendly. +* `classic_safety.ivy`, a proof that Tendermint satisfies the classic safety property of BFT consensus: if every two quorums have a well-behaved node in common, then no two well-behaved nodes ever disagree. +* `accountable_safety_1.ivy`, a proof that, assuming every quorum contains at least one well-behaved node, if two well-behaved nodes disagree, then there is evidence demonstrating at least f+1 nodes misbehaved. +* `accountable_safety_2.ivy`, a proof that, regardless of any assumption about quorums, well-behaved nodes cannot be framed by malicious nodes. In other words, malicious nodes can never construct evidence that incriminates a well-behaved node. +* `network_shim.ivy`, the network model and a convenience `shim` object to interface with the Tendermint specification. +* `domain_model.ivy`, a specification of the domain model underlying the Tendermint specification, i.e. rounds, value, quorums, etc. + +All specifications and proofs are written in [Ivy](https://github.com/kenmcmil/ivy). + +The license above applies to all files in this folder. + + +## Building and running + +The easiest way to check the proofs is to use [Docker](https://www.docker.com/). + +1. Install [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/). +2. Build a Docker image: `docker-compose build` +3. Run the proofs inside the Docker container: `docker-compose run +tendermint-proof`. This will check all the proofs with the `ivy_check` +command and write the output of `ivy_check` to a subdirectory of `./output/' diff --git a/spec/ivy-proofs/abstract_tendermint.ivy b/spec/ivy-proofs/abstract_tendermint.ivy new file mode 100644 index 000000000..4a160be2a --- /dev/null +++ b/spec/ivy-proofs/abstract_tendermint.ivy @@ -0,0 +1,178 @@ +#lang ivy1.7 +# --- +# layout: page +# title: Abstract specification of Tendermint in Ivy +# --- + +# Here we define an abstract version of the Tendermint specification. We use +# two main forms of abstraction: a) We abstract over how information is +# transmitted (there is no network). b) We abstract functions using relations. +# For example, we abstract over a node's current round, instead only tracking +# with a relation which rounds the node has left. We do something similar for +# the `lockedRound` variable. This is in order to avoid using a function from +# node to round, and it allows us to emit verification conditions that are +# efficiently solvable by Z3. + +# This specification also defines the observations that are used to adjudicate +# misbehavior. Well-behaved nodes faithfully observe every message that they +# use to take a step, while Byzantine nodes can fake observations about +# themselves (including withholding observations). Misbehavior is defined using +# the collection of all observations made (in reality, those observations must +# be collected first, but we do not model this process). + +include domain_model + +module abstract_tendermint = { + +# Protocol state +# ############## + + relation left_round(N:node, R:round) + relation prevoted(N:node, R:round, V:value) + relation precommitted(N:node, R:round, V:value) + relation decided(N:node, R:round, V:value) + relation locked(N:node, R:round, V:value) + +# Accountability relations +# ######################## + + relation observed_prevoted(N:node, R:round, V:value) + relation observed_precommitted(N:node, R:round, V:value) + +# relations that are defined in terms of the previous two: + relation observed_equivocation(N:node) + relation observed_unlawful_prevote(N:node) + relation agreement + relation accountability_violation + + object defs = { # we hide those definitions and use them only when needed + private { + definition [observed_equivocation_def] observed_equivocation(N) = exists V1,V2,R . + V1 ~= V2 & (observed_precommitted(N,R,V1) & observed_precommitted(N,R,V2) | observed_prevoted(N,R,V1) & observed_prevoted(N,R,V2)) + + definition [observed_unlawful_prevote_def] observed_unlawful_prevote(N) = exists V1,V2,R1,R2 . + V1 ~= value.nil & V2 ~= value.nil & V1 ~= V2 & R1 < R2 & observed_precommitted(N,R1,V1) & observed_prevoted(N,R2,V2) + & forall Q,R . R1 <= R & R < R2 & nset.is_quorum(Q) -> exists N2 . nset.member(N2,Q) & ~observed_prevoted(N2,R,V2) + + definition [agreement_def] agreement = forall N1,N2,R1,R2,V1,V2 . well_behaved(N1) & well_behaved(N2) & decided(N1,R1,V1) & decided(N2,R2,V2) -> V1 = V2 + + definition [accountability_violation_def] accountability_violation = exists Q1,Q2 . nset.is_quorum(Q1) & nset.is_quorum(Q2) & (forall N . nset.member(N,Q1) & nset.member(N,Q2) -> observed_equivocation(N) | observed_unlawful_prevote(N)) + } + } + +# Protocol transitions +# #################### + + after init { + left_round(N,R) := R < 0; + prevoted(N,R,V) := false; + precommitted(N,R,V) := false; + decided(N,R,V) := false; + locked(N,R,V) := false; + + observed_prevoted(N,R,V) := false; + observed_precommitted(N,R,V) := false; + } + +# Actions are named after the corresponding line numbers in the Tendermint +# arXiv paper. + + action l_11(n:node, r:round) = { # start round r + require ~left_round(n,r); + left_round(n,R) := R < r; + } + + action l_22(n:node, rp:round, v:value) = { + require ~left_round(n,rp); + require ~prevoted(n,rp,V) & ~precommitted(n,rp,V); + require (forall R,V . locked(n,R,V) -> V = v) | v = value.nil; + prevoted(n, rp, v) := true; + left_round(n, R) := R < rp; # leave all lower rounds. + + observed_prevoted(n, rp, v) := observed_prevoted(n, rp, v) | well_behaved(n); # the node observes itself + } + + action l_28(n:node, rp:round, v:value, vr:round, q:nset) = { + require ~left_round(n,rp) & ~prevoted(n,rp,V); + require ~prevoted(n,rp,V) & ~precommitted(n,rp,V); + require vr < rp; + require nset.is_quorum(q) & (forall N . nset.member(N,q) -> (prevoted(N,vr,v) | ~well_behaved(N))); + var proposal:value; + if value.valid(v) & ((forall R0,V0 . locked(n,R0,V0) -> R0 <= vr) | (forall R,V . locked(n,R,V) -> V = v)) { + proposal := v; + } + else { + proposal := value.nil; + }; + prevoted(n, rp, proposal) := true; + left_round(n, R) := R < rp; # leave all lower rounds + + observed_prevoted(N, vr, v) := observed_prevoted(N, vr, v) | (well_behaved(n) & nset.member(N,q)); # the node observes the prevotes of quorum q + observed_prevoted(n, rp, proposal) := observed_prevoted(n, rp, proposal) | well_behaved(n); # the node observes itself + } + + action l_36(n:node, rp:round, v:value, q:nset) = { + require v ~= value.nil; + require ~left_round(n,rp); + require exists V . prevoted(n,rp,V); + require ~precommitted(n,rp,V); + require nset.is_quorum(q) & (forall N . nset.member(N,q) -> (prevoted(N,rp,v) | ~well_behaved(N))); + precommitted(n, rp, v) := true; + left_round(n, R) := R < rp; # leave all lower rounds + locked(n,R,V) := R <= rp & V = v; + + observed_prevoted(N, rp, v) := observed_prevoted(N, rp, v) | (well_behaved(n) & nset.member(N,q)); # the node observes the prevotes of quorum q + observed_precommitted(n, rp, v) := observed_precommitted(n, rp, v) | well_behaved(n); # the node observes itself + } + + action l_44(n:node, rp:round, q:nset) = { + require ~left_round(n,rp); + require ~precommitted(n,rp,V); + require nset.is_quorum(q) & (forall N .nset.member(N,q) -> (prevoted(N,rp,value.nil) | ~well_behaved(N))); + precommitted(n, rp, value.nil) := true; + left_round(n, R) := R < rp; # leave all lower rounds + + observed_prevoted(N, rp, value.nil) := observed_prevoted(N, rp, value.nil) | (well_behaved(n) & nset.member(N,q)); # the node observes the prevotes of quorum q + observed_precommitted(n, rp, value.nil) := observed_precommitted(n, rp, value.nil) | well_behaved(n); # the node observes itself + } + + action l_57(n:node, rp:round) = { + require ~left_round(n,rp); + require ~prevoted(n,rp,V); + prevoted(n, rp, value.nil) := true; + left_round(n, R) := R < rp; # leave all lower rounds + + observed_prevoted(n, rp, value.nil) := observed_prevoted(n, rp, value.nil) | well_behaved(n); # the node observes itself + } + + action l_61(n:node, rp:round) = { + require ~left_round(n,rp); + require ~precommitted(n,rp,V); + precommitted(n, rp, value.nil) := true; + left_round(n, R) := R < rp; # leave all lower rounds + + observed_precommitted(n, rp, value.nil) := observed_precommitted(n, rp, value.nil) | well_behaved(n); # the node observes itself + } + + action decide(n:node, r:round, v:value, q:nset) = { + require v ~= value.nil; + require nset.is_quorum(q) & (forall N . nset.member(N, q) -> (precommitted(N, r, v) | ~well_behaved(N))); + decided(n, r, v) := true; + + observed_precommitted(N, r, v) := observed_precommitted(N, r, v) | (well_behaved(n) & nset.member(N,q)); # the node observes the precommits of quorum q + + } + + action misbehave = { +# Byzantine nodes can claim they observed whatever they want about themselves, +# but they cannot remove observations. Note that we use assume because we don't +# want those to be checked; we just want them to be true (that's the model of +# Byzantine behavior). + observed_prevoted(N,R,V) := *; + assume (old observed_prevoted(N,R,V)) -> observed_prevoted(N,R,V); + assume well_behaved(N) -> old observed_prevoted(N,R,V) = observed_prevoted(N,R,V); + observed_precommitted(N,R,V) := *; + assume (old observed_precommitted(N,R,V)) -> observed_precommitted(N,R,V); + assume well_behaved(N) -> old observed_precommitted(N,R,V) = observed_precommitted(N,R,V); + } +} diff --git a/spec/ivy-proofs/accountable_safety_1.ivy b/spec/ivy-proofs/accountable_safety_1.ivy new file mode 100644 index 000000000..02bdf1add --- /dev/null +++ b/spec/ivy-proofs/accountable_safety_1.ivy @@ -0,0 +1,143 @@ +#lang ivy1.7 +# --- +# layout: page +# title: Proof of Classic Safety +# --- + +include tendermint +include abstract_tendermint + +# Here we prove the first accountability property: if two well-behaved nodes +# disagree, then there are two quorums Q1 and Q2 such that all members of the +# intersection of Q1 and Q2 have violated the accountability properties. + +# The proof is done in two steps: first we prove the abstract specification +# satisfies the property, and then we show by refinement that this property +# also holds in the concrete specification. + +# To see what is checked in the refinement proof, use `ivy_show isolate=accountable_safety_1 accountable_safety_1.ivy` +# To see what is checked in the abstract correctness proof, use `ivy_show isolate=abstract_accountable_safety_1 accountable_safety_1.ivy` +# To check the whole proof, use `ivy_check accountable_safety_1.ivy`. + + +# Proof of the accountability property in the abstract specification +# ================================================================== + +# We prove with tactics (see `lemma_1` and `lemma_2`) that, if some basic +# invariants hold (see `invs` below), then the accountability property holds. + +isolate abstract_accountable_safety = { + + instantiate abstract_tendermint + +# The main property +# ----------------- + +# If there is disagreement, then there is evidence that a third of the nodes +# have violated the protocol: + invariant [accountability] agreement | accountability_violation + proof { + apply lemma_1.thm # this reduces to goal to three subgoals: p1, p2, and p3 (see their definition below) + proof [p1] { + assume invs.inv1 + } + proof [p2] { + assume invs.inv2 + } + proof [p3] { + assume invs.inv3 + } + } + +# The invariants +# -------------- + + isolate invs = { + + # well-behaved nodes observe their own actions faithfully: + invariant [inv1] well_behaved(N) -> (observed_precommitted(N,R,V) = precommitted(N,R,V)) + # if a value is precommitted by a well-behaved node, then a quorum is observed to prevote it: + invariant [inv2] (exists N . well_behaved(N) & precommitted(N,R,V)) & V ~= value.nil -> exists Q . nset.is_quorum(Q) & forall N2 . nset.member(N2,Q) -> observed_prevoted(N2,R,V) + # if a value is decided by a well-behaved node, then a quorum is observed to precommit it: + invariant [inv3] (exists N . well_behaved(N) & decided(N,R,V)) -> 0 <= R & V ~= value.nil & exists Q . nset.is_quorum(Q) & forall N2 . nset.member(N2,Q) -> observed_precommitted(N2,R,V) + private { + invariant (precommitted(N,R,V) | prevoted(N,R,V)) -> 0 <= R + invariant R < 0 -> left_round(N,R) + } + + } with this, nset, round, accountable_bft.max_2f_byzantine + +# The theorems proved with tactics +# -------------------------------- + +# Using complete induction on rounds, we prove that, assuming that the +# invariants inv1, inv2, and inv3 hold, the accountability property holds. + +# For technical reasons, we separate the proof in two steps + isolate lemma_1 = { + + specification { + theorem [thm] { + property [p1] forall N,R,V . well_behaved(N) -> (observed_precommitted(N,R,V) = precommitted(N,R,V)) + property [p2] forall R,V . (exists N . well_behaved(N) & precommitted(N,R,V)) & V ~= value.nil -> exists Q . nset.is_quorum(Q) & forall N2 . nset.member(N2,Q) -> observed_prevoted(N2,R,V) + property [p3] forall R,V. (exists N . well_behaved(N) & decided(N,R,V)) -> 0 <= R & V ~= value.nil & exists Q . nset.is_quorum(Q) & forall N2 . nset.member(N2,Q) -> observed_precommitted(N2,R,V) + #------------------------------------------------------------------------------------------------------------------------------------------- + property agreement | accountability_violation + } + proof { + assume inductive_property # the theorem follows from what we prove by induction below + } + } + + implementation { + # complete induction is not built-in, so we introduce it with an axiom. Note that this only holds for a type where 0 is the smallest element + axiom [complete_induction] { + relation p(X:round) + { # base case + property p(0) + } + { # inductive step: show that if the property is true for all X lower or equal to x and y=x+1, then the property is true of y + individual a:round + individual b:round + property (forall X. 0 <= X & X <= a -> p(X)) & round.succ(a,b) -> p(b) + } + #-------------------------- + property forall X . 0 <= X -> p(X) + } + + # The main lemma: if inv1 and inv2 below hold and a quorum is observed to + # precommit V1 at R1 and another quorum is observed to precommit V2~=V1 at + # R2>=R1, then the intersection of two quorums (i.e. f+1 nodes) is observed to + # violate the protocol. We prove this by complete induction on R2. + theorem [inductive_property] { + property [p1] forall N,R,V . well_behaved(N) -> (observed_precommitted(N,R,V) = precommitted(N,R,V)) + property [p2] forall R,V . (exists N . well_behaved(N) & precommitted(N,R,V)) -> V = value.nil | exists Q . nset.is_quorum(Q) & forall N2 . nset.member(N2,Q) -> observed_prevoted(N2,R,V) + #----------------------------------------------------------------------------------------------------------------------- + property forall R2. 0 <= R2 -> ((exists V2,Q1,R1,V1,Q1 . V1 ~= value.nil & V2 ~= value.nil & V1 ~= V2 & 0 <= R1 & R1 <= R2 & nset.is_quorum(Q1) & (forall N . nset.member(N,Q1) -> observed_precommitted(N,R1,V1)) & (exists Q2 . nset.is_quorum(Q2) & forall N . nset.member(N,Q2) -> observed_prevoted(N,R2,V2))) -> accountability_violation) + } + proof { + apply complete_induction # the two subgoals (base case and inductive case) are then discharged automatically + # NOTE: this can take a long time depending on the SMT random seed (to try a different seed, use `ivy_check seed=$RANDOM` + } + } + } with this, round, nset, accountable_bft.max_2f_byzantine, defs.observed_equivocation_def, defs.observed_unlawful_prevote_def, defs.accountability_violation_def, defs.agreement_def + +} with round + +# The final proof +# =============== + +isolate accountable_safety_1 = { + +# First we instantiate the concrete protocol: + instantiate tendermint(abstract_accountable_safety) + +# We then define what we mean by agreement + relation agreement + definition [agreement_def] agreement = forall N1,N2. well_behaved(N1) & well_behaved(N2) & server.decision(N1) ~= value.nil & server.decision(N2) ~= value.nil -> server.decision(N1) = server.decision(N2) + + invariant abstract_accountable_safety.agreement -> agreement + + invariant [accountability] agreement | abstract_accountable_safety.accountability_violation + +} with value, round, proposers, shim, abstract_accountable_safety, abstract_accountable_safety.defs.agreement_def, accountable_safety_1.agreement_def diff --git a/spec/ivy-proofs/accountable_safety_2.ivy b/spec/ivy-proofs/accountable_safety_2.ivy new file mode 100644 index 000000000..7fb928909 --- /dev/null +++ b/spec/ivy-proofs/accountable_safety_2.ivy @@ -0,0 +1,52 @@ +#lang ivy1.7 + +include tendermint +include abstract_tendermint + +# Here we prove the second accountability property: no well-behaved node is +# ever observed to violate the accountability properties. + +# The proof is done in two steps: first we prove the the abstract specification +# satisfies the property, and then we show by refinement that this property +# also holds in the concrete specification. + +# To see what is checked in the refinement proof, use `ivy_show isolate=accountable_safety_2 accountable_safety_2.ivy` +# To see what is checked in the abstract correctness proof, use `ivy_show isolate=abstract_accountable_safety_2 accountable_safety_2.ivy` +# To check the whole proof, use `ivy_check complete=fo accountable_safety_2.ivy`. + +# Proof that the property holds in the abstract specification +# ============================================================ + +isolate abstract_accountable_safety_2 = { + + instantiate abstract_tendermint + +# the main property: + invariant [wb_never_punished] well_behaved(N) -> ~(observed_equivocation(N) | observed_unlawful_prevote(N)) + +# the main invariant for proving wb_not_punished: + invariant well_behaved(N) & precommitted(N,R,V) & ~locked(N,R,V) & V ~= value.nil -> exists R2,V2 . V2 ~= value.nil & R < R2 & precommitted(N,R2,V2) & locked(N,R2,V2) + + invariant (exists N . well_behaved(N) & precommitted(N,R,V) & V ~= value.nil) -> exists Q . nset.is_quorum(Q) & forall N . nset.member(N,Q) -> observed_prevoted(N,R,V) + + invariant well_behaved(N) -> (observed_prevoted(N,R,V) <-> prevoted(N,R,V)) + invariant well_behaved(N) -> (observed_precommitted(N,R,V) <-> precommitted(N,R,V)) + +# nodes stop prevoting or precommitting in lower rounds when doing so in a higher round: + invariant well_behaved(N) & prevoted(N,R2,V2) & R1 < R2 -> left_round(N,R1) + invariant well_behaved(N) & locked(N,R2,V2) & R1 < R2 -> left_round(N,R1) + + invariant [precommit_unique_per_round] well_behaved(N) & precommitted(N,R,V1) & precommitted(N,R,V2) -> V1 = V2 + +} with nset, round, abstract_accountable_safety_2.defs.observed_equivocation_def, abstract_accountable_safety_2.defs.observed_unlawful_prevote_def + +# Proof that the property holds in the concrete specification +# =========================================================== + +isolate accountable_safety_2 = { + + instantiate tendermint(abstract_accountable_safety_2) + + invariant well_behaved(N) -> ~(abstract_accountable_safety_2.observed_equivocation(N) | abstract_accountable_safety_2.observed_unlawful_prevote(N)) + +} with round, value, shim, abstract_accountable_safety_2, abstract_accountable_safety_2.defs.observed_equivocation_def, abstract_accountable_safety_2.defs.observed_unlawful_prevote_def diff --git a/spec/ivy-proofs/check_proofs.sh b/spec/ivy-proofs/check_proofs.sh new file mode 100755 index 000000000..6afd1a962 --- /dev/null +++ b/spec/ivy-proofs/check_proofs.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# returns non-zero error code if any proof fails + +success=0 +log_dir=$(cat /dev/urandom | tr -cd 'a-f0-9' | head -c 6) +cmd="ivy_check seed=$RANDOM" +mkdir -p output/$log_dir + +echo "Checking classic safety:" +res=$($cmd classic_safety.ivy | tee "output/$log_dir/classic_safety.txt" | tail -n 1) +if [ "$res" = "OK" ]; then + echo "OK" +else + echo "FAILED" + success=1 +fi + +echo "Checking accountable safety 1:" +res=$($cmd accountable_safety_1.ivy | tee "output/$log_dir/accountable_safety_1.txt" | tail -n 1) +if [ "$res" = "OK" ]; then + echo "OK" +else + echo "FAILED" + success=1 +fi + +echo "Checking accountable safety 2:" +res=$($cmd complete=fo accountable_safety_2.ivy | tee "output/$log_dir/accountable_safety_2.txt" | tail -n 1) +if [ "$res" = "OK" ]; then + echo "OK" +else + echo "FAILED" + success=1 +fi + +echo +echo "See ivy_check output in the output/ folder" +exit $success diff --git a/spec/ivy-proofs/classic_safety.ivy b/spec/ivy-proofs/classic_safety.ivy new file mode 100644 index 000000000..b422a2c17 --- /dev/null +++ b/spec/ivy-proofs/classic_safety.ivy @@ -0,0 +1,85 @@ +#lang ivy1.7 +# --- +# layout: page +# title: Proof of Classic Safety +# --- + +include tendermint +include abstract_tendermint + +# Here we prove the classic safety property: assuming that every two quorums +# have a well-behaved node in common, no two well-behaved nodes ever disagree. + +# The proof is done in two steps: first we prove the the abstract specification +# satisfies the property, and then we show by refinement that this property +# also holds in the concrete specification. + +# To see what is checked in the refinement proof, use `ivy_show isolate=classic_safety classic_safety.ivy` +# To see what is checked in the abstract correctness proof, use `ivy_show isolate=abstract_classic_safety classic_safety.ivy` + +# To check the whole proof, use `ivy_check classic_safety.ivy`. + +# Note that all the verification conditions sent to Z3 for this proof are in +# EPR. + +# Classic safety in the abstract model +# ==================================== + +# We start by proving that classic safety holds in the abstract model. + +isolate abstract_classic_safety = { + + instantiate abstract_tendermint + + invariant [classic_safety] classic_bft.quorum_intersection & decided(N1,R1,V1) & decided(N2,R2,V2) -> V1 = V2 + +# The notion of choosable value +# ----------------------------- + + relation choosable(R:round, V:value) + definition choosable(R,V) = exists Q . nset.is_quorum(Q) & forall N . well_behaved(N) & nset.member(N,Q) -> ~left_round(N,R) | precommitted(N,R,V) + +# Main invariants +# --------------- + +# `classic_safety` is inductive relative to those invariants + + invariant [decision_is_quorum_precommit] (exists N1 . decided(N1,R,V)) -> exists Q. nset.is_quorum(Q) & forall N2. well_behaved(N2) & nset.member(N2, Q) -> precommitted(N2,R,V) + + invariant [precommitted_is_quorum_prevote] V ~= value.nil & (exists N1 . precommitted(N1,R,V)) -> exists Q. nset.is_quorum(Q) & forall N2. well_behaved(N2) & nset.member(N2, Q) -> prevoted(N2,R,V) + + invariant [prevote_unique_per_round] prevoted(N,R,V1) & prevoted(N,R,V2) -> V1 = V2 + +# This is the core invariant: as long as a precommitted value is still choosable, it remains protected by a lock and prevents any new value from being prevoted: + invariant [locks] classic_bft.quorum_intersection & V ~= value.nil & precommitted(N,R,V) & choosable(R,V) -> locked(N,R,V) & forall R2,V2 . R < R2 & prevoted(N,R2,V2) -> V2 = V | V2 = value.nil + +# Supporting invariants +# --------------------- + +# The main invariants are inductive relative to those + + invariant decided(N,R,V) -> V ~= value.nil + + invariant left_round(N,R2) & R1 < R2 -> left_round(N,R1) # if a node left round R2>R1, then it also left R1: + + invariant prevoted(N,R2,V2) & R1 < R2 -> left_round(N,R1) + invariant precommitted(N,R2,V2) & R1 < R2 -> left_round(N,R1) + +} with round, nset, classic_bft.quorum_intersection_def + +# The refinement proof +# ==================== + +# Now, thanks to the refinement relation that we establish in +# `concrete_tendermint.ivy`, we prove that classic safety transfers to the +# concrete specification: +isolate classic_safety = { + + # We instantiate the `tendermint` module providing `abstract_classic_safety` as abstract model. + instantiate tendermint(abstract_classic_safety) + + # We prove that if every two quorums have a well-behaved node in common, + # then well-behaved nodes never disagree: + invariant [classic_safety] classic_bft.quorum_intersection & server.decision(N1) ~= value.nil & server.decision(N2) ~= value.nil -> server.decision(N1) = server.decision(N2) + +} with value, round, proposers, shim, abstract_classic_safety # here we list all the specifications that we rely on for this proof diff --git a/spec/ivy-proofs/count_lines.sh b/spec/ivy-proofs/count_lines.sh new file mode 100755 index 000000000..b2c457e21 --- /dev/null +++ b/spec/ivy-proofs/count_lines.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +r='^\s*$\|^\s*\#\|^\s*\}\s*$\|^\s*{\s*$' # removes comments and blank lines and lines that contain only { or } +N1=`cat tendermint.ivy domain_model.ivy network_shim.ivy | grep -v $r'\|.*invariant.*' | wc -l` +N2=`cat abstract_tendermint.ivy | grep "observed_" | wc -l` # the observed_* variables specify the observations of the nodes +SPEC_LINES=`expr $N1 + $N2` +echo "spec lines: $SPEC_LINES" +N3=`cat abstract_tendermint.ivy | grep -v $r'\|.*observed_.*' | wc -l` +N4=`cat accountable_safety_1.ivy | grep -v $r | wc -l` +PROOF_LINES=`expr $N3 + $N4` +echo "proof lines: $PROOF_LINES" +RATIO=`bc <<< "scale=2;$PROOF_LINES / $SPEC_LINES"` +echo "proof-to-code ratio for the accountable-safety property: $RATIO" diff --git a/spec/ivy-proofs/docker-compose.yml b/spec/ivy-proofs/docker-compose.yml new file mode 100644 index 000000000..e0612d4b1 --- /dev/null +++ b/spec/ivy-proofs/docker-compose.yml @@ -0,0 +1,7 @@ +version: '3' +services: + tendermint-proof: + build: . + volumes: + - ./:/home/user/tendermint-proof:ro + - ./output:/home/user/tendermint-proof/output:rw diff --git a/spec/ivy-proofs/domain_model.ivy b/spec/ivy-proofs/domain_model.ivy new file mode 100644 index 000000000..0f12f7288 --- /dev/null +++ b/spec/ivy-proofs/domain_model.ivy @@ -0,0 +1,143 @@ +#lang ivy1.7 + +include order # this is a file from the standard library (`ivy/ivy/include/1.7/order.ivy`) + +isolate round = { + type this + individual minus_one:this + relation succ(R1:round, R2:round) + action incr(i:this) returns (j:this) + specification { +# to simplify verification, we treat rounds as an abstract totally ordered set with a successor relation. + instantiate totally_ordered(this) + property minus_one < 0 + property succ(X,Z) -> (X < Z & ~(X < Y & Y < Z)) + after incr { + ensure succ(i,j) + } + } + implementation { +# here we prove that the abstraction is sound. + interpret this -> int # rounds are integers in the Tendermint specification. + definition minus_one = 0-1 + definition succ(R1,R2) = R2 = R1 + 1 + implement incr { + j := i+1; + } + } +} + +instance node : iterable # nodes are a set with an order, that can be iterated over (see order.ivy in the standard library) + +relation well_behaved(N:node) # whether a node is well-behaved or not. NOTE: Used only in the proof and the Byzantine model; Nodes do know know who is well-behaved and who is not. + +isolate proposers = { + # each round has a unique proposer in Tendermint. In order to avoid a + # function from round to node (which makes verification more difficult), we + # abstract over this function using a relation. + relation is_proposer(N:node, R:round) + export action get_proposer(r:round) returns (n:node) + specification { + property is_proposer(N1,R) & is_proposer(N2,R) -> N1 = N2 + after get_proposer { + ensure is_proposer(n,r); + } + } + implementation { + function f(R:round):node + definition f(r:round) = <<>> + definition is_proposer(N,R) = N = f(R) + implement get_proposer { + n := f(r); + } + } +} + +isolate value = { # the type of values + type this + relation valid(V:value) + individual nil:value + specification { + property ~valid(nil) + } + implementation { + interpret value -> bv[2] + definition nil = <<< -1 >>> # let's say nil is -1 + definition valid(V) = V ~= nil + } +} + +object nset = { # the type of node sets + type this # a set of N=3f+i nodes for 0 + #include + namespace hash_space { + template + class hash > { + public: + size_t operator()(const std::set &s) const { + hash h; + size_t res = 0; + for (const T &e : s) + res += h(e); + return res; + } + }; + } + >>> + interpret nset -> <<< std::set<`node`> >>> + definition member(n:node, s:nset) = <<< `s`.find(`n`) != `s`.end() >>> + definition is_quorum(s:nset) = <<< 3*`s`.size() > 2*`node.size` >>> + definition is_blocking(s:nset) = <<< 3*`s`.size() > `node.size` >>> + implement empty { + <<< + >>> + } + implement insert { + <<< + `t` = `s`; + `t`.insert(`n`); + >>> + } + <<< encode `nset` + + std::ostream &operator <<(std::ostream &s, const `nset` &a) { + s << "{"; + for (auto iter = a.begin(); iter != a.end(); iter++) { + if (iter != a.begin()) s << ", "; + s << *iter; + } + s << "}"; + return s; + } + + template <> + `nset` _arg<`nset`>(std::vector &args, unsigned idx, long long bound) { + throw std::invalid_argument("Not implemented"); // no syntax for nset values in the REPL + } + + >>> + } +} + +object classic_bft = { + relation quorum_intersection + private { + definition [quorum_intersection_def] quorum_intersection = forall Q1,Q2. exists N. well_behaved(N) & nset.member(N, Q1) & nset.member(N, Q2) # every two quorums have a well-behaved node in common + } +} + +trusted isolate accountable_bft = { + # this is our baseline assumption about quorums: + private { + property [max_2f_byzantine] exists N . well_behaved(N) & nset.member(N,Q) # every quorum has a well-behaved member + } +} diff --git a/spec/ivy-proofs/network_shim.ivy b/spec/ivy-proofs/network_shim.ivy new file mode 100644 index 000000000..ebc3a04fc --- /dev/null +++ b/spec/ivy-proofs/network_shim.ivy @@ -0,0 +1,133 @@ +#lang ivy1.7 +# --- +# layout: page +# title: Network model and network shim +# --- + +# Here we define a network module, which is our model of the network, and a +# shim module that sits on top of the network and which, upon receiving a +# message, calls the appropriate protocol handler. + +include domain_model + +# Here we define an enumeration type for identifying the 3 different types of +# messages that nodes send. +object msg_kind = { # TODO: merge with step_t + type this = {proposal, prevote, precommit} +} + +# Here we define the type of messages `msg`. Its members are structs with the fields described below. +object msg = { + type this = struct { + m_kind : msg_kind, + m_src : node, + m_round : round, + m_value : value, + m_vround : round + } +} + +# This is our model of the network: +isolate net = { + + export action recv(dst:node,v:msg) + action send(src:node,dst:node,v:msg) + # Note that the `recv` action is exported, meaning that it can be called + # non-deterministically by the environment any time it is enabled. In other + # words, a packet that is in flight can be received at any time. In this + # sense, the network is fully asynchronous. Moreover, there is no + # requirement that a given message will be received at all. + + # The state of the network consists of all the packets that have been + # sent so far, along with their destination. + relation sent(V:msg, N:node) + + after init { + sent(V, N) := false + } + + before send { + sent(v,dst) := true + } + + before recv { + require sent(v,dst) # only sent messages can be received. + } +} + +# The network shim sits on top of the network and, upon receiving a message, +# calls the appropriate protocol handler. It also exposes a `broadcast` action +# that sends to all nodes. + +isolate shim = { + + # In order not repeat the same code for each handler, we use a handler + # module parameterized by the type of message it will handle. Below we + # instantiate this module for the 3 types of messages of Tendermint + module handler(p_kind) = { + action handle(dst:node,m:msg) + object spec = { + before handle { + assert sent(m,dst) & m.m_kind = p_kind + } + } + } + + instance proposal_handler : handler(msg_kind.proposal) + instance prevote_handler : handler(msg_kind.prevote) + instance precommit_handler : handler(msg_kind.precommit) + + relation sent(M:msg,N:node) + + action broadcast(src:node,m:msg) + action send(src:node,dst:node,m:msg) + + specification { + after init { + sent(M,D) := false; + } + before broadcast { + sent(m,D) := true + } + before send { + sent(m,dst) := true + } + } + + # Here we give an implementation of it that satisfies its specification: + implementation { + + implement net.recv(dst:node,m:msg) { + + if m.m_kind = msg_kind.proposal { + call proposal_handler.handle(dst,m) + } + else if m.m_kind = msg_kind.prevote { + call prevote_handler.handle(dst,m) + } + else if m.m_kind = msg_kind.precommit { + call precommit_handler.handle(dst,m) + } + } + + implement broadcast { # broadcast sends to all nodes, including the sender. + var iter := node.iter.create(0); + while ~iter.is_end + invariant net.sent(M,D) -> sent(M,D) + { + var n := iter.val; + call net.send(src,n,m); + iter := iter.next; + } + } + + implement send { + call net.send(src,dst,m) + } + + private { + invariant net.sent(M,D) -> sent(M,D) + } + } + +} with net, node # to prove that the shim implementation satisfies the shim specification, we rely on the specification of net and node. diff --git a/spec/ivy-proofs/output/.gitignore b/spec/ivy-proofs/output/.gitignore new file mode 100644 index 000000000..5e7d2734c --- /dev/null +++ b/spec/ivy-proofs/output/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/spec/ivy-proofs/tendermint.ivy b/spec/ivy-proofs/tendermint.ivy new file mode 100644 index 000000000..b7678bef9 --- /dev/null +++ b/spec/ivy-proofs/tendermint.ivy @@ -0,0 +1,420 @@ +#lang ivy1.7 +# --- +# layout: page +# title: Specification of Tendermint in Ivy +# --- + +# This specification closely follows the pseudo-code given in "The latest +# gossip on BFT consensus" by E. Buchman, J. Kwon, Z. Milosevic +# + +include domain_model +include network_shim + +# We model the Tendermint protocol as an Ivy object. Like in Object-Oriented +# Programming, the basic structuring unit in Ivy is the object. Objects have +# internal state and actions (i.e. methods in OO parlance) that modify their +# state. We model Tendermint as an object whose actions represent steps taken +# by individual nodes in the protocol. Actions in Ivy can have preconditions, +# and a valid execution is a sequence of actions whose preconditions are all +# satisfied in the state in which they are called. + +# For technical reasons, we define below a `tendermint` module instead of an +# object. Ivy modules are a little bit like classes in OO programs, and like +# classes they can be instantiated to obtain objects. To instantiate the +# `tendermint` module, we must provide an abstract-protocol object. This allows +# us to use different abstract-protocol objects for different parts of the +# proof, and to do so without too much notational burden (we could have used +# Ivy monitors, but then we would need to prefix every variable name by the +# name of the object containing it, which clutters things a bit compared to the +# approach we took). + +# The abstract-protocol object is called by the resulting tendermint object so +# as to run the abstract protocol alongside the concrete protocol. This allows +# us to transfer properties proved of the abstract protocol to the concrete +# protocol, as follows. First, we prove that running the abstract protocol in +# this way results in a valid execution of the abstract protocol. This is done +# by checking that all preconditions of the abstract actions are satisfied at +# their call sites. Second, we establish a relation between abstract state and +# concrete state (in the form of invariants of the resulting, two-object +# transition system) that allow us to transfer properties proved in the +# abstract protocol to the concrete protocol (for example, we prove that any +# decision made in the Tendermint protocol is also made in the abstract +# protocol; if the abstract protocol satisfies the agreement property, this +# allows us to conclude that the Tendermint protocol also does). + +# The abstract protocol object that we will use is always the same, and only +# the abstract properties that we prove about it change in the different +# instantiations of the `tendermint` module. Thus we provide common invariants +# that a) allow to prove that the abstract preconditions are met, and b) +# provide a refinement relation (see end of the module) relating the state of +# Tendermint to the state of the abstract protocol. + +# In the model, Byzantine nodes can send whatever messages they want, except +# that they cannot forge sender identities. This reflects the fact that, in +# practice, nodes use public key cryptography to sign their messages. + +# Finally, note that the observations that serve to adjudicate misbehavior are +# defined only in the abstract protocol (they happen in the abstract actions). + +module tendermint(abstract_protocol) = { + + # the initial value of a node: + function init_val(N:node): value + + # the three type of steps + object step_t = { + type this = {propose, prevote, precommit} + } # refer to those e.g. as step_t.propose + + object server(n:node) = { + + # the current round of a node + individual round_p: round + + individual step: step_t + + individual decision: value + + individual lockedValue: value + individual lockedRound: round + + individual validValue: value + individual validRound: round + + + relation done_l34(R:round) + relation done_l36(R:round, V:value) + relation done_l47(R:round) + + # variables for scheduling request + relation propose_timer_scheduled(R:round) + relation prevote_timer_scheduled(R:round) + relation precommit_timer_scheduled(R:round) + + relation _recved_proposal(Sender:node, R:round, V:value, VR:round) + relation _recved_prevote(Sender:node, R:round, V:value) + relation _recved_precommit(Sender:node, R:round, V:value) + + relation _has_started + + after init { + round_p := 0; + step := step_t.propose; + decision := value.nil; + + lockedValue := value.nil; + lockedRound := round.minus_one; + + validValue := value.nil; + validRound := round.minus_one; + + done_l34(R) := false; + done_l36(R, V) := false; + done_l47(R) := false; + + propose_timer_scheduled(R) := false; + prevote_timer_scheduled(R) := false; + precommit_timer_scheduled(R) := false; + + _recved_proposal(Sender, R, V, VR) := false; + _recved_prevote(Sender, R, V) := false; + _recved_precommit(Sender, R, V) := false; + + _has_started := false; + } + + action getValue returns (v:value) = { + v := init_val(n) + } + + export action start = { + require ~_has_started; + _has_started := true; + # line 10 + call startRound(0); + } + + # line 11-21 + action startRound(r:round) = { + # line 12 + round_p := r; + + # line 13 + step := step_t.propose; + + var proposal : value; + + # line 14 + if (proposers.get_proposer(r) = n) { + if validValue ~= value.nil { # line 15 + proposal := validValue; # line 16 + } else { + proposal := getValue(); # line 18 + }; + call broadcast_proposal(r, proposal, validRound); # line 19 + } else { + propose_timer_scheduled(r) := true; # line 21 + }; + + call abstract_protocol.l_11(n, r); + } + + # This action, as not exported, can only be called at specific call sites. + action broadcast_proposal(r:round, v:value, vr:round) = { + var m: msg; + m.m_kind := msg_kind.proposal; + m.m_src := n; + m.m_round := r; + m.m_value := v; + m.m_vround := vr; + call shim.broadcast(n,m); + } + + implement shim.proposal_handler.handle(msg:msg) { + _recved_proposal(msg.m_src, msg.m_round, msg.m_value, msg.m_vround) := true; + } + + # line 22-27 + export action l_22(v:value) = { + require _has_started; + require _recved_proposal(proposers.get_proposer(round_p), round_p, v, round.minus_one); + require step = step_t.propose; + + if (value.valid(v) & (lockedRound = round.minus_one | lockedValue = v)) { + call broadcast_prevote(round_p, v); # line 24 + call abstract_protocol.l_22(n, round_p, v); + } else { + call broadcast_prevote(round_p, value.nil); # line 26 + call abstract_protocol.l_22(n, round_p, value.nil); + }; + + # line 27 + step := step_t.prevote; + } + + # line 28-33 + export action l_28(r:round, v:value, vr:round, q:nset) = { + require _has_started; + require r = round_p; + require _recved_proposal(proposers.get_proposer(r), r, v, vr); + require nset.is_quorum(q); + require nset.member(N,q) -> _recved_prevote(N,vr,v); + require step = step_t.propose; + require vr >= 0 & vr < r; + + # line 29 + if (value.valid(v) & (lockedRound <= vr | lockedValue = v)) { + call broadcast_prevote(r, v); + } else { + call broadcast_prevote(r, value.nil); + }; + + call abstract_protocol.l_28(n,r,v,vr,q); + step := step_t.prevote; + } + + action broadcast_prevote(r:round, v:value) = { + var m: msg; + m.m_kind := msg_kind.prevote; + m.m_src := n; + m.m_round := r; + m.m_value := v; + call shim.broadcast(n,m); + } + + implement shim.prevote_handler.handle(msg:msg) { + _recved_prevote(msg.m_src, msg.m_round, msg.m_value) := true; + } + + # line 34-35 + export action l_34(r:round, q:nset) = { + require _has_started; + require round_p = r; + require nset.is_quorum(q); + require exists V . nset.member(N,q) -> _recved_prevote(N,r,V); + require step = step_t.prevote; + require ~done_l34(r); + done_l34(r) := true; + + prevote_timer_scheduled(r) := true; + } + + + # line 36-43 + export action l_36(r:round, v:value, q:nset) = { + require _has_started; + require r = round_p; + require exists VR . round.minus_one <= VR & VR < r & _recved_proposal(proposers.get_proposer(r), r, v, VR); + require nset.is_quorum(q); + require nset.member(N,q) -> _recved_prevote(N,r,v); + require value.valid(v); + require step = step_t.prevote | step = step_t.precommit; + + require ~done_l36(r,v); + done_l36(r, v) := true; + + if step = step_t.prevote { + lockedValue := v; # line 38 + lockedRound := r; # line 39 + call broadcast_precommit(r, v); # line 40 + step := step_t.precommit; # line 41 + call abstract_protocol.l_36(n, r, v, q); + }; + + validValue := v; # line 42 + validRound := r; # line 43 + } + + # line 44-46 + export action l_44(r:round, q:nset) = { + require _has_started; + require r = round_p; + require nset.is_quorum(q); + require nset.member(N,q) -> _recved_prevote(N,r,value.nil); + require step = step_t.prevote; + + call broadcast_precommit(r, value.nil); # line 45 + step := step_t.precommit; # line 46 + + call abstract_protocol.l_44(n, r, q); + } + + action broadcast_precommit(r:round, v:value) = { + var m: msg; + m.m_kind := msg_kind.precommit; + m.m_src := n; + m.m_round := r; + m.m_value := v; + call shim.broadcast(n,m); + } + + implement shim.precommit_handler.handle(msg:msg) { + _recved_precommit(msg.m_src, msg.m_round, msg.m_value) := true; + } + + + # line 47-48 + export action l_47(r:round, q:nset) = { + require _has_started; + require round_p = r; + require nset.is_quorum(q); + require nset.member(N,q) -> exists V . _recved_precommit(N,r,V); + require ~done_l47(r); + done_l47(r) := true; + + precommit_timer_scheduled(r) := true; + } + + + # line 49-54 + export action l_49_decide(r:round, v:value, q:nset) = { + require _has_started; + require exists VR . round.minus_one <= VR & VR < r & _recved_proposal(proposers.get_proposer(r), r, v, VR); + require nset.is_quorum(q); + require nset.member(N,q) -> _recved_precommit(N,r,v); + require decision = value.nil; + + if value.valid(v) { + decision := v; + # MORE for next height + call abstract_protocol.decide(n, r, v, q); + } + } + + # line 55-56 + export action l_55(r:round, b:nset) = { + require _has_started; + require nset.is_blocking(b); + require nset.member(N,b) -> exists VR . round.minus_one <= VR & VR < r & exists V . _recved_proposal(N,r,V,VR) | _recved_prevote(N,r,V) | _recved_precommit(N,r,V); + require r > round_p; + call startRound(r); # line 56 + } + + # line 57-60 + export action onTimeoutPropose(r:round) = { + require _has_started; + require propose_timer_scheduled(r); + require r = round_p; + require step = step_t.propose; + call broadcast_prevote(r,value.nil); + step := step_t.prevote; + + call abstract_protocol.l_57(n,r); + + propose_timer_scheduled(r) := false; + } + + # line 61-64 + export action onTimeoutPrevote(r:round) = { + require _has_started; + require prevote_timer_scheduled(r); + require r = round_p; + require step = step_t.prevote; + call broadcast_precommit(r,value.nil); + step := step_t.precommit; + + call abstract_protocol.l_61(n,r); + + prevote_timer_scheduled(r) := false; + } + + # line 65-67 + export action onTimeoutPrecommit(r:round) = { + require _has_started; + require precommit_timer_scheduled(r); + require r = round_p; + call startRound(round.incr(r)); + + precommit_timer_scheduled(r) := false; + } + +# The Byzantine actions +# --------------------- + +# Byzantine nodes can send whatever they want, but they cannot send +# messages on behalf of well-behaved nodes. In practice this is implemented +# using cryptography (e.g. public-key cryptography). + + export action byzantine_send(m:msg, dst:node) = { + require ~well_behaved(n); + require ~well_behaved(m.m_src); # cannot forge the identity of well-behaved nodes + call shim.send(n,dst,m); + } + +# Byzantine nodes can also report fake observations, as defined in the abstract protocol. + export action fake_observations = { + call abstract_protocol.misbehave + } + +# Invariants +# ---------- + +# We provide common invariants that a) allow to prove that the abstract +# preconditions are met, and b) provide a refinement relation. + + + specification { + + invariant 0 <= round_p + invariant abstract_protocol.left_round(n,R) <-> R < round_p + + invariant lockedRound ~= round.minus_one -> forall R,V . abstract_protocol.locked(n,R,V) <-> R <= lockedRound & lockedValue = V + invariant lockedRound = round.minus_one -> forall R,V . ~abstract_protocol.locked(n,R,V) + + invariant forall M:msg . well_behaved(M.m_src) & M.m_kind = msg_kind.prevote & shim.sent(M,N) -> abstract_protocol.prevoted(M.m_src,M.m_round,M.m_value) + invariant well_behaved(N) & _recved_prevote(N,R,V) -> abstract_protocol.prevoted(N,R,V) + invariant forall M:msg . well_behaved(M.m_src) & M.m_kind = msg_kind.precommit & shim.sent(M,N) -> abstract_protocol.precommitted(M.m_src,M.m_round,M.m_value) + invariant well_behaved(N) & _recved_precommit(N,R,V) -> abstract_protocol.precommitted(N,R,V) + + invariant (step = step_t.prevote | step = step_t.propose) -> ~abstract_protocol.precommitted(n,round_p,V) + invariant step = step_t.propose -> ~abstract_protocol.prevoted(n,round_p,V) + invariant step = step_t.prevote -> exists V . abstract_protocol.prevoted(n,round_p,V) + + invariant round_p < R -> ~(abstract_protocol.prevoted(n,R,V) | abstract_protocol.precommitted(n,R,V)) + invariant ~_has_started -> step = step_t.propose & ~(abstract_protocol.prevoted(n,R,V) | abstract_protocol.precommitted(n,R,V)) & round_p = 0 + + invariant decision ~= value.nil -> exists R . abstract_protocol.decided(n,R,decision) + } + } +} diff --git a/spec/ivy-proofs/tendermint_test.ivy b/spec/ivy-proofs/tendermint_test.ivy new file mode 100644 index 000000000..1299fc086 --- /dev/null +++ b/spec/ivy-proofs/tendermint_test.ivy @@ -0,0 +1,127 @@ +#lang ivy1.7 + +include tendermint +include abstract_tendermint + +isolate ghost_ = { + instantiate abstract_tendermint +} + +isolate protocol = { + instantiate tendermint(ghost_) # here we instantiate the parameter of the tendermint module with `ghost_`; however note that we don't extract any code for `ghost_` (it's not in the list of object in the extract, and it's thus sliced away). + implementation { + definition init_val(n:node) = <<< `n`%2 >>> + } + # attribute test = impl +} with ghost_, shim, value, round, proposers + +# Here we run a simple scenario that exhibits an execution in which nodes make +# a decision. We do this to rule out trivial modeling errors. + +# One option to check that this scenario is valid is to run it in Ivy's REPL. +# For this, first compile the scenario: +#```ivyc target=repl isolate=code trace=true tendermint_test.ivy +# Then, run the produced binary (e.g. for 4 nodes): +#``` ./tendermint_test 4 +# Finally, call the action: +#``` scenarios.scenario_1 +# Note that Ivy will check at runtime that all action preconditions are +# satisfied. For example, runing the scenario twice will cause a violation of +# the precondition of the `start` action, because a node cannot start twice +# (see `require ~_has_started` in action `start`). + +# Another possibility would be to run `ivy_check` on the scenario, but that +# does not seem to work at the moment. + +isolate scenarios = { + individual all:nset # will be used as parameter to actions requiring a quorum + + after init { + var iter := node.iter.create(0); + while ~iter.is_end + { + all := all.insert(iter.val); + iter := iter.next; + }; + assert nset.is_quorum(all); # we can also use asserts to make sure we are getting what we expect + } + + export action scenario_1 = { + # all nodes start: + var iter := node.iter.create(0); + while ~iter.is_end + { + call protocol.server.start(iter.val); + iter := iter.next; + }; + # all nodes receive the leader's proposal: + var m:msg; + m.m_kind := msg_kind.proposal; + m.m_src := 0; + m.m_round := 0; + m.m_value := 0; + m.m_vround := round.minus_one; + iter := node.iter.create(0); + while ~iter.is_end + { + call net.recv(iter.val,m); + iter := iter.next; + }; + # all nodes prevote: + iter := node.iter.create(0); + while ~iter.is_end + { + call protocol.server.l_22(iter.val,0); + iter := iter.next; + }; + # all nodes receive each other's prevote messages; + m.m_kind := msg_kind.prevote; + m.m_vround := 0; + iter := node.iter.create(0); + while ~iter.is_end + { + var iter2 := node.iter.create(0); # the sender + while ~iter2.is_end + { + m.m_src := iter2.val; + call net.recv(iter.val,m); + iter2 := iter2.next; + }; + iter := iter.next; + }; + # all nodes precommit: + iter := node.iter.create(0); + while ~iter.is_end + { + call protocol.server.l_36(iter.val,0,0,all); + iter := iter.next; + }; + # all nodes receive each other's pre-commits + m.m_kind := msg_kind.precommit; + iter := node.iter.create(0); + while ~iter.is_end + { + var iter2 := node.iter.create(0); # the sender + while ~iter2.is_end + { + m.m_src := iter2.val; + call net.recv(iter.val,m); + iter2 := iter2.next; + }; + iter := iter.next; + }; + # now all nodes can decide: + iter := node.iter.create(0); + while ~iter.is_end + { + call protocol.server.l_49_decide(iter.val,0,0,all); + iter := iter.next; + }; + } + + # TODO: add more scenarios + +} with round, node, proposers, value, nset, protocol, shim, net + +# extract code = protocol, shim, round, node +extract code = round, node, proposers, value, nset, protocol, shim, net, scenarios diff --git a/spec/light-client/README.md b/spec/light-client/README.md new file mode 100644 index 000000000..42f20d46c --- /dev/null +++ b/spec/light-client/README.md @@ -0,0 +1,205 @@ +--- +order: 1 +parent: + title: Light Client + order: 5 +--- + +# Light Client Specification + +This directory contains work-in-progress English and TLA+ specifications for the Light Client +protocol. Implementations of the light client can be found in +[Rust](https://github.com/informalsystems/tendermint-rs/tree/master/light-client) and +[Go](https://github.com/tendermint/tendermint/tree/master/light). + +Light clients are assumed to be initialized once from a trusted source +with a trusted header and validator set. The light client +protocol allows a client to then securely update its trusted state by requesting and +verifying a minimal set of data from a network of full nodes (at least one of which is correct). + +The light client is decomposed into two main components: + +- [Commit Verification](#Commit-Verification) - verify signed headers and associated validator + set changes from a single full node, called primary +- [Attack Detection](#Attack-Detection) - verify commits across multiple full nodes (called secondaries) and detect conflicts (ie. the existence of a lightclient attack) + +In case a lightclient attack is detected, the lightclient submits evidence to a full node which is responsible for "accountability", that is, punishing attackers: + +- [Accountability](#Accountability) - given evidence for an attack, compute a set of validators that are responsible for it. + +## Commit Verification + +The [English specification](verification/verification_001_published.md) describes the light client +commit verification problem in terms of the temporal properties +[LCV-DIST-SAFE.1](https://github.com/informalsystems/tendermint-rs/blob/master/docs/spec/lightclient/verification/verification_001_published.md#lcv-dist-safe1) and +[LCV-DIST-LIVE.1](https://github.com/informalsystems/tendermint-rs/blob/master/docs/spec/lightclient/verification/verification_001_published.md#lcv-dist-live1). +Commit verification is assumed to operate within the Tendermint Failure Model, where +2/3 of validators are correct for some time period and +validator sets can change arbitrarily at each height. + +A light client protocol is also provided, including all checks that +need to be performed on headers, commits, and validator sets +to satisfy the temporal properties - so a light client can continuously +synchronize with a blockchain. Clients can skip possibly +many intermediate headers by exploiting overlap in trusted and untrusted validator sets. +When there is not enough overlap, a bisection routine can be used to find a +minimal set of headers that do provide the required overlap. + +The [TLA+ specification ver. 001](verification/Lightclient_A_1.tla) +is a formal description of the +commit verification protocol executed by a client, including the safety and +termination, which can be model checked with Apalache. + +A more detailed TLA+ specification of +[Light client verification ver. 003](verification/Lightclient_003_draft.tla) +is currently under peer review. + +The `MC*.tla` files contain concrete parameters for the +[TLA+ specification](verification/Lightclient_A_1.tla), in order to do model checking. +For instance, [MC4_3_faulty.tla](verification/MC4_3_faulty.tla) contains the following parameters +for the nodes, heights, the trusting period, the clock drifts, +correctness of the primary node, and the ratio of the faulty processes: + +```tla +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 3 +TRUSTING_PERIOD == 1400 \* the trusting period in some time units +CLOCK_DRIFT = 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT = 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators +``` + +To run a complete set of experiments, clone [apalache](https://github.com/informalsystems/apalache) and [apalache-tests](https://github.com/informalsystems/apalache-tests) into a directory `$DIR` and run the following commands: + +```sh +$DIR/apalache-tests/scripts/mk-run.py --memlimit 28 002bmc-apalache-ok.csv $DIR/apalache . out +./out/run-all.sh +``` + +After the experiments have finished, you can collect the logs by executing the following command: + +```sh +cd ./out +$DIR/apalache-tests/scripts/parse-logs.py --human . +``` + +All lines in `results.csv` should report `Deadlock`, which means that the algorithm +has terminated and no invariant violation was found. + +Similar to [002bmc-apalache-ok.csv](verification/002bmc-apalache-ok.csv), +file [003bmc-apalache-error.csv](verification/003bmc-apalache-error.csv) specifies +the set of experiments that should result in counterexamples: + +```sh +$DIR/apalache-tests/scripts/mk-run.py --memlimit 28 003bmc-apalache-error.csv $DIR/apalache . out +./out/run-all.sh +``` + +All lines in `results.csv` should report `Error`. + +The following table summarizes the experimental results for Light client verification +version 001. The TLA+ properties can be found in the +[TLA+ specification](verification/Lightclient_A_1.tla). + The experiments were run in an AWS instance equipped with 32GB +RAM and a 4-core Intel® Xeon® CPU E5-2686 v4 @ 2.30GHz CPU. +We write “✗=k” when a bug is reported at depth k, and “✓<=k” when +no bug is reported up to depth k. + +![Experimental results](experiments.png) + +The experimental results for version 003 are to be added. + +## Attack Detection + +The [English specification](detection/detection_003_reviewed.md) +defines light client attacks (and how they differ from blockchain +forks), and describes the problem of a light client detecting +these attacks by communicating with a network of full nodes, +where at least one is correct. + +The specification also contains a detection protocol that checks +whether the header obtained from the primary via the verification +protocol matches corresponding headers provided by the secondaries. +If this is not the case, the protocol analyses the verification traces +of the involved full nodes +and generates +[evidence](detection/detection_003_reviewed.md#tmbc-lc-evidence-data1) +of misbehavior that can be submitted to a full node so that +the faulty validators can be punished. + +The [TLA+ specification](detection/LCDetector_003_draft.tla) +is a formal description of the +detection protocol for two peers, including the safety and +termination, which can be model checked with Apalache. + +The `LCD_MC*.tla` files contain concrete parameters for the +[TLA+ specification](detection/LCDetector_003_draft.tla), +in order to run the model checker. +For instance, [LCD_MC4_4_faulty.tla](detection/MC4_4_faulty.tla) +contains the following parameters +for the nodes, heights, the trusting period, the clock drifts, +correctness of the nodes, and the ratio of the faulty processes: + +```tla +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 3 +TRUSTING_PERIOD == 1400 \* the trusting period in some time units +CLOCK_DRIFT = 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT = 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +IS_SECONDARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators +``` + +To run a complete set of experiments, clone [apalache](https://github.com/informalsystems/apalache) and [apalache-tests](https://github.com/informalsystems/apalache-tests) into a directory `$DIR` and run the following commands: + +```sh +$DIR/apalache-tests/scripts/mk-run.py --memlimit 28 004bmc-apalache-ok.csv $DIR/apalache . out +./out/run-all.sh +``` + +After the experiments have finished, you can collect the logs by executing the following command: + +```sh +cd ./out +$DIR/apalache-tests/scripts/parse-logs.py --human . +``` + +All lines in `results.csv` should report `Deadlock`, which means that the algorithm +has terminated and no invariant violation was found. + +Similar to [004bmc-apalache-ok.csv](verification/004bmc-apalache-ok.csv), +file [005bmc-apalache-error.csv](verification/005bmc-apalache-error.csv) specifies +the set of experiments that should result in counterexamples: + +```sh +$DIR/apalache-tests/scripts/mk-run.py --memlimit 28 005bmc-apalache-error.csv $DIR/apalache . out +./out/run-all.sh +``` + +All lines in `results.csv` should report `Error`. + +The detailed experimental results are to be added soon. + +## Accountability + +The [English specification](attacks/isolate-attackers_002_reviewed.md) +defines the protocol that is executed on a full node upon receiving attack [evidence](detection/detection_003_reviewed.md#tmbc-lc-evidence-data1) from a lightclient. In particular, the protocol handles three types of attacks + +- lunatic +- equivocation +- amnesia + +We discussed in the [last part](attacks/isolate-attackers_002_reviewed.md#Part-III---Completeness) of the English specification +that the non-lunatic cases are defined by having the same validator set in the conflicting blocks. For these cases, +computer-aided analysis of [Tendermint Consensus in TLA+](./accountability/README.md) shows that equivocation and amnesia capture all non-lunatic attacks. + +The [TLA+ specification](attacks/Isolation_001_draft.tla) +is a formal description of the +protocol, including the safety property, which can be model checked with Apalache. + +Similar to the other specifications, [MC_5_3.tla](attacks/MC_5_3.tla) contains concrete parameters to run the model checker. The specification can be checked within seconds. + +[tendermint-accountability](./accountability/README.md) diff --git a/spec/light-client/accountability/001indinv-apalache.csv b/spec/light-client/accountability/001indinv-apalache.csv new file mode 100644 index 000000000..37c6aeda2 --- /dev/null +++ b/spec/light-client/accountability/001indinv-apalache.csv @@ -0,0 +1,13 @@ +no,filename,tool,timeout,init,inv,next,args +1,MC_n4_f1.tla,apalache,10h,TypedInv,TypedInv,,--length=1 --cinit=ConstInit +2,MC_n4_f2.tla,apalache,10h,TypedInv,TypedInv,,--length=1 --cinit=ConstInit +3,MC_n5_f1.tla,apalache,10h,TypedInv,TypedInv,,--length=1 --cinit=ConstInit +4,MC_n5_f2.tla,apalache,10h,TypedInv,TypedInv,,--length=1 --cinit=ConstInit +5,MC_n4_f1.tla,apalache,20h,Init,TypedInv,,--length=0 --cinit=ConstInit +6,MC_n4_f2.tla,apalache,20h,Init,TypedInv,,--length=0 --cinit=ConstInit +7,MC_n5_f1.tla,apalache,20h,Init,TypedInv,,--length=0 --cinit=ConstInit +8,MC_n5_f2.tla,apalache,20h,Init,TypedInv,,--length=0 --cinit=ConstInit +9,MC_n4_f1.tla,apalache,20h,TypedInv,Agreement,,--length=0 --cinit=ConstInit +10,MC_n4_f2.tla,apalache,20h,TypedInv,Accountability,,--length=0 --cinit=ConstInit +11,MC_n5_f1.tla,apalache,20h,TypedInv,Agreement,,--length=0 --cinit=ConstInit +12,MC_n5_f2.tla,apalache,20h,TypedInv,Accountability,,--length=0 --cinit=ConstInit diff --git a/spec/light-client/accountability/MC_n4_f1.tla b/spec/light-client/accountability/MC_n4_f1.tla new file mode 100644 index 000000000..7a828b498 --- /dev/null +++ b/spec/light-client/accountability/MC_n4_f1.tla @@ -0,0 +1,22 @@ +----------------------------- MODULE MC_n4_f1 ------------------------------- +CONSTANT Proposer \* the proposer function from 0..NRounds to 1..N + +\* the variables declared in TendermintAcc3 +VARIABLES + round, step, decision, lockedValue, lockedRound, validValue, validRound, + msgsPropose, msgsPrevote, msgsPrecommit, evidence, action + +INSTANCE TendermintAccDebug_004_draft WITH + Corr <- {"c1", "c2", "c3"}, + Faulty <- {"f1"}, + N <- 4, + T <- 1, + ValidValues <- { "v0", "v1" }, + InvalidValues <- {"v2"}, + MaxRound <- 2 + +\* run Apalache with --cinit=ConstInit +ConstInit == \* the proposer is arbitrary -- works for safety + Proposer \in [Rounds -> AllProcs] + +============================================================================= diff --git a/spec/light-client/accountability/MC_n4_f2.tla b/spec/light-client/accountability/MC_n4_f2.tla new file mode 100644 index 000000000..893f18db6 --- /dev/null +++ b/spec/light-client/accountability/MC_n4_f2.tla @@ -0,0 +1,22 @@ +----------------------------- MODULE MC_n4_f2 ------------------------------- +CONSTANT Proposer \* the proposer function from 0..NRounds to 1..N + +\* the variables declared in TendermintAcc3 +VARIABLES + round, step, decision, lockedValue, lockedRound, validValue, validRound, + msgsPropose, msgsPrevote, msgsPrecommit, evidence, action + +INSTANCE TendermintAccDebug_004_draft WITH + Corr <- {"c1", "c2"}, + Faulty <- {"f3", "f4"}, + N <- 4, + T <- 1, + ValidValues <- { "v0", "v1" }, + InvalidValues <- {"v2"}, + MaxRound <- 2 + +\* run Apalache with --cinit=ConstInit +ConstInit == \* the proposer is arbitrary -- works for safety + Proposer \in [Rounds -> AllProcs] + +============================================================================= diff --git a/spec/light-client/accountability/MC_n4_f2_amnesia.tla b/spec/light-client/accountability/MC_n4_f2_amnesia.tla new file mode 100644 index 000000000..434fffaeb --- /dev/null +++ b/spec/light-client/accountability/MC_n4_f2_amnesia.tla @@ -0,0 +1,40 @@ +---------------------- MODULE MC_n4_f2_amnesia ------------------------------- +EXTENDS Sequences + +CONSTANT Proposer \* the proposer function from 0..NRounds to 1..N + +\* the variables declared in TendermintAcc3 +VARIABLES + round, step, decision, lockedValue, lockedRound, validValue, validRound, + msgsPropose, msgsPrevote, msgsPrecommit, evidence, action + +\* the variable declared in TendermintAccTrace3 +VARIABLE + toReplay + +\* old apalache annotations, fix with the new release +a <: b == a + +INSTANCE TendermintAccTrace_004_draft WITH + Corr <- {"c1", "c2"}, + Faulty <- {"f3", "f4"}, + N <- 4, + T <- 1, + ValidValues <- { "v0", "v1" }, + InvalidValues <- {"v2"}, + MaxRound <- 2, + Trace <- << + "UponProposalInPropose", + "UponProposalInPrevoteOrCommitAndPrevote", + "UponProposalInPrecommitNoDecision", + "OnRoundCatchup", + "UponProposalInPropose", + "UponProposalInPrevoteOrCommitAndPrevote", + "UponProposalInPrecommitNoDecision" + >> <: Seq(STRING) + +\* run Apalache with --cinit=ConstInit +ConstInit == \* the proposer is arbitrary -- works for safety + Proposer \in [Rounds -> AllProcs] + +============================================================================= diff --git a/spec/light-client/accountability/MC_n4_f3.tla b/spec/light-client/accountability/MC_n4_f3.tla new file mode 100644 index 000000000..b794fff5e --- /dev/null +++ b/spec/light-client/accountability/MC_n4_f3.tla @@ -0,0 +1,22 @@ +----------------------------- MODULE MC_n4_f3 ------------------------------- +CONSTANT Proposer \* the proposer function from 0..NRounds to 1..N + +\* the variables declared in TendermintAcc3 +VARIABLES + round, step, decision, lockedValue, lockedRound, validValue, validRound, + msgsPropose, msgsPrevote, msgsPrecommit, evidence, action + +INSTANCE TendermintAccDebug_004_draft WITH + Corr <- {"c1"}, + Faulty <- {"f2", "f3", "f4"}, + N <- 4, + T <- 1, + ValidValues <- { "v0", "v1" }, + InvalidValues <- {"v2"}, + MaxRound <- 2 + +\* run Apalache with --cinit=ConstInit +ConstInit == \* the proposer is arbitrary -- works for safety + Proposer \in [Rounds -> AllProcs] + +============================================================================= diff --git a/spec/light-client/accountability/MC_n5_f1.tla b/spec/light-client/accountability/MC_n5_f1.tla new file mode 100644 index 000000000..d65673a58 --- /dev/null +++ b/spec/light-client/accountability/MC_n5_f1.tla @@ -0,0 +1,22 @@ +----------------------------- MODULE MC_n5_f1 ------------------------------- +CONSTANT Proposer \* the proposer function from 0..NRounds to 1..N + +\* the variables declared in TendermintAcc3 +VARIABLES + round, step, decision, lockedValue, lockedRound, validValue, validRound, + msgsPropose, msgsPrevote, msgsPrecommit, evidence, action + +INSTANCE TendermintAccDebug_004_draft WITH + Corr <- {"c1", "c2", "c3", "c4"}, + Faulty <- {"f5"}, + N <- 5, + T <- 1, + ValidValues <- { "v0", "v1" }, + InvalidValues <- {"v2"}, + MaxRound <- 2 + +\* run Apalache with --cinit=ConstInit +ConstInit == \* the proposer is arbitrary -- works for safety + Proposer \in [Rounds -> AllProcs] + +============================================================================= diff --git a/spec/light-client/accountability/MC_n5_f2.tla b/spec/light-client/accountability/MC_n5_f2.tla new file mode 100644 index 000000000..c19aa98cc --- /dev/null +++ b/spec/light-client/accountability/MC_n5_f2.tla @@ -0,0 +1,22 @@ +----------------------------- MODULE MC_n5_f2 ------------------------------- +CONSTANT Proposer \* the proposer function from 0..NRounds to 1..N + +\* the variables declared in TendermintAcc3 +VARIABLES + round, step, decision, lockedValue, lockedRound, validValue, validRound, + msgsPropose, msgsPrevote, msgsPrecommit, evidence, action + +INSTANCE TendermintAccDebug_004_draft WITH + Corr <- {"c1", "c2", "c3"}, + Faulty <- {"f4", "f5"}, + N <- 5, + T <- 1, + ValidValues <- { "v0", "v1" }, + InvalidValues <- {"v2"}, + MaxRound <- 2 + +\* run Apalache with --cinit=ConstInit +ConstInit == \* the proposer is arbitrary -- works for safety + Proposer \in [Rounds -> AllProcs] + +============================================================================= diff --git a/spec/light-client/accountability/MC_n6_f1.tla b/spec/light-client/accountability/MC_n6_f1.tla new file mode 100644 index 000000000..2e992974f --- /dev/null +++ b/spec/light-client/accountability/MC_n6_f1.tla @@ -0,0 +1,22 @@ +----------------------------- MODULE MC_n6_f1 ------------------------------- +CONSTANT Proposer \* the proposer function from 0..NRounds to 1..N + +\* the variables declared in TendermintAcc3 +VARIABLES + round, step, decision, lockedValue, lockedRound, validValue, validRound, + msgsPropose, msgsPrevote, msgsPrecommit, evidence, action + +INSTANCE TendermintAccDebug_004_draft WITH + Corr <- {"c1", "c2", "c3", "c4", "c5"}, + Faulty <- {"f6"}, + N <- 4, + T <- 1, + ValidValues <- { "v0", "v1" }, + InvalidValues <- {"v2"}, + MaxRound <- 2 + +\* run Apalache with --cinit=ConstInit +ConstInit == \* the proposer is arbitrary -- works for safety + Proposer \in [Rounds -> AllProcs] + +============================================================================= diff --git a/spec/light-client/accountability/README.md b/spec/light-client/accountability/README.md new file mode 100644 index 000000000..bb872649b --- /dev/null +++ b/spec/light-client/accountability/README.md @@ -0,0 +1,308 @@ +--- +order: 1 +parent: + title: Accountability + order: 4 +--- + +# Fork accountability + +## Problem Statement + +Tendermint consensus guarantees the following specifications for all heights: + +* agreement -- no two correct full nodes decide differently. +* validity -- the decided block satisfies the predefined predicate *valid()*. +* termination -- all correct full nodes eventually decide, + +If the faulty validators have less than 1/3 of voting power in the current validator set. In the case where this assumption +does not hold, each of the specification may be violated. + +The agreement property says that for a given height, any two correct validators that decide on a block for that height decide on the same block. That the block was indeed generated by the blockchain, can be verified starting from a trusted (genesis) block, and checking that all subsequent blocks are properly signed. + +However, faulty nodes may forge blocks and try to convince users (light clients) that the blocks had been correctly generated. In addition, Tendermint agreement might be violated in the case where 1/3 or more of the voting power belongs to faulty validators: Two correct validators decide on different blocks. The latter case motivates the term "fork": as Tendermint consensus also agrees on the next validator set, correct validators may have decided on disjoint next validator sets, and the chain branches into two or more partitions (possibly having faulty validators in common) and each branch continues to generate blocks independently of the other. + +We say that a fork is a case in which there are two commits for different blocks at the same height of the blockchain. The proplem is to ensure that in those cases we are able to detect faulty validators (and not mistakenly accuse correct validators), and incentivize therefore validators to behave according to the protocol specification. + +**Conceptual Limit.** In order to prove misbehavior of a node, we have to show that the behavior deviates from correct behavior with respect to a given algorithm. Thus, an algorithm that detects misbehavior of nodes executing some algorithm *A* must be defined with respect to algorithm *A*. In our case, *A* is Tendermint consensus (+ other protocols in the infrastructure; e.g.,full nodes and the Light Client). If the consensus algorithm is changed/updated/optimized in the future, we have to check whether changes to the accountability algorithm are also required. All the discussions in this document are thus inherently specific to Tendermint consensus and the Light Client specification. + +**Q:** Should we distinguish agreement for validators and full nodes for agreement? The case where all correct validators agree on a block, but a correct full node decides on a different block seems to be slightly less severe that the case where two correct validators decide on different blocks. Still, if a contaminated full node becomes validator that may be problematic later on. Also it is not clear how gossiping is impaired if a contaminated full node is on a different branch. + +*Remark.* In the case 1/3 or more of the voting power belongs to faulty validators, also validity and termination can be broken. Termination can be broken if faulty processes just do not send the messages that are needed to make progress. Due to asynchrony, this is not punishable, because faulty validators can always claim they never received the messages that would have forced them to send messages. + +## The Misbehavior of Faulty Validators + +Forks are the result of faulty validators deviating from the protocol. In principle several such deviations can be detected without a fork actually occurring: + +1. double proposal: A faulty proposer proposes two different values (blocks) for the same height and the same round in Tendermint consensus. + +2. double signing: Tendermint consensus forces correct validators to prevote and precommit for at most one value per round. In case a faulty validator sends multiple prevote and/or precommit messages for different values for the same height/round, this is a misbehavior. + +3. lunatic validator: Tendermint consensus forces correct validators to prevote and precommit only for values *v* that satisfy *valid(v)*. If faulty validators prevote and precommit for *v* although *valid(v)=false* this is misbehavior. + +*Remark.* In isolation, Point 3 is an attack on validity (rather than agreement). However, the prevotes and precommits can then also be used to forge blocks. + +1. amnesia: Tendermint consensus has a locking mechanism. If a validator has some value v locked, then it can only prevote/precommit for v or nil. Sending prevote/precomit message for a different value v' (that is not nil) while holding lock on value v is misbehavior. + +2. spurious messages: In Tendermint consensus most of the message send instructions are guarded by threshold guards, e.g., one needs to receive *2f + 1* prevote messages to send precommit. Faulty validators may send precommit without having received the prevote messages. + +Independently of a fork happening, punishing this behavior might be important to prevent forks altogether. This should keep attackers from misbehaving: if less than 1/3 of the voting power is faulty, this misbehavior is detectable but will not lead to a safety violation. Thus, unless they have 1/3 or more (or in some cases more than 2/3) of the voting power attackers have the incentive to not misbehave. If attackers control too much voting power, we have to deal with forks, as discussed in this document. + +## Two types of forks + +* Fork-Full. Two correct validators decide on different blocks for the same height. Since also the next validator sets are decided upon, the correct validators may be partitioned to participate in two distinct branches of the forked chain. + +As in this case we have two different blocks (both having the same right/no right to exist), a central system invariant (one block per height decided by correct validators) is violated. As full nodes are contaminated in this case, the contamination can spread also to light clients. However, even without breaking this system invariant, light clients can be subject to a fork: + +* Fork-Light. All correct validators decide on the same block for height *h*, but faulty processes (validators or not), forge a different block for that height, in order to fool users (who use the light client). + +# Attack scenarios + +## On-chain attacks + +### Equivocation (one round) + +There are several scenarios in which forks might happen. The first is double signing within a round. + +* F1. Equivocation: faulty validators sign multiple vote messages (prevote and/or precommit) for different values *during the same round r* at a given height h. + +### Flip-flopping + +Tendermint consensus implements a locking mechanism: If a correct validator *p* receives proposal for value v and *2f + 1* prevotes for a value *id(v)* in round *r*, it locks *v* and remembers *r*. In this case, *p* also sends a precommit message for *id(v)*, which later may serve as proof that *p* locked *v*. +In subsequent rounds, *p* only sends prevote messages for a value it had previously locked. However, it is possible to change the locked value if in a future round *r' > r*, if the process receives proposal and *2f + 1* prevotes for a different value *v'*. In this case, *p* could send a prevote/precommit for *id(v')*. This algorithmic feature can be exploited in two ways: + +* F2. Faulty Flip-flopping (Amnesia): faulty validators precommit some value *id(v)* in round *r* (value *v* is locked in round *r*) and then prevote for different value *id(v')* in higher round *r' > r* without previously correctly unlocking value *v*. In this case faulty processes "forget" that they have locked value *v* and prevote some other value in the following rounds. +Some correct validators might have decided on *v* in *r*, and other correct validators decide on *v'* in *r'*. Here we can have branching on the main chain (Fork-Full). + +* F3. Correct Flip-flopping (Back to the past): There are some precommit messages signed by (correct) validators for value *id(v)* in round *r*. Still, *v* is not decided upon, and all processes move on to the next round. Then correct validators (correctly) lock and decide a different value *v'* in some round *r' > r*. And the correct validators continue; there is no branching on the main chain. +However, faulty validators may use the correct precommit messages from round *r* together with a posteriori generated faulty precommit messages for round *r* to forge a block for a value that was not decided on the main chain (Fork-Light). + +## Off-chain attacks + +F1-F3 may contaminate the state of full nodes (and even validators). Contaminated (but otherwise correct) full nodes may thus communicate faulty blocks to light clients. +Similarly, without actually interfering with the main chain, we can have the following: + +* F4. Phantom validators: faulty validators vote (sign prevote and precommit messages) in heights in which they are not part of the validator sets (at the main chain). + +* F5. Lunatic validator: faulty validator that sign vote messages to support (arbitrary) application state that is different from the application state that resulted from valid state transitions. + +## Types of victims + +We consider three types of potential attack victims: + +* FN: full node +* LCS: light client with sequential header verification +* LCB: light client with bisection based header verification + +F1 and F2 can be used by faulty validators to actually create multiple branches on the blockchain. That means that correctly operating full nodes decide on different blocks for the same height. Until a fork is detected locally by a full node (by receiving evidence from others or by some other local check that fails), the full node can spread corrupted blocks to light clients. + +*Remark.* If full nodes take a branch different from the one taken by the validators, it may be that the liveness of the gossip protocol may be affected. We should eventually look at this more closely. However, as it does not influence safety it is not a primary concern. + +F3 is similar to F1, except that no two correct validators decide on different blocks. It may still be the case that full nodes become affected. + +In addition, without creating a fork on the main chain, light clients can be contaminated by more than a third of validators that are faulty and sign a forged header +F4 cannot fool correct full nodes as they know the current validator set. Similarly, LCS know who the validators are. Hence, F4 is an attack against LCB that do not necessarily know the complete prefix of headers (Fork-Light), as they trust a header that is signed by at least one correct validator (trusting period method). + +The following table gives an overview of how the different attacks may affect different nodes. F1-F3 are *on-chain* attacks so they can corrupt the state of full nodes. Then if a light client (LCS or LCB) contacts a full node to obtain headers (or blocks), the corrupted state may propagate to the light client. + +F4 and F5 are *off-chain*, that is, these attacks cannot be used to corrupt the state of full nodes (which have sufficient knowledge on the state of the chain to not be fooled). + +| Attack | FN | LCS | LCB | +|:------:|:------:|:------:|:------:| +| F1 | direct | FN | FN | +| F2 | direct | FN | FN | +| F3 | direct | FN | FN | +| F4 | | | direct | +| F5 | | | direct | + +**Q:** Light clients are more vulnerable than full nodes, because the former do only verify headers but do not execute transactions. What kind of certainty is gained by a full node that executes a transaction? + +As a full node verifies all transactions, it can only be +contaminated by an attack if the blockchain itself violates its invariant (one block per height), that is, in case of a fork that leads to branching. + +## Detailed Attack Scenarios + +### Equivocation based attacks + +In case of equivocation based attacks, faulty validators sign multiple votes (prevote and/or precommit) in the same +round of some height. This attack can be executed on both full nodes and light clients. It requires 1/3 or more of voting power to be executed. + +#### Scenario 1: Equivocation on the main chain + +Validators: + +* CA - a set of correct validators with less than 1/3 of the voting power +* CB - a set of correct validators with less than 1/3 of the voting power +* CA and CB are disjoint +* F - a set of faulty validators with 1/3 or more voting power + +Observe that this setting violates the Tendermint failure model. + +Execution: + +* A faulty proposer proposes block A to CA +* A faulty proposer proposes block B to CB +* Validators from the set CA and CB prevote for A and B, respectively. +* Faulty validators from the set F prevote both for A and B. +* The faulty prevote messages + * for A arrive at CA long before the B messages + * for B arrive at CB long before the A messages +* Therefore correct validators from set CA and CB will observe +more than 2/3 of prevotes for A and B and precommit for A and B, respectively. +* Faulty validators from the set F precommit both values A and B. +* Thus, we have more than 2/3 commits for both A and B. + +Consequences: + +* Creating evidence of misbehavior is simple in this case as we have multiple messages signed by the same faulty processes for different values in the same round. + +* We have to ensure that these different messages reach a correct process (full node, monitor?), which can submit evidence. + +* This is an attack on the full node level (Fork-Full). +* It extends also to the light clients, +* For both we need a detection and recovery mechanism. + +#### Scenario 2: Equivocation to a light client (LCS) + +Validators: + +* a set F of faulty validators with more than 2/3 of the voting power. + +Execution: + +* for the main chain F behaves nicely +* F coordinates to sign a block B that is different from the one on the main chain. +* the light clients obtains B and trusts at as it is signed by more than 2/3 of the voting power. + +Consequences: + +Once equivocation is used to attack light client it opens space +for different kind of attacks as application state can be diverged in any direction. For example, it can modify validator set such that it contains only validators that do not have any stake bonded. Note that after a light client is fooled by a fork, that means that an attacker can change application state and validator set arbitrarily. + +In order to detect such (equivocation-based attack), the light client would need to cross check its state with some correct validator (or to obtain a hash of the state from the main chain using out of band channels). + +*Remark.* The light client would be able to create evidence of misbehavior, but this would require to pull potentially a lot of data from correct full nodes. Maybe we need to figure out different architecture where a light client that is attacked will push all its data for the current unbonding period to a correct node that will inspect this data and submit corresponding evidence. There are also architectures that assumes a special role (sometimes called fisherman) whose goal is to collect as much as possible useful data from the network, to do analysis and create evidence transactions. That functionality is outside the scope of this document. + +*Remark.* The difference between LCS and LCB might only be in the amount of voting power needed to convince light client about arbitrary state. In case of LCB where security threshold is at minimum, an attacker can arbitrarily modify application state with 1/3 or more of voting power, while in case of LCS it requires more than 2/3 of the voting power. + +### Flip-flopping: Amnesia based attacks + +In case of amnesia, faulty validators lock some value *v* in some round *r*, and then vote for different value *v'* in higher rounds without correctly unlocking value *v*. This attack can be used both on full nodes and light clients. + +#### Scenario 3: At most 2/3 of faults + +Validators: + +* a set F of faulty validators with 1/3 or more but at most 2/3 of the voting power +* a set C of correct validators + +Execution: + +* Faulty validators commit (without exposing it on the main chain) a block A in round *r* by collecting more than 2/3 of the + voting power (containing correct and faulty validators). +* All validators (correct and faulty) reach a round *r' > r*. +* Some correct validators in C do not lock any value before round *r'*. +* The faulty validators in F deviate from Tendermint consensus by ignoring that they locked A in *r*, and propose a different block B in *r'*. +* As the validators in C that have not locked any value find B acceptable, they accept the proposal for B and commit a block B. + +*Remark.* In this case, the more than 1/3 of faulty validators do not need to commit an equivocation (F1) as they only vote once per round in the execution. + +Detecting faulty validators in the case of such an attack can be done by the fork accountability mechanism described in: . + +If a light client is attacked using this attack with 1/3 or more of voting power (and less than 2/3), the attacker cannot change the application state arbitrarily. Rather, the attacker is limited to a state a correct validator finds acceptable: In the execution above, correct validators still find the value acceptable, however, the block the light client trusts deviates from the one on the main chain. + +#### Scenario 4: More than 2/3 of faults + +In case there is an attack with more than 2/3 of the voting power, an attacker can arbitrarily change application state. + +Validators: + +* a set F1 of faulty validators with 1/3 or more of the voting power +* a set F2 of faulty validators with less than 1/3 of the voting power + +Execution + +* Similar to Scenario 3 (however, messages by correct validators are not needed) +* The faulty validators in F1 lock value A in round *r* +* They sign a different value in follow-up rounds +* F2 does not lock A in round *r* + +Consequences: + +* The validators in F1 will be detectable by the the fork accountability mechanisms. +* The validators in F2 cannot be detected using this mechanism. +Only in case they signed something which conflicts with the application this can be used against them. Otherwise they do not do anything incorrect. +* This case is not covered by the report as it only assumes at most 2/3 of faulty validators. + +**Q:** do we need to define a special kind of attack for the case where a validator sign arbitrarily state? It seems that detecting such attack requires a different mechanism that would require as an evidence a sequence of blocks that led to that state. This might be very tricky to implement. + +### Back to the past + +In this kind of attack, faulty validators take advantage of the fact that they did not sign messages in some of the past rounds. Due to the asynchronous network in which Tendermint operates, we cannot easily differentiate between such an attack and delayed message. This kind of attack can be used at both full nodes and light clients. + +#### Scenario 5 + +Validators: + +* C1 - a set of correct validators with over 1/3 of the voting power +* C2 - a set of correct validators with 1/3 of the voting power +* C1 and C2 are disjoint +* F - a set of faulty validators with less than 1/3 voting power +* one additional faulty process *q* +* F and *q* violate the Tendermint failure model. + +Execution: + +* in a round *r* of height *h* we have C1 precommitting a value A, +* C2 precommits nil, +* F does not send any message +* *q* precommits nil. +* In some round *r' > r*, F and *q* and C2 commit some other value B different from A. +* F and *fp* "go back to the past" and sign precommit message for value A in round *r*. +* Together with precomit messages of C1 this is sufficient for a commit for value A. + +Consequences: + +* Only a single faulty validator that previously precommited nil did equivocation, while the other 1/3 of faulty validators actually executed an attack that has exactly the same sequence of messages as part of amnesia attack. Detecting this kind of attack boil down to mechanisms for equivocation and amnesia. + +**Q:** should we keep this as a separate kind of attack? It seems that equivocation, amnesia and phantom validators are the only kind of attack we need to support and this gives us security also in other cases. This would not be surprising as equivocation and amnesia are attacks that followed from the protocol and phantom attack is not really an attack to Tendermint but more to the Proof of Stake module. + +### Phantom validators + +In case of phantom validators, processes that are not part of the current validator set but are still bonded (as attack happen during their unbonding period) can be part of the attack by signing vote messages. This attack can be executed against both full nodes and light clients. + +#### Scenario 6 + +Validators: + +* F -- a set of faulty validators that are not part of the validator set on the main chain at height *h + k* + +Execution: + +* There is a fork, and there exist two different headers for height *h + k*, with different validator sets: + * VS2 on the main chain + * forged header VS2', signed by F (and others) + +* a light client has a trust in a header for height *h* (and the corresponding validator set VS1). +* As part of bisection header verification, it verifies the header at height *h + k* with new validator set VS2'. + +Consequences: + +* To detect this, a node needs to see both, the forged header and the canonical header from the chain. +* If this is the case, detecting these kind of attacks is easy as it just requires verifying if processes are signing messages in heights in which they are not part of the validator set. + +**Remark.** We can have phantom-validator-based attacks as a follow up of equivocation or amnesia based attack where forked state contains validators that are not part of the validator set at the main chain. In this case, they keep signing messages contributed to a forked chain (the wrong branch) although they are not part of the validator set on the main chain. This attack can also be used to attack full node during a period of time it is eclipsed. + +**Remark.** Phantom validator evidence has been removed from implementation as it was deemed, although possibly a plausible form of evidence, not relevant. Any attack on +the light client involving a phantom validator will have needed to be initiated by 1/3+ lunatic +validators that can forge a new validator set that includes the phantom validator. Only in +that case will the light client accept the phantom validators vote. We need only worry about +punishing the 1/3+ lunatic cabal, that is the root cause of the attack. + +### Lunatic validator + +Lunatic validator agrees to sign commit messages for arbitrary application state. It is used to attack light clients. +Note that detecting this behavior require application knowledge. Detecting this behavior can probably be done by +referring to the block before the one in which height happen. + +**Q:** can we say that in this case a validator declines to check if a proposed value is valid before voting for it? diff --git a/spec/light-client/accountability/Synopsis.md b/spec/light-client/accountability/Synopsis.md new file mode 100644 index 000000000..76da3868c --- /dev/null +++ b/spec/light-client/accountability/Synopsis.md @@ -0,0 +1,105 @@ + +# Synopsis + + A TLA+ specification of a simplified Tendermint consensus, tuned for + fork accountability. The simplifications are as follows: + +- the procotol runs for one height, that is, one-shot consensus + +- this specification focuses on safety, so timeouts are modelled with + with non-determinism + +- the proposer function is non-determinstic, no fairness is assumed + +- the messages by the faulty processes are injected right in the initial states + +- every process has the voting power of 1 + +- hashes are modelled as identity + + Having the above assumptions in mind, the specification follows the pseudo-code + of the Tendermint paper: + + Byzantine processes can demonstrate arbitrary behavior, including + no communication. However, we have to show that under the collective evidence + collected by the correct processes, at least `f+1` Byzantine processes demonstrate + one of the following behaviors: + +- Equivocation: a Byzantine process sends two different values + in the same round. + +- Amnesia: a Byzantine process locks a value, although it has locked + another value in the past. + +# TLA+ modules + +- [TendermintAcc_004_draft](TendermintAcc_004_draft.tla) is the protocol + specification, + +- [TendermintAccInv_004_draft](TendermintAccInv_004_draft.tla) contains an + inductive invariant for establishing the protocol safety as well as the + forking cases, + +- `MC_n_f`, e.g., [MC_n4_f1](MC_n4_f1.tla), contains fixed constants for + model checking with the [Apalache model + checker](https://github.com/informalsystems/apalache), + +- [TendermintAccTrace_004_draft](TendermintAccTrace_004_draft.tla) shows how + to restrict the execution space to a fixed sequence of actions (e.g., to + instantiate a counterexample), + +- [TendermintAccDebug_004_draft](TendermintAccDebug_004_draft.tla) contains + the useful definitions for debugging the protocol specification with TLC and + Apalache. + +# Reasoning about fork scenarios + +The theorem statements can be found in +[TendermintAccInv_004_draft.tla](TendermintAccInv_004_draft.tla). + +First, we would like to show that `TypedInv` is an inductive invariant. +Formally, the statement looks as follows: + +```tla +THEOREM TypedInvIsInductive == + \/ FaultyQuorum + \//\ Init => TypedInv + /\ TypedInv /\ [Next]_vars => TypedInv' +``` + +When over two-thirds of processes are faulty, `TypedInv` is not inductive. +However, there is no hope to repair the protocol in this case. We run +[Apalache](https://github.com/informalsystems/apalache) to prove this theorem +only for fixed instances of 4 to 5 validators. Apalache does not parse theorem +statements at the moment, so we ran Apalache using a shell script. To find a +parameterized argument, one has to use a theorem prover, e.g., TLAPS. + +Second, we would like to show that the invariant implies `Agreement`, that is, +no fork, provided that less than one third of processes is faulty. By combining +this theorem with the previous theorem, we conclude that the protocol indeed +satisfies Agreement under the condition `LessThanThirdFaulty`. + +```tla +THEOREM AgreementWhenLessThanThirdFaulty == + LessThanThirdFaulty /\ TypedInv => Agreement +``` + +Third, in the general case, we either have no fork, or two fork scenarios: + +```tla +THEOREM AgreementOrFork == + ~FaultyQuorum /\ TypedInv => Accountability +``` + +# Model checking results + +Check the report on [model checking with Apalache](./results/001indinv-apalache-report.md). + +To run the model checking experiments, use the script: + +```console +./run.sh +``` + +This script assumes that the apalache build is available in +`~/devl/apalache-unstable`. diff --git a/spec/light-client/accountability/TendermintAccDebug_004_draft.tla b/spec/light-client/accountability/TendermintAccDebug_004_draft.tla new file mode 100644 index 000000000..deaa990ea --- /dev/null +++ b/spec/light-client/accountability/TendermintAccDebug_004_draft.tla @@ -0,0 +1,100 @@ +------------------ MODULE TendermintAccDebug_004_draft ------------------------- +(* + A few definitions that we use for debugging TendermintAcc3, which do not belong + to the specification itself. + + * Version 3. Modular and parameterized definitions. + + Igor Konnov, 2020. + *) + +EXTENDS TendermintAccInv_004_draft + +\* make them parameters? +NFaultyProposals == 0 \* the number of injected faulty PROPOSE messages +NFaultyPrevotes == 6 \* the number of injected faulty PREVOTE messages +NFaultyPrecommits == 6 \* the number of injected faulty PRECOMMIT messages + +\* Given a set of allowed messages Msgs, this operator produces a function from +\* rounds to sets of messages. +\* Importantly, there will be exactly k messages in the image of msgFun. +\* We use this action to produce k faults in an initial state. +ProduceFaults(msgFun, From, k) == + \E f \in [1..k -> From]: + msgFun = [r \in Rounds |-> {m \in {f[i]: i \in 1..k}: m.round = r}] + +\* As TLC explodes with faults, we may have initial states without faults +InitNoFaults == + /\ round = [p \in Corr |-> 0] + /\ step = [p \in Corr |-> "PROPOSE"] + /\ decision = [p \in Corr |-> NilValue] + /\ lockedValue = [p \in Corr |-> NilValue] + /\ lockedRound = [p \in Corr |-> NilRound] + /\ validValue = [p \in Corr |-> NilValue] + /\ validRound = [p \in Corr |-> NilRound] + /\ msgsPropose = [r \in Rounds |-> EmptyMsgSet] + /\ msgsPrevote = [r \in Rounds |-> EmptyMsgSet] + /\ msgsPrecommit = [r \in Rounds |-> EmptyMsgSet] + /\ evidence = EmptyMsgSet + +(* + A specialized version of Init that injects NFaultyProposals proposals, + NFaultyPrevotes prevotes, NFaultyPrecommits precommits by the faulty processes + *) +InitFewFaults == + /\ round = [p \in Corr |-> 0] + /\ step = [p \in Corr |-> "PROPOSE"] + /\ decision = [p \in Corr |-> NilValue] + /\ lockedValue = [p \in Corr |-> NilValue] + /\ lockedRound = [p \in Corr |-> NilRound] + /\ validValue = [p \in Corr |-> NilValue] + /\ validRound = [p \in Corr |-> NilRound] + /\ ProduceFaults(msgsPrevote', + SetOfMsgs([type: {"PREVOTE"}, src: Faulty, round: Rounds, id: Values]), + NFaultyPrevotes) + /\ ProduceFaults(msgsPrecommit', + SetOfMsgs([type: {"PRECOMMIT"}, src: Faulty, round: Rounds, id: Values]), + NFaultyPrecommits) + /\ ProduceFaults(msgsPropose', + SetOfMsgs([type: {"PROPOSAL"}, src: Faulty, round: Rounds, + proposal: Values, validRound: Rounds \cup {NilRound}]), + NFaultyProposals) + /\ evidence = EmptyMsgSet + +\* Add faults incrementally +NextWithFaults == + \* either the protocol makes a step + \/ Next + \* or a faulty process sends a message + \//\ UNCHANGED <> + /\ \E p \in Faulty: + \E r \in Rounds: + \//\ UNCHANGED <> + /\ \E proposal \in ValidValues \union {NilValue}: + \E vr \in RoundsOrNil: + BroadcastProposal(p, r, proposal, vr) + \//\ UNCHANGED <> + /\ \E id \in ValidValues \union {NilValue}: + BroadcastPrevote(p, r, id) + \//\ UNCHANGED <> + /\ \E id \in ValidValues \union {NilValue}: + BroadcastPrecommit(p, r, id) + +(******************************** PROPERTIES ***************************************) +\* simple reachability properties to see that the spec is progressing +NoPrevote == \A p \in Corr: step[p] /= "PREVOTE" + +NoPrecommit == \A p \in Corr: step[p] /= "PRECOMMIT" + +NoValidPrecommit == + \A r \in Rounds: + \A m \in msgsPrecommit[r]: + m.id = NilValue \/ m.src \in Faulty + +NoHigherRounds == \A p \in Corr: round[p] < 1 + +NoDecision == \A p \in Corr: decision[p] = NilValue + +============================================================================= + diff --git a/spec/light-client/accountability/TendermintAccInv_004_draft.tla b/spec/light-client/accountability/TendermintAccInv_004_draft.tla new file mode 100644 index 000000000..5dd15396d --- /dev/null +++ b/spec/light-client/accountability/TendermintAccInv_004_draft.tla @@ -0,0 +1,370 @@ +------------------- MODULE TendermintAccInv_004_draft -------------------------- +(* + An inductive invariant for TendermintAcc3, which capture the forked + and non-forked cases. + + * Version 3. Modular and parameterized definitions. + * Version 2. Bugfixes in the spec and an inductive invariant. + + Igor Konnov, 2020. + *) + +EXTENDS TendermintAcc_004_draft + +(************************** TYPE INVARIANT ***********************************) +(* first, we define the sets of all potential messages *) +AllProposals == + SetOfMsgs([type: {"PROPOSAL"}, + src: AllProcs, + round: Rounds, + proposal: ValuesOrNil, + validRound: RoundsOrNil]) + +AllPrevotes == + SetOfMsgs([type: {"PREVOTE"}, + src: AllProcs, + round: Rounds, + id: ValuesOrNil]) + +AllPrecommits == + SetOfMsgs([type: {"PRECOMMIT"}, + src: AllProcs, + round: Rounds, + id: ValuesOrNil]) + +(* the standard type invariant -- importantly, it is inductive *) +TypeOK == + /\ round \in [Corr -> Rounds] + /\ step \in [Corr -> { "PROPOSE", "PREVOTE", "PRECOMMIT", "DECIDED" }] + /\ decision \in [Corr -> ValidValues \union {NilValue}] + /\ lockedValue \in [Corr -> ValidValues \union {NilValue}] + /\ lockedRound \in [Corr -> RoundsOrNil] + /\ validValue \in [Corr -> ValidValues \union {NilValue}] + /\ validRound \in [Corr -> RoundsOrNil] + /\ msgsPropose \in [Rounds -> SUBSET AllProposals] + /\ BenignRoundsInMessages(msgsPropose) + /\ msgsPrevote \in [Rounds -> SUBSET AllPrevotes] + /\ BenignRoundsInMessages(msgsPrevote) + /\ msgsPrecommit \in [Rounds -> SUBSET AllPrecommits] + /\ BenignRoundsInMessages(msgsPrecommit) + /\ evidence \in SUBSET (AllProposals \union AllPrevotes \union AllPrecommits) + /\ action \in { + "Init", + "InsertProposal", + "UponProposalInPropose", + "UponProposalInProposeAndPrevote", + "UponQuorumOfPrevotesAny", + "UponProposalInPrevoteOrCommitAndPrevote", + "UponQuorumOfPrecommitsAny", + "UponProposalInPrecommitNoDecision", + "OnTimeoutPropose", + "OnQuorumOfNilPrevotes", + "OnRoundCatchup" + } + +(************************** INDUCTIVE INVARIANT *******************************) +EvidenceContainsMessages == + \* evidence contains only the messages from: + \* msgsPropose, msgsPrevote, and msgsPrecommit + \A m \in evidence: + LET r == m.round + t == m.type + IN + CASE t = "PROPOSAL" -> m \in msgsPropose[r] + [] t = "PREVOTE" -> m \in msgsPrevote[r] + [] OTHER -> m \in msgsPrecommit[r] + +NoFutureMessagesForLargerRounds(p) == + \* a correct process does not send messages for the future rounds + \A r \in { rr \in Rounds: rr > round[p] }: + /\ \A m \in msgsPropose[r]: m.src /= p + /\ \A m \in msgsPrevote[r]: m.src /= p + /\ \A m \in msgsPrecommit[r]: m.src /= p + +NoFutureMessagesForCurrentRound(p) == + \* a correct process does not send messages in the future + LET r == round[p] IN + /\ Proposer[r] = p \/ \A m \in msgsPropose[r]: m.src /= p + /\ \/ step[p] \in {"PREVOTE", "PRECOMMIT", "DECIDED"} + \/ \A m \in msgsPrevote[r]: m.src /= p + /\ \/ step[p] \in {"PRECOMMIT", "DECIDED"} + \/ \A m \in msgsPrecommit[r]: m.src /= p + +\* the correct processes never send future messages +AllNoFutureMessagesSent == + \A p \in Corr: + /\ NoFutureMessagesForCurrentRound(p) + /\ NoFutureMessagesForLargerRounds(p) + +\* a correct process in the PREVOTE state has sent a PREVOTE message +IfInPrevoteThenSentPrevote(p) == + step[p] = "PREVOTE" => + \E m \in msgsPrevote[round[p]]: + /\ m.id \in ValidValues \cup { NilValue } + /\ m.src = p + +AllIfInPrevoteThenSentPrevote == + \A p \in Corr: IfInPrevoteThenSentPrevote(p) + +\* a correct process in the PRECOMMIT state has sent a PRECOMMIT message +IfInPrecommitThenSentPrecommit(p) == + step[p] = "PRECOMMIT" => + \E m \in msgsPrecommit[round[p]]: + /\ m.id \in ValidValues \cup { NilValue } + /\ m.src = p + +AllIfInPrecommitThenSentPrecommit == + \A p \in Corr: IfInPrecommitThenSentPrecommit(p) + +\* a process in the PRECOMMIT state has sent a PRECOMMIT message +IfInDecidedThenValidDecision(p) == + step[p] = "DECIDED" <=> decision[p] \in ValidValues + +AllIfInDecidedThenValidDecision == + \A p \in Corr: IfInDecidedThenValidDecision(p) + +\* a decided process should have received a proposal on its decision +IfInDecidedThenReceivedProposal(p) == + step[p] = "DECIDED" => + \E r \in Rounds: \* r is not necessarily round[p] + /\ \E m \in msgsPropose[r] \intersect evidence: + /\ m.src = Proposer[r] + /\ m.proposal = decision[p] + \* not inductive: /\ m.src \in Corr => (m.validRound <= r) + +AllIfInDecidedThenReceivedProposal == + \A p \in Corr: + IfInDecidedThenReceivedProposal(p) + +\* a decided process has received two-thirds of precommit messages +IfInDecidedThenReceivedTwoThirds(p) == + step[p] = "DECIDED" => + \E r \in Rounds: + LET PV == + { m \in msgsPrecommit[r] \intersect evidence: m.id = decision[p] } + IN + Cardinality(PV) >= THRESHOLD2 + +AllIfInDecidedThenReceivedTwoThirds == + \A p \in Corr: + IfInDecidedThenReceivedTwoThirds(p) + +\* for a round r, there is proposal by the round proposer for a valid round vr +ProposalInRound(r, proposedVal, vr) == + \E m \in msgsPropose[r]: + /\ m.src = Proposer[r] + /\ m.proposal = proposedVal + /\ m.validRound = vr + +TwoThirdsPrevotes(vr, v) == + LET PV == { mm \in msgsPrevote[vr] \intersect evidence: mm.id = v } IN + Cardinality(PV) >= THRESHOLD2 + +\* if a process sends a PREVOTE, then there are three possibilities: +\* 1) the process is faulty, 2) the PREVOTE cotains Nil, +\* 3) there is a proposal in an earlier (valid) round and two thirds of PREVOTES +IfSentPrevoteThenReceivedProposalOrTwoThirds(r) == + \A mpv \in msgsPrevote[r]: + \/ mpv.src \in Faulty + \* lockedRound and lockedValue is beyond my comprehension + \/ mpv.id = NilValue + \//\ mpv.src \in Corr + /\ mpv.id /= NilValue + /\ \/ ProposalInRound(r, mpv.id, NilRound) + \/ \E vr \in { rr \in Rounds: rr < r }: + /\ ProposalInRound(r, mpv.id, vr) + /\ TwoThirdsPrevotes(vr, mpv.id) + +AllIfSentPrevoteThenReceivedProposalOrTwoThirds == + \A r \in Rounds: + IfSentPrevoteThenReceivedProposalOrTwoThirds(r) + +\* if a correct process has sent a PRECOMMIT, then there are two thirds, +\* either on a valid value, or a nil value +IfSentPrecommitThenReceivedTwoThirds == + \A r \in Rounds: + \A mpc \in msgsPrecommit[r]: + mpc.src \in Corr => + \/ /\ mpc.id \in ValidValues + /\ LET PV == + { m \in msgsPrevote[r] \intersect evidence: m.id = mpc.id } + IN + Cardinality(PV) >= THRESHOLD2 + \/ /\ mpc.id = NilValue + /\ Cardinality(msgsPrevote[r]) >= THRESHOLD2 + +\* if a correct process has sent a precommit message in a round, it should +\* have sent a prevote +IfSentPrecommitThenSentPrevote == + \A r \in Rounds: + \A mpc \in msgsPrecommit[r]: + mpc.src \in Corr => + \E m \in msgsPrevote[r]: + m.src = mpc.src + +\* there is a locked round if a only if there is a locked value +LockedRoundIffLockedValue(p) == + (lockedRound[p] = NilRound) <=> (lockedValue[p] = NilValue) + +AllLockedRoundIffLockedValue == + \A p \in Corr: + LockedRoundIffLockedValue(p) + +\* when a process locked a round, it must have sent a precommit on the locked value. +IfLockedRoundThenSentCommit(p) == + lockedRound[p] /= NilRound + => \E r \in { rr \in Rounds: rr <= round[p] }: + \E m \in msgsPrecommit[r]: + m.src = p /\ m.id = lockedValue[p] + +AllIfLockedRoundThenSentCommit == + \A p \in Corr: + IfLockedRoundThenSentCommit(p) + +\* a process always locks the latest round, for which it has sent a PRECOMMIT +LatestPrecommitHasLockedRound(p) == + LET pPrecommits == + {mm \in UNION { msgsPrecommit[r]: r \in Rounds }: mm.src = p /\ mm.id /= NilValue } + IN + pPrecommits /= {} <: {MT} + => LET latest == + CHOOSE m \in pPrecommits: + \A m2 \in pPrecommits: + m2.round <= m.round + IN + /\ lockedRound[p] = latest.round + /\ lockedValue[p] = latest.id + +AllLatestPrecommitHasLockedRound == + \A p \in Corr: + LatestPrecommitHasLockedRound(p) + +\* Every correct process sends only one value or NilValue. +\* This test has quantifier alternation -- a threat to all decision procedures. +\* Luckily, the sets Corr and ValidValues are small. +NoEquivocationByCorrect(r, msgs) == + \A p \in Corr: + \E v \in ValidValues \union {NilValue}: + \A m \in msgs[r]: + \/ m.src /= p + \/ m.id = v + +\* a proposer nevers sends two values +ProposalsByProposer(r, msgs) == + \* if the proposer is not faulty, it sends only one value + \E v \in ValidValues: + \A m \in msgs[r]: + \/ m.src \in Faulty + \/ m.src = Proposer[r] /\ m.proposal = v + +AllNoEquivocationByCorrect == + \A r \in Rounds: + /\ ProposalsByProposer(r, msgsPropose) + /\ NoEquivocationByCorrect(r, msgsPrevote) + /\ NoEquivocationByCorrect(r, msgsPrecommit) + +\* construct the set of the message senders +Senders(M) == { m.src: m \in M } + +\* The final piece by Josef Widder: +\* if T + 1 processes precommit on the same value in a round, +\* then in the future rounds there are less than 2T + 1 prevotes for another value +PrecommitsLockValue == + \A r \in Rounds: + \A v \in ValidValues \union {NilValue}: + \/ LET Precommits == {m \in msgsPrecommit[r]: m.id = v} + IN + Cardinality(Senders(Precommits)) < THRESHOLD1 + \/ \A fr \in { rr \in Rounds: rr > r }: \* future rounds + \A w \in (ValuesOrNil) \ {v}: + LET Prevotes == {m \in msgsPrevote[fr]: m.id = w} + IN + Cardinality(Senders(Prevotes)) < THRESHOLD2 + +\* a combination of all lemmas +Inv == + /\ EvidenceContainsMessages + /\ AllNoFutureMessagesSent + /\ AllIfInPrevoteThenSentPrevote + /\ AllIfInPrecommitThenSentPrecommit + /\ AllIfInDecidedThenReceivedProposal + /\ AllIfInDecidedThenReceivedTwoThirds + /\ AllIfInDecidedThenValidDecision + /\ AllLockedRoundIffLockedValue + /\ AllIfLockedRoundThenSentCommit + /\ AllLatestPrecommitHasLockedRound + /\ AllIfSentPrevoteThenReceivedProposalOrTwoThirds + /\ IfSentPrecommitThenSentPrevote + /\ IfSentPrecommitThenReceivedTwoThirds + /\ AllNoEquivocationByCorrect + /\ PrecommitsLockValue + +\* this is the inductive invariant we like to check +TypedInv == TypeOK /\ Inv + +\* UNUSED FOR SAFETY +ValidRoundNotSmallerThanLockedRound(p) == + validRound[p] >= lockedRound[p] + +\* UNUSED FOR SAFETY +ValidRoundIffValidValue(p) == + (validRound[p] = NilRound) <=> (validValue[p] = NilValue) + +\* UNUSED FOR SAFETY +AllValidRoundIffValidValue == + \A p \in Corr: ValidRoundIffValidValue(p) + +\* if validRound is defined, then there are two-thirds of PREVOTEs +IfValidRoundThenTwoThirds(p) == + \/ validRound[p] = NilRound + \/ LET PV == { m \in msgsPrevote[validRound[p]]: m.id = validValue[p] } IN + Cardinality(PV) >= THRESHOLD2 + +\* UNUSED FOR SAFETY +AllIfValidRoundThenTwoThirds == + \A p \in Corr: IfValidRoundThenTwoThirds(p) + +\* a valid round can be only set to a valid value that was proposed earlier +IfValidRoundThenProposal(p) == + \/ validRound[p] = NilRound + \/ \E m \in msgsPropose[validRound[p]]: + m.proposal = validValue[p] + +\* UNUSED FOR SAFETY +AllIfValidRoundThenProposal == + \A p \in Corr: IfValidRoundThenProposal(p) + +(******************************** THEOREMS ***************************************) +(* Under this condition, the faulty processes can decide alone *) +FaultyQuorum == Cardinality(Faulty) >= THRESHOLD2 + +(* The standard condition of the Tendermint security model *) +LessThanThirdFaulty == N > 3 * T /\ Cardinality(Faulty) <= T + +(* + TypedInv is an inductive invariant, provided that there is no faulty quorum. + We run Apalache to prove this theorem only for fixed instances of 4 to 10 processes. + (We run Apalache manually, as it does not parse theorem statements at the moment.) + To get a parameterized argument, one has to use a theorem prover, e.g., TLAPS. + *) +THEOREM TypedInvIsInductive == + \/ FaultyQuorum \* if there are 2 * T + 1 faulty processes, we give up + \//\ Init => TypedInv + /\ TypedInv /\ [Next]_vars => TypedInv' + +(* + There should be no fork, when there are less than 1/3 faulty processes. + *) +THEOREM AgreementWhenLessThanThirdFaulty == + LessThanThirdFaulty /\ TypedInv => Agreement + +(* + In a more general case, when there are less than 2/3 faulty processes, + there is either Agreement (no fork), or two scenarios exist: + equivocation by Faulty, or amnesia by Faulty. + *) +THEOREM AgreementOrFork == + ~FaultyQuorum /\ TypedInv => Accountability + +============================================================================= + diff --git a/spec/light-client/accountability/TendermintAccTrace_004_draft.tla b/spec/light-client/accountability/TendermintAccTrace_004_draft.tla new file mode 100644 index 000000000..436c2275a --- /dev/null +++ b/spec/light-client/accountability/TendermintAccTrace_004_draft.tla @@ -0,0 +1,33 @@ +------------------ MODULE TendermintAccTrace_004_draft ------------------------- +(* + When Apalache is running too slow and we have an idea of a counterexample, + we use this module to restrict the behaviors only to certain actions. + Once the whole trace is replayed, the system deadlocks. + + Version 1. + + Igor Konnov, 2020. + *) + +EXTENDS Sequences, Apalache, TendermintAcc_004_draft + +\* a sequence of action names that should appear in the given order, +\* excluding "Init" +CONSTANT Trace + +VARIABLE toReplay + +TraceInit == + /\ toReplay = Trace + /\ action' := "Init" + /\ Init + +TraceNext == + /\ Len(toReplay) > 0 + /\ toReplay' = Tail(toReplay) + \* Here is the trick. We restrict the action to the expected one, + \* so the other actions will be pruned + /\ action' := Head(toReplay) + /\ Next + +================================================================================ diff --git a/spec/light-client/accountability/TendermintAcc_004_draft.tla b/spec/light-client/accountability/TendermintAcc_004_draft.tla new file mode 100644 index 000000000..952d8c569 --- /dev/null +++ b/spec/light-client/accountability/TendermintAcc_004_draft.tla @@ -0,0 +1,474 @@ +-------------------- MODULE TendermintAcc_004_draft --------------------------- +(* + A TLA+ specification of a simplified Tendermint consensus, tuned for + fork accountability. The simplifications are as follows: + + - the protocol runs for one height, that is, it is one-shot consensus + + - this specification focuses on safety, so timeouts are modelled + with non-determinism + + - the proposer function is non-determinstic, no fairness is assumed + + - the messages by the faulty processes are injected right in the initial states + + - every process has the voting power of 1 + + - hashes are modelled as identity + + Having the above assumptions in mind, the specification follows the pseudo-code + of the Tendermint paper: https://arxiv.org/abs/1807.04938 + + Byzantine processes can demonstrate arbitrary behavior, including + no communication. We show that if agreement is violated, then the Byzantine + processes demonstrate one of the two behaviors: + + - Equivocation: a Byzantine process may send two different values + in the same round. + + - Amnesia: a Byzantine process may lock a value without unlocking + the previous value that it has locked in the past. + + * Version 4. Remove defective processes, fix bugs, collect global evidence. + * Version 3. Modular and parameterized definitions. + * Version 2. Bugfixes in the spec and an inductive invariant. + * Version 1. A preliminary specification. + + Zarko Milosevic, Igor Konnov, Informal Systems, 2019-2020. + *) + +EXTENDS Integers, FiniteSets + +(********************* PROTOCOL PARAMETERS **********************************) +CONSTANTS + Corr, \* the set of correct processes + Faulty, \* the set of Byzantine processes, may be empty + N, \* the total number of processes: correct, defective, and Byzantine + T, \* an upper bound on the number of Byzantine processes + ValidValues, \* the set of valid values, proposed both by correct and faulty + InvalidValues, \* the set of invalid values, never proposed by the correct ones + MaxRound, \* the maximal round number + Proposer \* the proposer function from 0..NRounds to 1..N + +ASSUME(N = Cardinality(Corr \union Faulty)) + +(*************************** DEFINITIONS ************************************) +AllProcs == Corr \union Faulty \* the set of all processes +Rounds == 0..MaxRound \* the set of potential rounds +NilRound == -1 \* a special value to denote a nil round, outside of Rounds +RoundsOrNil == Rounds \union {NilRound} +Values == ValidValues \union InvalidValues \* the set of all values +NilValue == "None" \* a special value for a nil round, outside of Values +ValuesOrNil == Values \union {NilValue} + +\* a value hash is modeled as identity +Id(v) == v + +\* The validity predicate +IsValid(v) == v \in ValidValues + +\* the two thresholds that are used in the algorithm +THRESHOLD1 == T + 1 \* at least one process is not faulty +THRESHOLD2 == 2 * T + 1 \* a quorum when having N > 3 * T + +(********************* TYPE ANNOTATIONS FOR APALACHE **************************) +\* the operator for type annotations +a <: b == a + +\* the type of message records +MT == [type |-> STRING, src |-> STRING, round |-> Int, + proposal |-> STRING, validRound |-> Int, id |-> STRING] + +\* a type annotation for a message +AsMsg(m) == m <: MT +\* a type annotation for a set of messages +SetOfMsgs(S) == S <: {MT} +\* a type annotation for an empty set of messages +EmptyMsgSet == SetOfMsgs({}) + +(********************* PROTOCOL STATE VARIABLES ******************************) +VARIABLES + round, \* a process round number: Corr -> Rounds + step, \* a process step: Corr -> { "PROPOSE", "PREVOTE", "PRECOMMIT", "DECIDED" } + decision, \* process decision: Corr -> ValuesOrNil + lockedValue, \* a locked value: Corr -> ValuesOrNil + lockedRound, \* a locked round: Corr -> RoundsOrNil + validValue, \* a valid value: Corr -> ValuesOrNil + validRound \* a valid round: Corr -> RoundsOrNil + +\* book-keeping variables +VARIABLES + msgsPropose, \* PROPOSE messages broadcast in the system, Rounds -> Messages + msgsPrevote, \* PREVOTE messages broadcast in the system, Rounds -> Messages + msgsPrecommit, \* PRECOMMIT messages broadcast in the system, Rounds -> Messages + evidence, \* the messages that were used by the correct processes to make transitions + action \* we use this variable to see which action was taken + +(* to see a type invariant, check TendermintAccInv3 *) + +\* a handy definition used in UNCHANGED +vars == <> + +(********************* PROTOCOL INITIALIZATION ******************************) +FaultyProposals(r) == + SetOfMsgs([type: {"PROPOSAL"}, src: Faulty, + round: {r}, proposal: Values, validRound: RoundsOrNil]) + +AllFaultyProposals == + SetOfMsgs([type: {"PROPOSAL"}, src: Faulty, + round: Rounds, proposal: Values, validRound: RoundsOrNil]) + +FaultyPrevotes(r) == + SetOfMsgs([type: {"PREVOTE"}, src: Faulty, round: {r}, id: Values]) + +AllFaultyPrevotes == + SetOfMsgs([type: {"PREVOTE"}, src: Faulty, round: Rounds, id: Values]) + +FaultyPrecommits(r) == + SetOfMsgs([type: {"PRECOMMIT"}, src: Faulty, round: {r}, id: Values]) + +AllFaultyPrecommits == + SetOfMsgs([type: {"PRECOMMIT"}, src: Faulty, round: Rounds, id: Values]) + +BenignRoundsInMessages(msgfun) == + \* the message function never contains a message for a wrong round + \A r \in Rounds: + \A m \in msgfun[r]: + r = m.round + +\* The initial states of the protocol. Some faults can be in the system already. +Init == + /\ round = [p \in Corr |-> 0] + /\ step = [p \in Corr |-> "PROPOSE"] + /\ decision = [p \in Corr |-> NilValue] + /\ lockedValue = [p \in Corr |-> NilValue] + /\ lockedRound = [p \in Corr |-> NilRound] + /\ validValue = [p \in Corr |-> NilValue] + /\ validRound = [p \in Corr |-> NilRound] + /\ msgsPropose \in [Rounds -> SUBSET AllFaultyProposals] + /\ msgsPrevote \in [Rounds -> SUBSET AllFaultyPrevotes] + /\ msgsPrecommit \in [Rounds -> SUBSET AllFaultyPrecommits] + /\ BenignRoundsInMessages(msgsPropose) + /\ BenignRoundsInMessages(msgsPrevote) + /\ BenignRoundsInMessages(msgsPrecommit) + /\ evidence = EmptyMsgSet + /\ action' = "Init" + +(************************ MESSAGE PASSING ********************************) +BroadcastProposal(pSrc, pRound, pProposal, pValidRound) == + LET newMsg == + AsMsg([type |-> "PROPOSAL", src |-> pSrc, round |-> pRound, + proposal |-> pProposal, validRound |-> pValidRound]) + IN + msgsPropose' = [msgsPropose EXCEPT ![pRound] = msgsPropose[pRound] \union {newMsg}] + +BroadcastPrevote(pSrc, pRound, pId) == + LET newMsg == AsMsg([type |-> "PREVOTE", + src |-> pSrc, round |-> pRound, id |-> pId]) + IN + msgsPrevote' = [msgsPrevote EXCEPT ![pRound] = msgsPrevote[pRound] \union {newMsg}] + +BroadcastPrecommit(pSrc, pRound, pId) == + LET newMsg == AsMsg([type |-> "PRECOMMIT", + src |-> pSrc, round |-> pRound, id |-> pId]) + IN + msgsPrecommit' = [msgsPrecommit EXCEPT ![pRound] = msgsPrecommit[pRound] \union {newMsg}] + + +(********************* PROTOCOL TRANSITIONS ******************************) +\* lines 12-13 +StartRound(p, r) == + /\ step[p] /= "DECIDED" \* a decided process does not participate in consensus + /\ round' = [round EXCEPT ![p] = r] + /\ step' = [step EXCEPT ![p] = "PROPOSE"] + +\* lines 14-19, a proposal may be sent later +InsertProposal(p) == + LET r == round[p] IN + /\ p = Proposer[r] + /\ step[p] = "PROPOSE" + \* if the proposer is sending a proposal, then there are no other proposals + \* by the correct processes for the same round + /\ \A m \in msgsPropose[r]: m.src /= p + /\ \E v \in ValidValues: + LET proposal == IF validValue[p] /= NilValue THEN validValue[p] ELSE v IN + BroadcastProposal(p, round[p], proposal, validRound[p]) + /\ UNCHANGED <> + /\ action' = "InsertProposal" + +\* lines 22-27 +UponProposalInPropose(p) == + \E v \in Values: + /\ step[p] = "PROPOSE" (* line 22 *) + /\ LET msg == + AsMsg([type |-> "PROPOSAL", src |-> Proposer[round[p]], + round |-> round[p], proposal |-> v, validRound |-> NilRound]) IN + /\ msg \in msgsPropose[round[p]] \* line 22 + /\ evidence' = {msg} \union evidence + /\ LET mid == (* line 23 *) + IF IsValid(v) /\ (lockedRound[p] = NilRound \/ lockedValue[p] = v) + THEN Id(v) + ELSE NilValue + IN + BroadcastPrevote(p, round[p], mid) \* lines 24-26 + /\ step' = [step EXCEPT ![p] = "PREVOTE"] + /\ UNCHANGED <> + /\ action' = "UponProposalInPropose" + +\* lines 28-33 +UponProposalInProposeAndPrevote(p) == + \E v \in Values, vr \in Rounds: + /\ step[p] = "PROPOSE" /\ 0 <= vr /\ vr < round[p] \* line 28, the while part + /\ LET msg == + AsMsg([type |-> "PROPOSAL", src |-> Proposer[round[p]], + round |-> round[p], proposal |-> v, validRound |-> vr]) + IN + /\ msg \in msgsPropose[round[p]] \* line 28 + /\ LET PV == { m \in msgsPrevote[vr]: m.id = Id(v) } IN + /\ Cardinality(PV) >= THRESHOLD2 \* line 28 + /\ evidence' = PV \union {msg} \union evidence + /\ LET mid == (* line 29 *) + IF IsValid(v) /\ (lockedRound[p] <= vr \/ lockedValue[p] = v) + THEN Id(v) + ELSE NilValue + IN + BroadcastPrevote(p, round[p], mid) \* lines 24-26 + /\ step' = [step EXCEPT ![p] = "PREVOTE"] + /\ UNCHANGED <> + /\ action' = "UponProposalInProposeAndPrevote" + + \* lines 34-35 + lines 61-64 (onTimeoutPrevote) +UponQuorumOfPrevotesAny(p) == + /\ step[p] = "PREVOTE" \* line 34 and 61 + /\ \E MyEvidence \in SUBSET msgsPrevote[round[p]]: + \* find the unique voters in the evidence + LET Voters == { m.src: m \in MyEvidence } IN + \* compare the number of the unique voters against the threshold + /\ Cardinality(Voters) >= THRESHOLD2 \* line 34 + /\ evidence' = MyEvidence \union evidence + /\ BroadcastPrecommit(p, round[p], NilValue) + /\ step' = [step EXCEPT ![p] = "PRECOMMIT"] + /\ UNCHANGED <> + /\ action' = "UponQuorumOfPrevotesAny" + +\* lines 36-46 +UponProposalInPrevoteOrCommitAndPrevote(p) == + \E v \in ValidValues, vr \in RoundsOrNil: + /\ step[p] \in {"PREVOTE", "PRECOMMIT"} \* line 36 + /\ LET msg == + AsMsg([type |-> "PROPOSAL", src |-> Proposer[round[p]], + round |-> round[p], proposal |-> v, validRound |-> vr]) IN + /\ msg \in msgsPropose[round[p]] \* line 36 + /\ LET PV == { m \in msgsPrevote[round[p]]: m.id = Id(v) } IN + /\ Cardinality(PV) >= THRESHOLD2 \* line 36 + /\ evidence' = PV \union {msg} \union evidence + /\ IF step[p] = "PREVOTE" + THEN \* lines 38-41: + /\ lockedValue' = [lockedValue EXCEPT ![p] = v] + /\ lockedRound' = [lockedRound EXCEPT ![p] = round[p]] + /\ BroadcastPrecommit(p, round[p], Id(v)) + /\ step' = [step EXCEPT ![p] = "PRECOMMIT"] + ELSE + UNCHANGED <> + \* lines 42-43 + /\ validValue' = [validValue EXCEPT ![p] = v] + /\ validRound' = [validRound EXCEPT ![p] = round[p]] + /\ UNCHANGED <> + /\ action' = "UponProposalInPrevoteOrCommitAndPrevote" + +\* lines 47-48 + 65-67 (onTimeoutPrecommit) +UponQuorumOfPrecommitsAny(p) == + /\ \E MyEvidence \in SUBSET msgsPrecommit[round[p]]: + \* find the unique committers in the evidence + LET Committers == { m.src: m \in MyEvidence } IN + \* compare the number of the unique committers against the threshold + /\ Cardinality(Committers) >= THRESHOLD2 \* line 47 + /\ evidence' = MyEvidence \union evidence + /\ round[p] + 1 \in Rounds + /\ StartRound(p, round[p] + 1) + /\ UNCHANGED <> + /\ action' = "UponQuorumOfPrecommitsAny" + +\* lines 49-54 +UponProposalInPrecommitNoDecision(p) == + /\ decision[p] = NilValue \* line 49 + /\ \E v \in ValidValues (* line 50*) , r \in Rounds, vr \in RoundsOrNil: + /\ LET msg == AsMsg([type |-> "PROPOSAL", src |-> Proposer[r], + round |-> r, proposal |-> v, validRound |-> vr]) IN + /\ msg \in msgsPropose[r] \* line 49 + /\ LET PV == { m \in msgsPrecommit[r]: m.id = Id(v) } IN + /\ Cardinality(PV) >= THRESHOLD2 \* line 49 + /\ evidence' = PV \union {msg} \union evidence + /\ decision' = [decision EXCEPT ![p] = v] \* update the decision, line 51 + \* The original algorithm does not have 'DECIDED', but it increments the height. + \* We introduced 'DECIDED' here to prevent the process from changing its decision. + /\ step' = [step EXCEPT ![p] = "DECIDED"] + /\ UNCHANGED <> + /\ action' = "UponProposalInPrecommitNoDecision" + +\* the actions below are not essential for safety, but added for completeness + +\* lines 20-21 + 57-60 +OnTimeoutPropose(p) == + /\ step[p] = "PROPOSE" + /\ p /= Proposer[round[p]] + /\ BroadcastPrevote(p, round[p], NilValue) + /\ step' = [step EXCEPT ![p] = "PREVOTE"] + /\ UNCHANGED <> + /\ action' = "OnTimeoutPropose" + +\* lines 44-46 +OnQuorumOfNilPrevotes(p) == + /\ step[p] = "PREVOTE" + /\ LET PV == { m \in msgsPrevote[round[p]]: m.id = Id(NilValue) } IN + /\ Cardinality(PV) >= THRESHOLD2 \* line 36 + /\ evidence' = PV \union evidence + /\ BroadcastPrecommit(p, round[p], Id(NilValue)) + /\ step' = [step EXCEPT ![p] = "PRECOMMIT"] + /\ UNCHANGED <> + /\ action' = "OnQuorumOfNilPrevotes" + +\* lines 55-56 +OnRoundCatchup(p) == + \E r \in {rr \in Rounds: rr > round[p]}: + LET RoundMsgs == msgsPropose[r] \union msgsPrevote[r] \union msgsPrecommit[r] IN + \E MyEvidence \in SUBSET RoundMsgs: + LET Faster == { m.src: m \in MyEvidence } IN + /\ Cardinality(Faster) >= THRESHOLD1 + /\ evidence' = MyEvidence \union evidence + /\ StartRound(p, r) + /\ UNCHANGED <> + /\ action' = "OnRoundCatchup" + +(* + * A system transition. In this specificatiom, the system may eventually deadlock, + * e.g., when all processes decide. This is expected behavior, as we focus on safety. + *) +Next == + \E p \in Corr: + \/ InsertProposal(p) + \/ UponProposalInPropose(p) + \/ UponProposalInProposeAndPrevote(p) + \/ UponQuorumOfPrevotesAny(p) + \/ UponProposalInPrevoteOrCommitAndPrevote(p) + \/ UponQuorumOfPrecommitsAny(p) + \/ UponProposalInPrecommitNoDecision(p) + \* the actions below are not essential for safety, but added for completeness + \/ OnTimeoutPropose(p) + \/ OnQuorumOfNilPrevotes(p) + \/ OnRoundCatchup(p) + + +(**************************** FORK SCENARIOS ***************************) + +\* equivocation by a process p +EquivocationBy(p) == + \E m1, m2 \in evidence: + /\ m1 /= m2 + /\ m1.src = p + /\ m2.src = p + /\ m1.round = m2.round + /\ m1.type = m2.type + +\* amnesic behavior by a process p +AmnesiaBy(p) == + \E r1, r2 \in Rounds: + /\ r1 < r2 + /\ \E v1, v2 \in ValidValues: + /\ v1 /= v2 + /\ AsMsg([type |-> "PRECOMMIT", src |-> p, + round |-> r1, id |-> Id(v1)]) \in evidence + /\ AsMsg([type |-> "PREVOTE", src |-> p, + round |-> r2, id |-> Id(v2)]) \in evidence + /\ \A r \in { rnd \in Rounds: r1 <= rnd /\ rnd < r2 }: + LET prevotes == + { m \in evidence: + m.type = "PREVOTE" /\ m.round = r /\ m.id = Id(v2) } + IN + Cardinality(prevotes) < THRESHOLD2 + +(******************************** PROPERTIES ***************************************) + +\* the safety property -- agreement +Agreement == + \A p, q \in Corr: + \/ decision[p] = NilValue + \/ decision[q] = NilValue + \/ decision[p] = decision[q] + +\* the protocol validity +Validity == + \A p \in Corr: decision[p] \in ValidValues \union {NilValue} + +(* + The protocol safety. Two cases are possible: + 1. There is no fork, that is, Agreement holds true. + 2. A subset of faulty processes demonstrates equivocation or amnesia. + *) +Accountability == + \/ Agreement + \/ \E Detectable \in SUBSET Faulty: + /\ Cardinality(Detectable) >= THRESHOLD1 + /\ \A p \in Detectable: + EquivocationBy(p) \/ AmnesiaBy(p) + +(****************** FALSE INVARIANTS TO PRODUCE EXAMPLES ***********************) + +\* This property is violated. You can check it to see how amnesic behavior +\* appears in the evidence variable. +NoAmnesia == + \A p \in Faulty: ~AmnesiaBy(p) + +\* This property is violated. You can check it to see an example of equivocation. +NoEquivocation == + \A p \in Faulty: ~EquivocationBy(p) + +\* This property is violated. You can check it to see an example of agreement. +\* It is not exactly ~Agreement, as we do not want to see the states where +\* decision[p] = NilValue +NoAgreement == + \A p, q \in Corr: + (p /= q /\ decision[p] /= NilValue /\ decision[q] /= NilValue) + => decision[p] /= decision[q] + +\* Either agreement holds, or the faulty processes indeed demonstrate amnesia. +\* This property is violated. A counterexample should demonstrate equivocation. +AgreementOrAmnesia == + Agreement \/ (\A p \in Faulty: AmnesiaBy(p)) + +\* We expect this property to be violated. It shows us a protocol run, +\* where one faulty process demonstrates amnesia without equivocation. +\* However, the absence of amnesia +\* is a tough constraint for Apalache. It has not reported a counterexample +\* for n=4,f=2, length <= 5. +ShowMeAmnesiaWithoutEquivocation == + (~Agreement /\ \E p \in Faulty: ~EquivocationBy(p)) + => \A p \in Faulty: ~AmnesiaBy(p) + +\* This property is violated on n=4,f=2, length=4 in less than 10 min. +\* Two faulty processes may demonstrate amnesia without equivocation. +AmnesiaImpliesEquivocation == + (\E p \in Faulty: AmnesiaBy(p)) => (\E q \in Faulty: EquivocationBy(q)) + +(* + This property is violated. You can check it to see that all correct processes + may reach MaxRound without making a decision. + *) +NeverUndecidedInMaxRound == + LET AllInMax == \A p \in Corr: round[p] = MaxRound + AllDecided == \A p \in Corr: decision[p] /= NilValue + IN + AllInMax => AllDecided + +============================================================================= + diff --git a/spec/light-client/accountability/results/001indinv-apalache-mem-log.svg b/spec/light-client/accountability/results/001indinv-apalache-mem-log.svg new file mode 100644 index 000000000..5821418da --- /dev/null +++ b/spec/light-client/accountability/results/001indinv-apalache-mem-log.svg @@ -0,0 +1,1063 @@ + + + + + + + + + 2020-12-11T20:07:39.617177 + image/svg+xml + + + Matplotlib v3.3.3, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/spec/light-client/accountability/results/001indinv-apalache-mem.svg b/spec/light-client/accountability/results/001indinv-apalache-mem.svg new file mode 100644 index 000000000..dc7213eae --- /dev/null +++ b/spec/light-client/accountability/results/001indinv-apalache-mem.svg @@ -0,0 +1,1141 @@ + + + + + + + + + 2020-12-11T20:07:40.321995 + image/svg+xml + + + Matplotlib v3.3.3, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/spec/light-client/accountability/results/001indinv-apalache-ncells.svg b/spec/light-client/accountability/results/001indinv-apalache-ncells.svg new file mode 100644 index 000000000..20c49f4f1 --- /dev/null +++ b/spec/light-client/accountability/results/001indinv-apalache-ncells.svg @@ -0,0 +1,1015 @@ + + + + + + + + + 2020-12-11T20:07:40.804886 + image/svg+xml + + + Matplotlib v3.3.3, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/spec/light-client/accountability/results/001indinv-apalache-nclauses.svg b/spec/light-client/accountability/results/001indinv-apalache-nclauses.svg new file mode 100644 index 000000000..86d19143b --- /dev/null +++ b/spec/light-client/accountability/results/001indinv-apalache-nclauses.svg @@ -0,0 +1,1133 @@ + + + + + + + + + 2020-12-11T20:07:41.276750 + image/svg+xml + + + Matplotlib v3.3.3, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/spec/light-client/accountability/results/001indinv-apalache-report.md b/spec/light-client/accountability/results/001indinv-apalache-report.md new file mode 100644 index 000000000..0c14742c5 --- /dev/null +++ b/spec/light-client/accountability/results/001indinv-apalache-report.md @@ -0,0 +1,61 @@ +# Results of 001indinv-apalache + +## 1. Awesome plots + +### 1.1. Time (logarithmic scale) + +![time-log](001indinv-apalache-time-log.svg "Time Log") + +### 1.2. Time (linear) + +![time-log](001indinv-apalache-time.svg "Time Log") + +### 1.3. Memory (logarithmic scale) + +![mem-log](001indinv-apalache-mem-log.svg "Memory Log") + +### 1.4. Memory (linear) + +![mem](001indinv-apalache-mem.svg "Memory Log") + +### 1.5. Number of arena cells (linear) + +![ncells](001indinv-apalache-ncells.svg "Number of arena cells") + +### 1.6. Number of SMT clauses (linear) + +![nclauses](001indinv-apalache-nclauses.svg "Number of SMT clauses") + +## 2. Input parameters + +no | filename | tool | timeout | init | inv | next | args +----|----------------|------------|-----------|------------|------------------|--------|------------------------------ +1 | MC_n4_f1.tla | apalache | 10h | TypedInv | TypedInv | | --length=1 --cinit=ConstInit +2 | MC_n4_f2.tla | apalache | 10h | TypedInv | TypedInv | | --length=1 --cinit=ConstInit +3 | MC_n5_f1.tla | apalache | 10h | TypedInv | TypedInv | | --length=1 --cinit=ConstInit +4 | MC_n5_f2.tla | apalache | 10h | TypedInv | TypedInv | | --length=1 --cinit=ConstInit +5 | MC_n4_f1.tla | apalache | 20h | Init | TypedInv | | --length=0 --cinit=ConstInit +6 | MC_n4_f2.tla | apalache | 20h | Init | TypedInv | | --length=0 --cinit=ConstInit +7 | MC_n5_f1.tla | apalache | 20h | Init | TypedInv | | --length=0 --cinit=ConstInit +8 | MC_n5_f2.tla | apalache | 20h | Init | TypedInv | | --length=0 --cinit=ConstInit +9 | MC_n4_f1.tla | apalache | 20h | TypedInv | Agreement | | --length=0 --cinit=ConstInit +10 | MC_n4_f2.tla | apalache | 20h | TypedInv | Accountability | | --length=0 --cinit=ConstInit +11 | MC_n5_f1.tla | apalache | 20h | TypedInv | Agreement | | --length=0 --cinit=ConstInit +12 | MC_n5_f2.tla | apalache | 20h | TypedInv | Accountability | | --length=0 --cinit=ConstInit + +## 3. Detailed results: 001indinv-apalache-unstable.csv + +01:no | 02:tool | 03:status | 04:time_sec | 05:depth | 05:mem_kb | 10:ninit_trans | 11:ninit_trans | 12:ncells | 13:nclauses | 14:navg_clause_len +-------|------------|-------------|---------------|------------|-------------|------------------|------------------|-------------|---------------|-------------------- +1 | apalache | NoError | 11m | 1 | 3.0GB | 0 | 0 | 217K | 1.0M | 89 +2 | apalache | NoError | 11m | 1 | 3.0GB | 0 | 0 | 207K | 1.0M | 88 +3 | apalache | NoError | 16m | 1 | 4.0GB | 0 | 0 | 311K | 2.0M | 101 +4 | apalache | NoError | 14m | 1 | 3.0GB | 0 | 0 | 290K | 1.0M | 103 +5 | apalache | NoError | 9s | 0 | 563MB | 0 | 0 | 2.0K | 14K | 42 +6 | apalache | NoError | 10s | 0 | 657MB | 0 | 0 | 2.0K | 28K | 43 +7 | apalache | NoError | 8s | 0 | 635MB | 0 | 0 | 2.0K | 17K | 44 +8 | apalache | NoError | 10s | 0 | 667MB | 0 | 0 | 3.0K | 32K | 45 +9 | apalache | NoError | 5m05s | 0 | 2.0GB | 0 | 0 | 196K | 889K | 108 +10 | apalache | NoError | 8m08s | 0 | 6.0GB | 0 | 0 | 2.0M | 3.0M | 34 +11 | apalache | NoError | 9m09s | 0 | 3.0GB | 0 | 0 | 284K | 1.0M | 128 +12 | apalache | NoError | 14m | 0 | 7.0GB | 0 | 0 | 4.0M | 5.0M | 38 diff --git a/spec/light-client/accountability/results/001indinv-apalache-time-log.svg b/spec/light-client/accountability/results/001indinv-apalache-time-log.svg new file mode 100644 index 000000000..458d67c6c --- /dev/null +++ b/spec/light-client/accountability/results/001indinv-apalache-time-log.svg @@ -0,0 +1,1134 @@ + + + + + + + + + 2020-12-11T20:07:38.347583 + image/svg+xml + + + Matplotlib v3.3.3, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/spec/light-client/accountability/results/001indinv-apalache-time.svg b/spec/light-client/accountability/results/001indinv-apalache-time.svg new file mode 100644 index 000000000..a5db5a8b5 --- /dev/null +++ b/spec/light-client/accountability/results/001indinv-apalache-time.svg @@ -0,0 +1,957 @@ + + + + + + + + + 2020-12-11T20:07:39.136767 + image/svg+xml + + + Matplotlib v3.3.3, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/spec/light-client/accountability/results/001indinv-apalache-unstable.csv b/spec/light-client/accountability/results/001indinv-apalache-unstable.csv new file mode 100644 index 000000000..db1a06093 --- /dev/null +++ b/spec/light-client/accountability/results/001indinv-apalache-unstable.csv @@ -0,0 +1,13 @@ +01:no,02:tool,03:status,04:time_sec,05:depth,05:mem_kb,10:ninit_trans,11:ninit_trans,12:ncells,13:nclauses,14:navg_clause_len +1,apalache,NoError,704,1,3215424,0,0,217385,1305718,89 +2,apalache,NoError,699,1,3195020,0,0,207969,1341979,88 +3,apalache,NoError,1018,1,4277060,0,0,311798,2028544,101 +4,apalache,NoError,889,1,4080012,0,0,290989,1951616,103 +5,apalache,NoError,9,0,577100,0,0,2045,14655,42 +6,apalache,NoError,10,0,673772,0,0,2913,28213,43 +7,apalache,NoError,8,0,651008,0,0,2214,17077,44 +8,apalache,NoError,10,0,683188,0,0,3082,32651,45 +9,apalache,NoError,340,0,3053848,0,0,196943,889859,108 +10,apalache,NoError,517,0,6424536,0,0,2856378,3802779,34 +11,apalache,NoError,587,0,4028516,0,0,284369,1343296,128 +12,apalache,NoError,880,0,7881148,0,0,4382556,5778072,38 diff --git a/spec/light-client/accountability/run.sh b/spec/light-client/accountability/run.sh new file mode 100755 index 000000000..75e57a5f8 --- /dev/null +++ b/spec/light-client/accountability/run.sh @@ -0,0 +1,9 @@ +#!/bin/sh +# +# The script to run all experiments at once + +export SCRIPTS_DIR=~/devl/apalache-tests/scripts +export BUILDS="unstable" +export BENCHMARK=001indinv-apalache +export RUN_SCRIPT=./run-all.sh # alternatively, use ./run-parallel.sh +make -e -f ~/devl/apalache-tests/Makefile.common diff --git a/spec/light-client/assets/light-node-image.png b/spec/light-client/assets/light-node-image.png new file mode 100644 index 000000000..f0b93c6e4 Binary files /dev/null and b/spec/light-client/assets/light-node-image.png differ diff --git a/spec/light-client/attacks/Blockchain_003_draft.tla b/spec/light-client/attacks/Blockchain_003_draft.tla new file mode 100644 index 000000000..fb6e6e8e8 --- /dev/null +++ b/spec/light-client/attacks/Blockchain_003_draft.tla @@ -0,0 +1,166 @@ +------------------------ MODULE Blockchain_003_draft ----------------------------- +(* + This is a high-level specification of Tendermint blockchain + that is designed specifically for the light client. + Validators have the voting power of one. If you like to model various + voting powers, introduce multiple copies of the same validator + (do not forget to give them unique names though). + *) +EXTENDS Integers, FiniteSets, Apalache + +Min(a, b) == IF a < b THEN a ELSE b + +CONSTANT + AllNodes, + (* a set of all nodes that can act as validators (correct and faulty) *) + ULTIMATE_HEIGHT, + (* a maximal height that can be ever reached (modelling artifact) *) + TRUSTING_PERIOD + (* the period within which the validators are trusted *) + +Heights == 1..ULTIMATE_HEIGHT (* possible heights *) + +(* A commit is just a set of nodes who have committed the block *) +Commits == SUBSET AllNodes + +(* The set of all block headers that can be on the blockchain. + This is a simplified version of the Block data structure in the actual implementation. *) +BlockHeaders == [ + height: Heights, + \* the block height + time: Int, + \* the block timestamp in some integer units + lastCommit: Commits, + \* the nodes who have voted on the previous block, the set itself instead of a hash + (* in the implementation, only the hashes of V and NextV are stored in a block, + as V and NextV are stored in the application state *) + VS: SUBSET AllNodes, + \* the validators of this bloc. We store the validators instead of the hash. + NextVS: SUBSET AllNodes + \* the validators of the next block. We store the next validators instead of the hash. +] + +(* A signed header is just a header together with a set of commits *) +LightBlocks == [header: BlockHeaders, Commits: Commits] + +VARIABLES + refClock, + (* the current global time in integer units as perceived by the reference chain *) + blockchain, + (* A sequence of BlockHeaders, which gives us a bird view of the blockchain. *) + Faulty + (* A set of faulty nodes, which can act as validators. We assume that the set + of faulty processes is non-decreasing. If a process has recovered, it should + connect using a different id. *) + +(* all variables, to be used with UNCHANGED *) +vars == <> + +(* The set of all correct nodes in a state *) +Corr == AllNodes \ Faulty + +(* APALACHE annotations *) +a <: b == a \* type annotation + +NT == STRING +NodeSet(S) == S <: {NT} +EmptyNodeSet == NodeSet({}) + +BT == [height |-> Int, time |-> Int, lastCommit |-> {NT}, VS |-> {NT}, NextVS |-> {NT}] + +LBT == [header |-> BT, Commits |-> {NT}] +(* end of APALACHE annotations *) + +(****************************** BLOCKCHAIN ************************************) + +(* the header is still within the trusting period *) +InTrustingPeriod(header) == + refClock < header.time + TRUSTING_PERIOD + +(* + Given a function pVotingPower \in D -> Powers for some D \subseteq AllNodes + and pNodes \subseteq D, test whether the set pNodes \subseteq AllNodes has + more than 2/3 of voting power among the nodes in D. + *) +TwoThirds(pVS, pNodes) == + LET TP == Cardinality(pVS) + SP == Cardinality(pVS \intersect pNodes) + IN + 3 * SP > 2 * TP \* when thinking in real numbers, not integers: SP > 2.0 / 3.0 * TP + +(* + Given a set of FaultyNodes, test whether the voting power of the correct nodes in D + is more than 2/3 of the voting power of the faulty nodes in D. + + Parameters: + - pFaultyNodes is a set of nodes that are considered faulty + - pVS is a set of all validators, maybe including Faulty, intersecting with it, etc. + - pMaxFaultRatio is a pair <> that limits the ratio a / b of the faulty + validators from above (exclusive) + *) +FaultyValidatorsFewerThan(pFaultyNodes, pVS, maxRatio) == + LET FN == pFaultyNodes \intersect pVS \* faulty nodes in pNodes + CN == pVS \ pFaultyNodes \* correct nodes in pNodes + CP == Cardinality(CN) \* power of the correct nodes + FP == Cardinality(FN) \* power of the faulty nodes + IN + \* CP + FP = TP is the total voting power + LET TP == CP + FP IN + FP * maxRatio[2] < TP * maxRatio[1] + +(* Can a block be produced by a correct peer, or an authenticated Byzantine peer *) +IsLightBlockAllowedByDigitalSignatures(ht, block) == + \/ block.header = blockchain[ht] \* signed by correct and faulty (maybe) + \/ /\ block.Commits \subseteq Faulty + /\ block.header.height = ht + /\ block.header.time >= 0 \* signed only by faulty + +(* + Initialize the blockchain to the ultimate height right in the initial states. + We pick the faulty validators statically, but that should not affect the light client. + + Parameters: + - pMaxFaultyRatioExclusive is a pair <> that bound the number of + faulty validators in each block by the ratio a / b (exclusive) + *) +InitToHeight(pMaxFaultyRatioExclusive) == + /\ \E Nodes \in SUBSET AllNodes: + Faulty := Nodes \* pick a subset of nodes to be faulty + \* pick the validator sets and last commits + /\ \E vs, lastCommit \in [Heights -> SUBSET AllNodes]: + \E timestamp \in [Heights -> Int]: + \* refClock is at least as early as the timestamp in the last block + /\ \E tm \in Int: + refClock := tm /\ tm >= timestamp[ULTIMATE_HEIGHT] + \* the genesis starts on day 1 + /\ timestamp[1] = 1 + /\ vs[1] = AllNodes + /\ lastCommit[1] = EmptyNodeSet + /\ \A h \in Heights \ {1}: + /\ lastCommit[h] \subseteq vs[h - 1] \* the non-validators cannot commit + /\ TwoThirds(vs[h - 1], lastCommit[h]) \* the commit has >2/3 of validator votes + \* the faulty validators have the power below the threshold + /\ FaultyValidatorsFewerThan(Faulty, vs[h], pMaxFaultyRatioExclusive) + /\ timestamp[h] > timestamp[h - 1] \* the time grows monotonically + /\ timestamp[h] < timestamp[h - 1] + TRUSTING_PERIOD \* but not too fast + \* form the block chain out of validator sets and commits (this makes apalache faster) + /\ blockchain := [h \in Heights |-> + [height |-> h, + time |-> timestamp[h], + VS |-> vs[h], + NextVS |-> IF h < ULTIMATE_HEIGHT THEN vs[h + 1] ELSE AllNodes, + lastCommit |-> lastCommit[h]] + ] \****** + +(********************* BLOCKCHAIN ACTIONS ********************************) +(* + Advance the clock by zero or more time units. + *) +AdvanceTime == + /\ \E tm \in Int: tm >= refClock /\ refClock' = tm + /\ UNCHANGED <> + +============================================================================= +\* Modification History +\* Last modified Wed Jun 10 14:10:54 CEST 2020 by igor +\* Created Fri Oct 11 15:45:11 CEST 2019 by igor diff --git a/spec/light-client/attacks/Isolation_001_draft.tla b/spec/light-client/attacks/Isolation_001_draft.tla new file mode 100644 index 000000000..7406b8942 --- /dev/null +++ b/spec/light-client/attacks/Isolation_001_draft.tla @@ -0,0 +1,159 @@ +----------------------- MODULE Isolation_001_draft ---------------------------- +(** + * The specification of the attackers isolation at full node, + * when it has received an evidence from the light client. + * We check that the isolation spec produces a set of validators + * that have more than 1/3 of the voting power. + * + * It follows the English specification: + * + * https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/attacks/isolate-attackers_001_draft.md + * + * The assumptions made in this specification: + * + * - the voting power of every validator is 1 + * (add more validators, if you need more validators) + * + * - Tendermint security model is violated + * (there are Byzantine validators who signed a conflicting block) + * + * Igor Konnov, Zarko Milosevic, Josef Widder, Informal Systems, 2020 + *) + + +EXTENDS Integers, FiniteSets, Apalache + +\* algorithm parameters +CONSTANTS + AllNodes, + (* a set of all nodes that can act as validators (correct and faulty) *) + COMMON_HEIGHT, + (* an index of the block header that two peers agree upon *) + CONFLICT_HEIGHT, + (* an index of the block header that two peers disagree upon *) + TRUSTING_PERIOD, + (* the period within which the validators are trusted *) + FAULTY_RATIO + (* a pair <> that limits that ratio of faulty validator in the blockchain + from above (exclusive). Tendermint security model prescribes 1 / 3. *) + +VARIABLES + blockchain, (* the chain at the full node *) + refClock, (* the reference clock at the full node *) + Faulty, (* the set of faulty validators *) + conflictingBlock, (* an evidence that two peers reported conflicting blocks *) + state, (* the state of the attack isolation machine at the full node *) + attackers (* the set of the identified attackers *) + +vars == <> + +\* instantiate the chain at the full node +ULTIMATE_HEIGHT == CONFLICT_HEIGHT + 1 +BC == INSTANCE Blockchain_003_draft + +\* use the light client API +TRUSTING_HEIGHT == COMMON_HEIGHT +TARGET_HEIGHT == CONFLICT_HEIGHT + +LC == INSTANCE LCVerificationApi_003_draft + WITH localClock <- refClock, REAL_CLOCK_DRIFT <- 0, CLOCK_DRIFT <- 0 + +\* old-style type annotations in apalache +a <: b == a + +\* [LCAI-NONVALID-OUTPUT.1::TLA.1] +ViolatesValidity(header1, header2) == + \/ header1.VS /= header2.VS + \/ header1.NextVS /= header2.NextVS + \/ header1.height /= header2.height + \/ header1.time /= header2.time + (* The English specification also checks the fields that we do not have + at this level of abstraction: + - header1.ConsensusHash != header2.ConsensusHash or + - header1.AppHash != header2.AppHash or + - header1.LastResultsHash header2 != ev.LastResultsHash + *) + +Init == + /\ state := "init" + \* Pick an arbitrary blockchain from 1 to COMMON_HEIGHT + 1. + /\ BC!InitToHeight(FAULTY_RATIO) \* initializes blockchain, Faulty, and refClock + /\ attackers := {} <: {STRING} \* attackers are unknown + \* Receive an arbitrary evidence. + \* Instantiate the light block fields one by one, + \* to avoid combinatorial explosion of records. + /\ \E time \in Int: + \E VS, NextVS, lastCommit, Commits \in SUBSET AllNodes: + LET conflicting == + [ Commits |-> Commits, + header |-> + [height |-> CONFLICT_HEIGHT, + time |-> time, + VS |-> VS, + NextVS |-> NextVS, + lastCommit |-> lastCommit] ] + IN + LET refBlock == [ header |-> blockchain[COMMON_HEIGHT], + Commits |-> blockchain[COMMON_HEIGHT + 1].lastCommit ] + IN + /\ "SUCCESS" = LC!ValidAndVerifiedUntimed(refBlock, conflicting) + \* More than third of next validators in the common reference block + \* is faulty. That is a precondition for a fork. + /\ 3 * Cardinality(Faulty \intersect refBlock.header.NextVS) + > Cardinality(refBlock.header.NextVS) + \* correct validators cannot sign an invalid block + /\ ViolatesValidity(conflicting.header, refBlock.header) + => conflicting.Commits \subseteq Faulty + /\ conflictingBlock := conflicting + + +\* This is a specification of isolateMisbehavingProcesses. +\* +\* [LCAI-FUNC-MAIN.1::TLA.1] +Next == + /\ state = "init" + \* Extract the rounds from the reference block and the conflicting block. + \* In this specification, we just pick rounds non-deterministically. + \* The English specification calls RoundOf on the blocks. + /\ \E referenceRound, evidenceRound \in Int: + /\ referenceRound >= 0 /\ evidenceRound >= 0 + /\ LET reference == blockchain[CONFLICT_HEIGHT] + referenceCommit == blockchain[CONFLICT_HEIGHT + 1].lastCommit + evidenceHeader == conflictingBlock.header + evidenceCommit == conflictingBlock.Commits + IN + IF ViolatesValidity(reference, evidenceHeader) + THEN /\ attackers' := blockchain[COMMON_HEIGHT].NextVS \intersect evidenceCommit + /\ state' := "Lunatic" + ELSE IF referenceRound = evidenceRound + THEN /\ attackers' := referenceCommit \intersect evidenceCommit + /\ state' := "Equivocation" + ELSE + \* This property is shown in property + \* Accountability of TendermintAcc3.tla + /\ state' := "Amnesia" + /\ \E Attackers \in SUBSET (Faulty \intersect reference.VS): + /\ 3 * Cardinality(Attackers) > Cardinality(reference.VS) + /\ attackers' := Attackers + /\ blockchain' := blockchain + /\ refClock' := refClock + /\ Faulty' := Faulty + /\ conflictingBlock' := conflictingBlock + +(********************************** INVARIANTS *******************************) + +\* This invariant ensure that the attackers have +\* more than 1/3 of the voting power +\* +\* [LCAI-INV-Output.1::TLA-DETECTION-COMPLETENESS.1] +DetectionCompleteness == + state /= "init" => + 3 * Cardinality(attackers) > Cardinality(blockchain[CONFLICT_HEIGHT].VS) + +\* This invariant ensures that only the faulty validators are detected +\* +\* [LCAI-INV-Output.1::TLA-DETECTION-ACCURACY.1] +DetectionAccuracy == + attackers \subseteq Faulty + +============================================================================== diff --git a/spec/light-client/attacks/LCVerificationApi_003_draft.tla b/spec/light-client/attacks/LCVerificationApi_003_draft.tla new file mode 100644 index 000000000..909eab92b --- /dev/null +++ b/spec/light-client/attacks/LCVerificationApi_003_draft.tla @@ -0,0 +1,192 @@ +-------------------- MODULE LCVerificationApi_003_draft -------------------------- +(** + * The common interface of the light client verification and detection. + *) +EXTENDS Integers, FiniteSets + +\* the parameters of Light Client +CONSTANTS + TRUSTING_PERIOD, + (* the period within which the validators are trusted *) + CLOCK_DRIFT, + (* the assumed precision of the clock *) + REAL_CLOCK_DRIFT, + (* the actual clock drift, which under normal circumstances should not + be larger than CLOCK_DRIFT (otherwise, there will be a bug) *) + FAULTY_RATIO + (* a pair <> that limits that ratio of faulty validator in the blockchain + from above (exclusive). Tendermint security model prescribes 1 / 3. *) + +VARIABLES + localClock (* current time as measured by the light client *) + +(* the header is still within the trusting period *) +InTrustingPeriodLocal(header) == + \* note that the assumption about the drift reduces the period of trust + localClock < header.time + TRUSTING_PERIOD - CLOCK_DRIFT + +(* the header is still within the trusting period, even if the clock can go backwards *) +InTrustingPeriodLocalSurely(header) == + \* note that the assumption about the drift reduces the period of trust + localClock < header.time + TRUSTING_PERIOD - 2 * CLOCK_DRIFT + +(* ensure that the local clock does not drift far away from the global clock *) +IsLocalClockWithinDrift(local, global) == + /\ global - REAL_CLOCK_DRIFT <= local + /\ local <= global + REAL_CLOCK_DRIFT + +(** + * Check that the commits in an untrusted block form 1/3 of the next validators + * in a trusted header. + *) +SignedByOneThirdOfTrusted(trusted, untrusted) == + LET TP == Cardinality(trusted.header.NextVS) + SP == Cardinality(untrusted.Commits \intersect trusted.header.NextVS) + IN + 3 * SP > TP + +(** + The first part of the precondition of ValidAndVerified, which does not take + the current time into account. + + [LCV-FUNC-VALID.1::TLA-PRE-UNTIMED.1] + *) +ValidAndVerifiedPreUntimed(trusted, untrusted) == + LET thdr == trusted.header + uhdr == untrusted.header + IN + /\ thdr.height < uhdr.height + \* the trusted block has been created earlier + /\ thdr.time < uhdr.time + /\ untrusted.Commits \subseteq uhdr.VS + /\ LET TP == Cardinality(uhdr.VS) + SP == Cardinality(untrusted.Commits) + IN + 3 * SP > 2 * TP + /\ thdr.height + 1 = uhdr.height => thdr.NextVS = uhdr.VS + (* As we do not have explicit hashes we ignore these three checks of the English spec: + + 1. "trusted.Commit is a commit is for the header trusted.Header, + i.e. it contains the correct hash of the header". + 2. untrusted.Validators = hash(untrusted.Header.Validators) + 3. untrusted.NextValidators = hash(untrusted.Header.NextValidators) + *) + +(** + Check the precondition of ValidAndVerified, including the time checks. + + [LCV-FUNC-VALID.1::TLA-PRE.1] + *) +ValidAndVerifiedPre(trusted, untrusted, checkFuture) == + LET thdr == trusted.header + uhdr == untrusted.header + IN + /\ InTrustingPeriodLocal(thdr) + \* The untrusted block is not from the future (modulo clock drift). + \* Do the check, if it is required. + /\ checkFuture => uhdr.time < localClock + CLOCK_DRIFT + /\ ValidAndVerifiedPreUntimed(trusted, untrusted) + + +(** + Check, whether an untrusted block is valid and verifiable w.r.t. a trusted header. + This test does take current time into account, but only looks at the block structure. + + [LCV-FUNC-VALID.1::TLA-UNTIMED.1] + *) +ValidAndVerifiedUntimed(trusted, untrusted) == + IF ~ValidAndVerifiedPreUntimed(trusted, untrusted) + THEN "INVALID" + ELSE IF untrusted.header.height = trusted.header.height + 1 + \/ SignedByOneThirdOfTrusted(trusted, untrusted) + THEN "SUCCESS" + ELSE "NOT_ENOUGH_TRUST" + +(** + Check, whether an untrusted block is valid and verifiable w.r.t. a trusted header. + + [LCV-FUNC-VALID.1::TLA.1] + *) +ValidAndVerified(trusted, untrusted, checkFuture) == + IF ~ValidAndVerifiedPre(trusted, untrusted, checkFuture) + THEN "INVALID" + ELSE IF ~InTrustingPeriodLocal(untrusted.header) + (* We leave the following test for the documentation purposes. + The implementation should do this test, as signature verification may be slow. + In the TLA+ specification, ValidAndVerified happens in no time. + *) + THEN "FAILED_TRUSTING_PERIOD" + ELSE IF untrusted.header.height = trusted.header.height + 1 + \/ SignedByOneThirdOfTrusted(trusted, untrusted) + THEN "SUCCESS" + ELSE "NOT_ENOUGH_TRUST" + + +(** + The invariant of the light store that is not related to the blockchain + *) +LightStoreInv(fetchedLightBlocks, lightBlockStatus) == + \A lh, rh \in DOMAIN fetchedLightBlocks: + \* for every pair of stored headers that have been verified + \/ lh >= rh + \/ lightBlockStatus[lh] /= "StateVerified" + \/ lightBlockStatus[rh] /= "StateVerified" + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh /\ lightBlockStatus[mh] = "StateVerified" + \* or the left header is outside the trusting period, so no guarantees + \/ LET lhdr == fetchedLightBlocks[lh] + rhdr == fetchedLightBlocks[rh] + IN + \* we can verify the right one using the left one + "SUCCESS" = ValidAndVerifiedUntimed(lhdr, rhdr) + +(** + Correctness states that all the obtained headers are exactly like in the blockchain. + + It is always the case that every verified header in LightStore was generated by + an instance of Tendermint consensus. + + [LCV-DIST-SAFE.1::CORRECTNESS-INV.1] + *) +CorrectnessInv(blockchain, fetchedLightBlocks, lightBlockStatus) == + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] = "StateVerified" => + fetchedLightBlocks[h].header = blockchain[h] + +(** + * When the light client terminates, there are no failed blocks. + * (Otherwise, someone lied to us.) + *) +NoFailedBlocksOnSuccessInv(fetchedLightBlocks, lightBlockStatus) == + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] /= "StateFailed" + +(** + The expected post-condition of VerifyToTarget. + *) +VerifyToTargetPost(blockchain, isPeerCorrect, + fetchedLightBlocks, lightBlockStatus, + trustedHeight, targetHeight, finalState) == + LET trustedHeader == fetchedLightBlocks[trustedHeight].header IN + \* The light client is not lying us on the trusted block. + \* It is straightforward to detect. + /\ lightBlockStatus[trustedHeight] = "StateVerified" + /\ trustedHeight \in DOMAIN fetchedLightBlocks + /\ trustedHeader = blockchain[trustedHeight] + \* the invariants we have found in the light client verification + \* there is a problem with trusting period + /\ isPeerCorrect + => CorrectnessInv(blockchain, fetchedLightBlocks, lightBlockStatus) + \* a correct peer should fail the light client, + \* if the trusted block is in the trusting period + /\ isPeerCorrect /\ InTrustingPeriodLocalSurely(trustedHeader) + => finalState = "finishedSuccess" + /\ finalState = "finishedSuccess" => + /\ lightBlockStatus[targetHeight] = "StateVerified" + /\ targetHeight \in DOMAIN fetchedLightBlocks + /\ NoFailedBlocksOnSuccessInv(fetchedLightBlocks, lightBlockStatus) + /\ LightStoreInv(fetchedLightBlocks, lightBlockStatus) + + +================================================================================== diff --git a/spec/light-client/attacks/MC_5_3.tla b/spec/light-client/attacks/MC_5_3.tla new file mode 100644 index 000000000..552de49ae --- /dev/null +++ b/spec/light-client/attacks/MC_5_3.tla @@ -0,0 +1,18 @@ +------------------------- MODULE MC_5_3 ------------------------------------- + +AllNodes == {"n1", "n2", "n3", "n4", "n5"} +COMMON_HEIGHT == 1 +CONFLICT_HEIGHT == 3 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +FAULTY_RATIO == <<1, 2>> \* < 1 / 2 faulty validators + +VARIABLES + blockchain, \* the reference blockchain + refClock, \* current time in the reference blockchain + Faulty, \* the set of faulty validators + state, \* the state of the light client detector + conflictingBlock, \* an evidence that two peers reported conflicting blocks + attackers + +INSTANCE Isolation_001_draft +============================================================================ diff --git a/spec/light-client/attacks/isolate-attackers_001_draft.md b/spec/light-client/attacks/isolate-attackers_001_draft.md new file mode 100644 index 000000000..e4f585f4a --- /dev/null +++ b/spec/light-client/attacks/isolate-attackers_001_draft.md @@ -0,0 +1,221 @@ + +# Lightclient Attackers Isolation + +> Warning: This is the beginning of an unfinished draft. Don't continue reading! + +Adversarial nodes may have the incentive to lie to a lightclient about the state of a Tendermint blockchain. An attempt to do so is called attack. Light client [verification][verification] checks incoming data by checking a so-called "commit", which is a forwarded set of signed messages that is (supposedly) produced during executing Tendermint consensus. Thus, an attack boils down to creating and signing Tendermint consensus messages in deviation from the Tendermint consensus algorithm rules. + +As Tendermint consensus and light client verification is safe under the assumption of more than 2/3 of correct voting power per block [[TMBC-FM-2THIRDS]][TMBC-FM-2THIRDS-link], this implies that if there was an attack then [[TMBC-FM-2THIRDS]][TMBC-FM-2THIRDS-link] was violated, that is, there is a block such that + +- validators deviated from the protocol, and +- these validators represent more than 1/3 of the voting power in that block. + +In the case of an [attack][node-based-attack-characterization], the lightclient [attack detection mechanism][detection] computes data, so called evidence [[LC-DATA-EVIDENCE.1]][LC-DATA-EVIDENCE-link], that can be used + +- to proof that there has been attack [[TMBC-LC-EVIDENCE-DATA.1]][TMBC-LC-EVIDENCE-DATA-link] and +- as basis to find the actual nodes that deviated from the Tendermint protocol. + +This specification considers how a full node in a Tendermint blockchain can isolate a set of attackers that launched the attack. The set should satisfy + +- the set does not contain a correct validator +- the set contains validators that represent more than 1/3 of the voting power of a block that is still within the unbonding period + +# Outline + +**TODO** when preparing a version for broader review. + +# Part I - Basics + +For definitions of data structures used here, in particular LightBlocks [[LCV-DATA-LIGHTBLOCK.1]](https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#lcv-data-lightblock1), cf. [Light Client Verification][verification]. + +# Part II - Definition of the Problem + +The specification of the [detection mechanism][detection] describes + +- what is a light client attack, +- conditions under which the detector will detect a light client attack, +- and the format of the output data, called evidence, in the case an attack is detected. The format is defined in +[[LC-DATA-EVIDENCE.1]][LC-DATA-EVIDENCE-link] and looks as follows + +```go +type LightClientAttackEvidence struct { + ConflictingBlock LightBlock + CommonHeight int64 +} +``` + +The isolator is a function that gets as input evidence `ev` +and a prefix of the blockchain `bc` at least up to height `ev.ConflictingBlock.Header.Height + 1`. The output is a set of *peerIDs* of validators. + +We assume that the full node is synchronized with the blockchain and has reached the height `ev.ConflictingBlock.Header.Height + 1`. + +#### **[FN-INV-Output.1]** + +When an output is generated it satisfies the following properties: + +- If + - `bc[CommonHeight].bfttime` is within the unbonding period w.r.t. the time at the full node, + - `ev.ConflictingBlock.Header != bc[ev.ConflictingBlock.Header.Height]` + - Validators in `ev.ConflictingBlock.Commit` represent more than 1/3 of the voting power in `bc[ev.CommonHeight].NextValidators` +- Then: A set of validators in `bc[CommonHeight].NextValidators` that + - represent more than 1/3 of the voting power in `bc[ev.commonHeight].NextValidators` + - signed Tendermint consensus messages for height `ev.ConflictingBlock.Header.Height` by violating the Tendermint consensus protocol. +- Else: the empty set. + +# Part IV - Protocol + +Here we discuss how to solve the problem of isolating misbehaving processes. We describe the function `isolateMisbehavingProcesses` as well as all the helping functions below. In [Part V](#part-v---Completeness), we discuss why the solution is complete based on result from analysis with automated tools. + +## Isolation + +### Outline + +> Describe solution (in English), decomposition into functions, where communication to other components happens. + +#### **[LCAI-FUNC-MAIN.1]** + +```go +func isolateMisbehavingProcesses(ev LightClientAttackEvidence, bc Blockchain) []ValidatorAddress { + + reference := bc[ev.conflictingBlock.Header.Height].Header + ev_header := ev.conflictingBlock.Header + + ref_commit := bc[ev.conflictingBlock.Header.Height + 1].Header.LastCommit // + 1 !! + ev_commit := ev.conflictingBlock.Commit + + if violatesTMValidity(reference, ev_header) { + // lunatic light client attack + signatories := Signers(ev.ConflictingBlock.Commit) + bonded_vals := Addresses(bc[ev.CommonHeight].NextValidators) + return intersection(signatories,bonded_vals) + + } + // If this point is reached the validator sets in reference and ev_header are identical + else if RoundOf(ref_commit) == RoundOf(ev_commit) { + // equivocation light client attack + return intersection(Signers(ref_commit), Signers(ev_commit)) + } + else { + // amnesia light client attack + return IsolateAmnesiaAttacker(ev, bc) + } +} +``` + +- Implementation comment + - If the full node has only reached height `ev.conflictingBlock.Header.Height` then `bc[ev.conflictingBlock.Header.Height + 1].Header.LastCommit` refers to the locally stored commit for this height. (This commit must be present by the precondition on `length(bc)`.) + - We check in the precondition that the unbonding period is not expired. However, since time moves on, before handing the validators over Cosmos SDK, the time needs to be checked again to satisfy the contract which requires that only bonded validators are reported. This passing of validators to the SDK is out of scope of this specification. +- Expected precondition + - `length(bc) >= ev.conflictingBlock.Header.Height` + - `ValidAndVerifiedUnbonding(bc[ev.CommonHeight], ev.ConflictingBlock) == SUCCESS` + - `ev.ConflictingBlock.Header != bc[ev.ConflictingBlock.Header.Height]` + - TODO: input light blocks pass basic validation +- Expected postcondition + - [[FN-INV-Output.1]](#FN-INV-Output1) holds +- Error condition + - returns an error if precondition is violated. + +### Details of the Functions + +#### **[LCAI-FUNC-VVU.1]** + +```go +func ValidAndVerifiedUnbonding(trusted LightBlock, untrusted LightBlock) Result +``` + +- Conditions are identical to [[LCV-FUNC-VALID.2]][LCV-FUNC-VALID.link] except the precondition "*trusted.Header.Time > now - trustingPeriod*" is substituted with + - `trusted.Header.Time > now - UnbondingPeriod` + +#### **[LCAI-FUNC-NONVALID.1]** + +```go +func violatesTMValidity(ref Header, ev Header) boolean +``` + +- Implementation remarks + - checks whether the evidence header `ev` violates the validity property of Tendermint Consensus, by checking agains a reference header +- Expected precondition + - `ref.Height == ev.Height` +- Expected postcondition + - returns evaluation of the following disjunction + **[[LCAI-NONVALID-OUTPUT.1]]** == + `ref.ValidatorsHash != ev.ValidatorsHash` or + `ref.NextValidatorsHash != ev.NextValidatorsHash` or + `ref.ConsensusHash != ev.ConsensusHash` or + `ref.AppHash != ev.AppHash` or + `ref.LastResultsHash != ev.LastResultsHash` + +```go +func IsolateAmnesiaAttacker(ev LightClientAttackEvidence, bc Blockchain) []ValidatorAddress +``` + +- Implementation remarks + **TODO:** What should we do here? Refer to the accountability doc? +- Expected postcondition + **TODO:** What should we do here? Refer to the accountability doc? + +```go +func RoundOf(commit Commit) []ValidatorAddress +``` + +- Expected precondition + - `commit` is well-formed. In particular all votes are from the same round `r`. +- Expected postcondition + - returns round `r` that is encoded in all the votes of the commit + +```go +func Signers(commit Commit) []ValidatorAddress +``` + +- Expected postcondition + - returns all validator addresses in `commit` + +```go +func Addresses(vals Validator[]) ValidatorAddress[] +``` + +- Expected postcondition + - returns all validator addresses in `vals` + +# Part V - Completeness + +As discussed in the beginning of this document, an attack boils down to creating and signing Tendermint consensus messages in deviation from the Tendermint consensus algorithm rules. +The main function `isolateMisbehavingProcesses` distinguishes three kinds of wrongly signing messages, namely, + +- lunatic: signing invalid blocks +- equivocation: double-signing valid blocks in the same consensus round +- amnesia: signing conflicting blocks in different consensus rounds, without having seen a quorum of messages that would have allowed to do so. + +The question is whether this captures all attacks. +First observe that the first checking in `isolateMisbehavingProcesses` is `violatesTMValidity`. It takes care of lunatic attacks. If this check passes, that is, if `violatesTMValidity` returns `FALSE` this means that [FN-NONVALID-OUTPUT] evaluates to false, which implies that `ref.ValidatorsHash = ev.ValidatorsHash`. Hence after `violatesTMValidity`, all the involved validators are the ones from the blockchain. It is thus sufficient to analyze one instance of Tendermint consensus with a fixed group membership (set of validators). Also it is sufficient to consider two different valid consensus values, that is, binary consensus. + +**TODO** we have analyzed Tendermint consensus with TLA+ and have accompanied Galois in an independent study of the protocol based on [Ivy proofs](https://github.com/tendermint/spec/tree/master/ivy-proofs). + +# References + +[[supervisor]] The specification of the light client supervisor. + +[[verification]] The specification of the light client verification protocol + +[[detection]] The specification of the light client attack detection mechanism. + +[supervisor]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/supervisor/supervisor_001_draft.md + +[verification]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md + +[detection]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/detection/detection_003_reviewed.md + +[LC-DATA-EVIDENCE-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/detection/detection_003_reviewed.md#lc-data-evidence1 + +[TMBC-LC-EVIDENCE-DATA-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/detection/detection_003_reviewed.md#tmbc-lc-evidence-data1 + +[node-based-attack-characterization]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/detection/detection_003_reviewed.md#node-based-characterization-of-attacks + +[TMBC-FM-2THIRDS-link]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#tmbc-fm-2thirds1 + +[LCV-FUNC-VALID.link]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#lcv-func-valid2 diff --git a/spec/light-client/attacks/isolate-attackers_002_reviewed.md b/spec/light-client/attacks/isolate-attackers_002_reviewed.md new file mode 100644 index 000000000..febcc10a8 --- /dev/null +++ b/spec/light-client/attacks/isolate-attackers_002_reviewed.md @@ -0,0 +1,223 @@ +# Lightclient Attackers Isolation + +Adversarial nodes may have the incentive to lie to a lightclient about the state of a Tendermint blockchain. An attempt to do so is called attack. Light client [verification][verification] checks incoming data by checking a so-called "commit", which is a forwarded set of signed messages that is (supposedly) produced during executing Tendermint consensus. Thus, an attack boils down to creating and signing Tendermint consensus messages in deviation from the Tendermint consensus algorithm rules. + +As Tendermint consensus and light client verification is safe under the assumption of more than 2/3 of correct voting power per block [[TMBC-FM-2THIRDS]][TMBC-FM-2THIRDS-link], this implies that if there was an attack then [[TMBC-FM-2THIRDS]][TMBC-FM-2THIRDS-link] was violated, that is, there is a block such that + +- validators deviated from the protocol, and +- these validators represent more than 1/3 of the voting power in that block. + +In the case of an [attack][node-based-attack-characterization], the lightclient [attack detection mechanism][detection] computes data, so called evidence [[LC-DATA-EVIDENCE.1]][LC-DATA-EVIDENCE-link], that can be used + +- to proof that there has been attack [[TMBC-LC-EVIDENCE-DATA.1]][TMBC-LC-EVIDENCE-DATA-link] and +- as basis to find the actual nodes that deviated from the Tendermint protocol. + +This specification considers how a full node in a Tendermint blockchain can isolate a set of attackers that launched the attack. The set should satisfy + +- the set does not contain a correct validator +- the set contains validators that represent more than 1/3 of the voting power of a block that is still within the unbonding period + +# Outline + +After providing the [problem statement](#Part-I---Basics-and-Definition-of-the-Problem), we specify the [isolator function](#Part-II---Protocol) and close with the discussion about its [correctness](#Part-III---Completeness) which is based on computer-aided analysis of Tendermint Consensus. + +# Part I - Basics and Definition of the Problem + +For definitions of data structures used here, in particular LightBlocks [[LCV-DATA-LIGHTBLOCK.1]](https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#lcv-data-lightblock1), we refer to the specification of [Light Client Verification][verification]. + +The specification of the [detection mechanism][detection] describes + +- what is a light client attack, +- conditions under which the detector will detect a light client attack, +- and the format of the output data, called evidence, in the case an attack is detected. The format is defined in +[[LC-DATA-EVIDENCE.1]][LC-DATA-EVIDENCE-link] and looks as follows + +```go +type LightClientAttackEvidence struct { + ConflictingBlock LightBlock + CommonHeight int64 +} +``` + +The isolator is a function that gets as input evidence `ev` +and a prefix of the blockchain `bc` at least up to height `ev.ConflictingBlock.Header.Height + 1`. The output is a set of *peerIDs* of validators. + +We assume that the full node is synchronized with the blockchain and has reached the height `ev.ConflictingBlock.Header.Height + 1`. + +#### **[LCAI-INV-Output.1]** + +When an output is generated it satisfies the following properties: + +- If + - `bc[CommonHeight].bfttime` is within the unbonding period w.r.t. the time at the full node, + - `ev.ConflictingBlock.Header != bc[ev.ConflictingBlock.Header.Height]` + - Validators in `ev.ConflictingBlock.Commit` represent more than 1/3 of the voting power in `bc[ev.CommonHeight].NextValidators` +- Then: The output is a set of validators in `bc[CommonHeight].NextValidators` that + - represent more than 1/3 of the voting power in `bc[ev.commonHeight].NextValidators` + - signed Tendermint consensus messages for height `ev.ConflictingBlock.Header.Height` by violating the Tendermint consensus protocol. +- Else: the empty set. + +# Part II - Protocol + +Here we discuss how to solve the problem of isolating misbehaving processes. We describe the function `isolateMisbehavingProcesses` as well as all the helping functions below. In [Part III](#part-III---Completeness), we discuss why the solution is complete based on result from analysis with automated tools. + +## Isolation + +### Outline + +We first check whether the conflicting block can indeed be verified from the common height. We then first check whether it was a lunatic attack (violating validity). If this is not the case, we check for equivocation. If this also is not the case, we start the on-chain [accountability protocol](https://docs.google.com/document/d/11ZhMsCj3y7zIZz4udO9l25xqb0kl7gmWqNpGVRzOeyY/edit). + +#### **[LCAI-FUNC-MAIN.1]** + +```go +func isolateMisbehavingProcesses(ev LightClientAttackEvidence, bc Blockchain) []ValidatorAddress { + + reference := bc[ev.conflictingBlock.Header.Height].Header + ev_header := ev.conflictingBlock.Header + + ref_commit := bc[ev.conflictingBlock.Header.Height + 1].Header.LastCommit // + 1 !! + ev_commit := ev.conflictingBlock.Commit + + if violatesTMValidity(reference, ev_header) { + // lunatic light client attack + signatories := Signers(ev.ConflictingBlock.Commit) + bonded_vals := Addresses(bc[ev.CommonHeight].NextValidators) + return intersection(signatories,bonded_vals) + + } + // If this point is reached the validator sets in reference and ev_header are identical + else if RoundOf(ref_commit) == RoundOf(ev_commit) { + // equivocation light client attack + return intersection(Signers(ref_commit), Signers(ev_commit)) + } + else { + // amnesia light client attack + return IsolateAmnesiaAttacker(ev, bc) + } +} +``` + +- Implementation comment + - If the full node has only reached height `ev.conflictingBlock.Header.Height` then `bc[ev.conflictingBlock.Header.Height + 1].Header.LastCommit` refers to the locally stored commit for this height. (This commit must be present by the precondition on `length(bc)`.) + - We check in the precondition that the unbonding period is not expired. However, since time moves on, before handing the validators over Cosmos SDK, the time needs to be checked again to satisfy the contract which requires that only bonded validators are reported. This passing of validators to the SDK is out of scope of this specification. +- Expected precondition + - `length(bc) >= ev.conflictingBlock.Header.Height` + - `ValidAndVerifiedUnbonding(bc[ev.CommonHeight], ev.ConflictingBlock) == SUCCESS` + - `ev.ConflictingBlock.Header != bc[ev.ConflictingBlock.Header.Height]` + - `ev.conflictingBlock` satisfies basic validation (in particular all signed messages in the Commit are from the same round) +- Expected postcondition + - [[FN-INV-Output.1]](#FN-INV-Output1) holds +- Error condition + - returns an error if precondition is violated. + +### Details of the Functions + +#### **[LCAI-FUNC-VVU.1]** + +```go +func ValidAndVerifiedUnbonding(trusted LightBlock, untrusted LightBlock) Result +``` + +- Conditions are identical to [[LCV-FUNC-VALID.2]][LCV-FUNC-VALID.link] except the precondition "*trusted.Header.Time > now - trustingPeriod*" is substituted with + - `trusted.Header.Time > now - UnbondingPeriod` + +#### **[LCAI-FUNC-NONVALID.1]** + +```go +func violatesTMValidity(ref Header, ev Header) boolean +``` + +- Implementation remarks + - checks whether the evidence header `ev` violates the validity property of Tendermint Consensus, by checking against a reference header +- Expected precondition + - `ref.Height == ev.Height` +- Expected postcondition + - returns evaluation of the following disjunction + **[LCAI-NONVALID-OUTPUT.1]** == + `ref.ValidatorsHash != ev.ValidatorsHash` or + `ref.NextValidatorsHash != ev.NextValidatorsHash` or + `ref.ConsensusHash != ev.ConsensusHash` or + `ref.AppHash != ev.AppHash` or + `ref.LastResultsHash != ev.LastResultsHash` + +```go +func IsolateAmnesiaAttacker(ev LightClientAttackEvidence, bc Blockchain) []ValidatorAddress +``` + +- Implementation remarks + - This triggers the [query/response protocol](https://docs.google.com/document/d/11ZhMsCj3y7zIZz4udO9l25xqb0kl7gmWqNpGVRzOeyY/edit). +- Expected postcondition + - returns attackers according to [LCAI-INV-Output.1]. + +```go +func RoundOf(commit Commit) []ValidatorAddress +``` + +- Expected precondition + - `commit` is well-formed. In particular all votes are from the same round `r`. +- Expected postcondition + - returns round `r` that is encoded in all the votes of the commit +- Error condition + - reports error if precondition is violated + +```go +func Signers(commit Commit) []ValidatorAddress +``` + +- Expected postcondition + - returns all validator addresses in `commit` + +```go +func Addresses(vals Validator[]) ValidatorAddress[] +``` + +- Expected postcondition + - returns all validator addresses in `vals` + +# Part III - Completeness + +As discussed in the beginning of this document, an attack boils down to creating and signing Tendermint consensus messages in deviation from the Tendermint consensus algorithm rules. +The main function `isolateMisbehavingProcesses` distinguishes three kinds of wrongly signed messages, namely, + +- lunatic: signing invalid blocks +- equivocation: double-signing valid blocks in the same consensus round +- amnesia: signing conflicting blocks in different consensus rounds, without having seen a quorum of messages that would have allowed to do so. + +The question is whether this captures all attacks. +First observe that the first check in `isolateMisbehavingProcesses` is `violatesTMValidity`. It takes care of lunatic attacks. If this check passes, that is, if `violatesTMValidity` returns `FALSE` this means that [[LCAI-NONVALID-OUTPUT.1]](#LCAI-FUNC-NONVALID1]) evaluates to false, which implies that `ref.ValidatorsHash = ev.ValidatorsHash`. Hence, after `violatesTMValidity`, all the involved validators are the ones from the blockchain. It is thus sufficient to analyze one instance of Tendermint consensus with a fixed group membership (set of validators). Also, as we have two different blocks for the same height, it is sufficient to consider two different valid consensus values, that is, binary consensus. + +For this fixed group membership, we have analyzed the attacks using the TLA+ specification of [Tendermint Consensus in TLA+][tendermint-accountability]. We checked that indeed the only possible scenarios that can lead to violation of agreement are **equivocation** and **amnesia**. An independent study by Galois of the protocol based on [Ivy proofs](https://github.com/tendermint/spec/tree/master/ivy-proofs) led to the same conclusion. + +# References + +[[supervisor]] The specification of the light client supervisor. + +[[verification]] The specification of the light client verification protocol. + +[[detection]] The specification of the light client attack detection mechanism. + +[[tendermint-accountability]]: TLA+ specification to check the types of attacks + +[tendermint-accountability]: +https://github.com/tendermint/spec/blob/master/rust-spec/tendermint-accountability/README.md + +[supervisor]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/supervisor/supervisor_001_draft.md + +[verification]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md + +[detection]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/detection/detection_003_reviewed.md + +[LC-DATA-EVIDENCE-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/detection/detection_003_reviewed.md#lc-data-evidence1 + +[TMBC-LC-EVIDENCE-DATA-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/detection/detection_003_reviewed.md#tmbc-lc-evidence-data1 + +[node-based-attack-characterization]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/detection/detection_003_reviewed.md#node-based-characterization-of-attacks + +[TMBC-FM-2THIRDS-link]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#tmbc-fm-2thirds1 + +[LCV-FUNC-VALID.link]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#lcv-func-valid2 diff --git a/spec/light-client/attacks/notes-on-evidence-handling.md b/spec/light-client/attacks/notes-on-evidence-handling.md new file mode 100644 index 000000000..4b7d81919 --- /dev/null +++ b/spec/light-client/attacks/notes-on-evidence-handling.md @@ -0,0 +1,219 @@ + +# Light client attacks + +We define a light client attack as detection of conflicting headers for a given height that can be verified +starting from the trusted light block. A light client attack is defined in the context of interactions of +light client with two peers. One of the peers (called primary) defines a trace of verified light blocks +(primary trace) that are being checked against trace of the other peer (called witness) that we call +witness trace. + +A light client attack is defined by the primary and witness traces +that have a common root (the same trusted light block for a common height) but forms +conflicting branches (end of traces is for the same height but with different headers). +Note that conflicting branches could be arbitrarily big as branches continue to diverge after +a bifurcation point. We propose an approach that allows us to define a valid light client attack +only with a common light block and a single conflicting light block. We rely on the fact that +we assume that the primary is under suspicion (therefore not trusted) and that the witness plays +support role to detect and process an attack (therefore trusted). Therefore, once a light client +detects an attack, it needs to send to a witness only missing data (common height +and conflicting light block) as it has its trace. Keeping light client attack data of constant size +saves bandwidth and reduces an attack surface. As we will explain below, although in the context of +light client core +[verification](https://github.com/informalsystems/tendermint-rs/tree/master/docs/spec/lightclient/verification) +the roles of primary and witness are clearly defined, +in case of the attack, we run the same attack detection procedure twice where the roles are swapped. +The rationale is that the light client does not know what peer is correct (on a right main branch) +so it tries to create and submit an attack evidence to both peers. + +Light client attack evidence consists of a conflicting light block and a common height. + +```go +type LightClientAttackEvidence struct { + ConflictingBlock LightBlock + CommonHeight int64 +} +``` + +Full node can validate a light client attack evidence by executing the following procedure: + +```go +func IsValid(lcaEvidence LightClientAttackEvidence, bc Blockchain) boolean { + commonBlock = GetLightBlock(bc, lcaEvidence.CommonHeight) + if commonBlock == nil return false + + // Note that trustingPeriod in ValidAndVerified is set to UNBONDING_PERIOD + verdict = ValidAndVerified(commonBlock, lcaEvidence.ConflictingBlock) + conflictingHeight = lcaEvidence.ConflictingBlock.Header.Height + + return verdict == OK and bc[conflictingHeight].Header != lcaEvidence.ConflictingBlock.Header +} +``` + +## Light client attack creation + +Given a trusted light block `trusted`, a light node executes the bisection algorithm to verify header +`untrusted` at some height `h`. If the bisection algorithm succeeds, then the header `untrusted` is verified. +Headers that are downloaded as part of the bisection algorithm are stored in a store and they are also in +the verified state. Therefore, after the bisection algorithm successfully terminates we have a trace of +the light blocks ([] LightBlock) we obtained from the primary that we call primary trace. + +### Primary trace + +The following invariant holds for the primary trace: + +- Given a `trusted` light block, target height `h`, and `primary_trace` ([] LightBlock): + *primary_trace[0] == trusted* and *primary_trace[len(primary_trace)-1].Height == h* and + successive light blocks are passing light client verification logic. + +### Witness with a conflicting header + +The verified header at height `h` is cross-checked with every witness as part of +[detection](https://github.com/informalsystems/tendermint-rs/tree/master/docs/spec/lightclient/detection). +If a witness returns the conflicting header at the height `h` the following procedure is executed to verify +if the conflicting header comes from the valid trace and if that's the case to create an attack evidence: + +#### Helper functions + +We assume the following helper functions: + +```go +// Returns trace of verified light blocks starting from rootHeight and ending with targetHeight. +Trace(lightStore LightStore, rootHeight int64, targetHeight int64) LightBlock[] + +// Returns validator set for the given height +GetValidators(bc Blockchain, height int64) Validator[] + +// Returns validator set for the given height +GetValidators(bc Blockchain, height int64) Validator[] + +// Return validator addresses for the given validators +GetAddresses(vals Validator[]) ValidatorAddress[] +``` + +```go +func DetectLightClientAttacks(primary PeerID, + primary_trace []LightBlock, + witness PeerID) (LightClientAttackEvidence, LightClientAttackEvidence) { + primary_lca_evidence, witness_trace = DetectLightClientAttack(primary_trace, witness) + + witness_lca_evidence = nil + if witness_trace != nil { + witness_lca_evidence, _ = DetectLightClientAttack(witness_trace, primary) + } + return primary_lca_evidence, witness_lca_evidence +} + +func DetectLightClientAttack(trace []LightBlock, peer PeerID) (LightClientAttackEvidence, []LightBlock) { + + lightStore = new LightStore().Update(trace[0], StateTrusted) + + for i in 1..len(trace)-1 { + lightStore, result = VerifyToTarget(peer, lightStore, trace[i].Header.Height) + + if result == ResultFailure then return (nil, nil) + + current = lightStore.Get(trace[i].Header.Height) + + // if obtained header is the same as in the trace we continue with a next height + if current.Header == trace[i].Header continue + + // we have identified a conflicting header + commonBlock = trace[i-1] + conflictingBlock = trace[i] + + return (LightClientAttackEvidence { conflictingBlock, commonBlock.Header.Height }, + Trace(lightStore, trace[i-1].Header.Height, trace[i].Header.Height)) + } + return (nil, nil) +} +``` + +## Evidence handling + +As part of on chain evidence handling, full nodes identifies misbehaving processes and informs +the application, so they can be slashed. Note that only bonded validators should +be reported to the application. There are three types of attacks that can be executed against +Tendermint light client: + +- lunatic attack +- equivocation attack and +- amnesia attack. + +We now specify the evidence handling logic. + +```go +func detectMisbehavingProcesses(lcAttackEvidence LightClientAttackEvidence, bc Blockchain) []ValidatorAddress { + assume IsValid(lcaEvidence, bc) + + // lunatic light client attack + if !isValidBlock(current.Header, conflictingBlock.Header) { + conflictingCommit = lcAttackEvidence.ConflictingBlock.Commit + bondedValidators = GetNextValidators(bc, lcAttackEvidence.CommonHeight) + + return getSigners(conflictingCommit) intersection GetAddresses(bondedValidators) + + // equivocation light client attack + } else if current.Header.Round == conflictingBlock.Header.Round { + conflictingCommit = lcAttackEvidence.ConflictingBlock.Commit + trustedCommit = bc[conflictingBlock.Header.Height+1].LastCommit + + return getSigners(trustedCommit) intersection getSigners(conflictingCommit) + + // amnesia light client attack + } else { + HandleAmnesiaAttackEvidence(lcAttackEvidence, bc) + } +} + +// Block validity in this context is defined by the trusted header. +func isValidBlock(trusted Header, conflicting Header) boolean { + return trusted.ValidatorsHash == conflicting.ValidatorsHash and + trusted.NextValidatorsHash == conflicting.NextValidatorsHash and + trusted.ConsensusHash == conflicting.ConsensusHash and + trusted.AppHash == conflicting.AppHash and + trusted.LastResultsHash == conflicting.LastResultsHash +} + +func getSigners(commit Commit) []ValidatorAddress { + signers = []ValidatorAddress + for (i, commitSig) in commit.Signatures { + if commitSig.BlockIDFlag == BlockIDFlagCommit { + signers.append(commitSig.ValidatorAddress) + } + } + return signers +} +``` + +Note that amnesia attack evidence handling involves more complex processing, i.e., cannot be +defined simply on amnesia attack evidence. We explain in the following section a protocol +for handling amnesia attack evidence. + +### Amnesia attack evidence handling + +Detecting faulty processes in case of the amnesia attack is more complex and cannot be inferred +purely based on attack evidence data. In this case, in order to detect misbehaving processes we need +access to votes processes sent/received during the conflicting height. Therefore, amnesia handling assumes that +validators persist all votes received and sent during multi-round heights (as amnesia attack +is only possible in heights that executes over multiple rounds, i.e., commit round > 0). + +To simplify description of the algorithm we assume existence of the trusted oracle called monitor that will +drive the algorithm and output faulty processes at the end. Monitor can be implemented in a +distributed setting as on-chain module. The algorithm works as follows: + 1) Monitor sends votesets request to validators of the conflicting height. Validators + are expected to send their votesets within predefined timeout. + 2) Upon receiving votesets request, validators send their votesets to a monitor. + 2) Validators which have not sent its votesets within timeout are considered faulty. + 3) The preprocessing of the votesets is done. That means that the received votesets are analyzed + and each vote (valid) sent by process p is added to the voteset of the sender p. This phase ensures that + votes sent by faulty processes observed by at least one correct validator cannot be excluded from the analysis. + 4) Votesets of every validator are analyzed independently to decide whether the validator is correct or faulty. + A faulty validators is the one where at least one of those invalid transitions is found: + - More than one PREVOTE message is sent in a round + - More than one PRECOMMIT message is sent in a round + - PRECOMMIT message is sent without receiving +2/3 of voting-power equivalent + appropriate PREVOTE messages + - PREVOTE message is sent for the value V’ in round r’ and the PRECOMMIT message had + been sent for the value V in round r by the same process (r’ > r) and there are no + +2/3 of voting-power equivalent PREVOTE(vr, V’) messages (vr ≥ 0 and vr > r and vr < r’) + as the justification for sending PREVOTE(r’, V’) diff --git a/spec/light-client/detection/004bmc-apalache-ok.csv b/spec/light-client/detection/004bmc-apalache-ok.csv new file mode 100644 index 000000000..bf4f53ea2 --- /dev/null +++ b/spec/light-client/detection/004bmc-apalache-ok.csv @@ -0,0 +1,10 @@ +no;filename;tool;timeout;init;inv;next;args +1;LCD_MC3_3_faulty.tla;apalache;1h;;CommonHeightOnEvidenceInv;;--length=10 +2;LCD_MC3_3_faulty.tla;apalache;1h;;AccuracyInv;;--length=10 +3;LCD_MC3_3_faulty.tla;apalache;1h;;PrecisionInvLocal;;--length=10 +4;LCD_MC3_4_faulty.tla;apalache;1h;;CommonHeightOnEvidenceInv;;--length=10 +5;LCD_MC3_4_faulty.tla;apalache;1h;;AccuracyInv;;--length=10 +6;LCD_MC3_4_faulty.tla;apalache;1h;;PrecisionInvLocal;;--length=10 +7;LCD_MC4_4_faulty.tla;apalache;1h;;CommonHeightOnEvidenceInv;;--length=10 +8;LCD_MC4_4_faulty.tla;apalache;1h;;AccuracyInv;;--length=10 +9;LCD_MC4_4_faulty.tla;apalache;1h;;PrecisionInvLocal;;--length=10 diff --git a/spec/light-client/detection/005bmc-apalache-error.csv b/spec/light-client/detection/005bmc-apalache-error.csv new file mode 100644 index 000000000..1b9dd05ca --- /dev/null +++ b/spec/light-client/detection/005bmc-apalache-error.csv @@ -0,0 +1,4 @@ +no;filename;tool;timeout;init;inv;next;args +1;LCD_MC3_3_faulty.tla;apalache;1h;;PrecisionInvGrayZone;;--length=10 +2;LCD_MC3_4_faulty.tla;apalache;1h;;PrecisionInvGrayZone;;--length=10 +3;LCD_MC4_4_faulty.tla;apalache;1h;;PrecisionInvGrayZone;;--length=10 diff --git a/spec/light-client/detection/Blockchain_003_draft.tla b/spec/light-client/detection/Blockchain_003_draft.tla new file mode 100644 index 000000000..2b37c1b18 --- /dev/null +++ b/spec/light-client/detection/Blockchain_003_draft.tla @@ -0,0 +1,164 @@ +------------------------ MODULE Blockchain_003_draft ----------------------------- +(* + This is a high-level specification of Tendermint blockchain + that is designed specifically for the light client. + Validators have the voting power of one. If you like to model various + voting powers, introduce multiple copies of the same validator + (do not forget to give them unique names though). + *) +EXTENDS Integers, FiniteSets + +Min(a, b) == IF a < b THEN a ELSE b + +CONSTANT + AllNodes, + (* a set of all nodes that can act as validators (correct and faulty) *) + ULTIMATE_HEIGHT, + (* a maximal height that can be ever reached (modelling artifact) *) + TRUSTING_PERIOD + (* the period within which the validators are trusted *) + +Heights == 1..ULTIMATE_HEIGHT (* possible heights *) + +(* A commit is just a set of nodes who have committed the block *) +Commits == SUBSET AllNodes + +(* The set of all block headers that can be on the blockchain. + This is a simplified version of the Block data structure in the actual implementation. *) +BlockHeaders == [ + height: Heights, + \* the block height + time: Int, + \* the block timestamp in some integer units + lastCommit: Commits, + \* the nodes who have voted on the previous block, the set itself instead of a hash + (* in the implementation, only the hashes of V and NextV are stored in a block, + as V and NextV are stored in the application state *) + VS: SUBSET AllNodes, + \* the validators of this bloc. We store the validators instead of the hash. + NextVS: SUBSET AllNodes + \* the validators of the next block. We store the next validators instead of the hash. +] + +(* A signed header is just a header together with a set of commits *) +LightBlocks == [header: BlockHeaders, Commits: Commits] + +VARIABLES + refClock, + (* the current global time in integer units as perceived by the reference chain *) + blockchain, + (* A sequence of BlockHeaders, which gives us a bird view of the blockchain. *) + Faulty + (* A set of faulty nodes, which can act as validators. We assume that the set + of faulty processes is non-decreasing. If a process has recovered, it should + connect using a different id. *) + +(* all variables, to be used with UNCHANGED *) +vars == <> + +(* The set of all correct nodes in a state *) +Corr == AllNodes \ Faulty + +(* APALACHE annotations *) +a <: b == a \* type annotation + +NT == STRING +NodeSet(S) == S <: {NT} +EmptyNodeSet == NodeSet({}) + +BT == [height |-> Int, time |-> Int, lastCommit |-> {NT}, VS |-> {NT}, NextVS |-> {NT}] + +LBT == [header |-> BT, Commits |-> {NT}] +(* end of APALACHE annotations *) + +(****************************** BLOCKCHAIN ************************************) + +(* the header is still within the trusting period *) +InTrustingPeriod(header) == + refClock < header.time + TRUSTING_PERIOD + +(* + Given a function pVotingPower \in D -> Powers for some D \subseteq AllNodes + and pNodes \subseteq D, test whether the set pNodes \subseteq AllNodes has + more than 2/3 of voting power among the nodes in D. + *) +TwoThirds(pVS, pNodes) == + LET TP == Cardinality(pVS) + SP == Cardinality(pVS \intersect pNodes) + IN + 3 * SP > 2 * TP \* when thinking in real numbers, not integers: SP > 2.0 / 3.0 * TP + +(* + Given a set of FaultyNodes, test whether the voting power of the correct nodes in D + is more than 2/3 of the voting power of the faulty nodes in D. + + Parameters: + - pFaultyNodes is a set of nodes that are considered faulty + - pVS is a set of all validators, maybe including Faulty, intersecting with it, etc. + - pMaxFaultRatio is a pair <> that limits the ratio a / b of the faulty + validators from above (exclusive) + *) +FaultyValidatorsFewerThan(pFaultyNodes, pVS, maxRatio) == + LET FN == pFaultyNodes \intersect pVS \* faulty nodes in pNodes + CN == pVS \ pFaultyNodes \* correct nodes in pNodes + CP == Cardinality(CN) \* power of the correct nodes + FP == Cardinality(FN) \* power of the faulty nodes + IN + \* CP + FP = TP is the total voting power + LET TP == CP + FP IN + FP * maxRatio[2] < TP * maxRatio[1] + +(* Can a block be produced by a correct peer, or an authenticated Byzantine peer *) +IsLightBlockAllowedByDigitalSignatures(ht, block) == + \/ block.header = blockchain[ht] \* signed by correct and faulty (maybe) + \/ /\ block.Commits \subseteq Faulty + /\ block.header.height = ht + /\ block.header.time >= 0 \* signed only by faulty + +(* + Initialize the blockchain to the ultimate height right in the initial states. + We pick the faulty validators statically, but that should not affect the light client. + + Parameters: + - pMaxFaultyRatioExclusive is a pair <> that bound the number of + faulty validators in each block by the ratio a / b (exclusive) + *) +InitToHeight(pMaxFaultyRatioExclusive) == + /\ Faulty \in SUBSET AllNodes \* some nodes may fail + \* pick the validator sets and last commits + /\ \E vs, lastCommit \in [Heights -> SUBSET AllNodes]: + \E timestamp \in [Heights -> Int]: + \* refClock is at least as early as the timestamp in the last block + /\ \E tm \in Int: refClock = tm /\ tm >= timestamp[ULTIMATE_HEIGHT] + \* the genesis starts on day 1 + /\ timestamp[1] = 1 + /\ vs[1] = AllNodes + /\ lastCommit[1] = EmptyNodeSet + /\ \A h \in Heights \ {1}: + /\ lastCommit[h] \subseteq vs[h - 1] \* the non-validators cannot commit + /\ TwoThirds(vs[h - 1], lastCommit[h]) \* the commit has >2/3 of validator votes + \* the faulty validators have the power below the threshold + /\ FaultyValidatorsFewerThan(Faulty, vs[h], pMaxFaultyRatioExclusive) + /\ timestamp[h] > timestamp[h - 1] \* the time grows monotonically + /\ timestamp[h] < timestamp[h - 1] + TRUSTING_PERIOD \* but not too fast + \* form the block chain out of validator sets and commits (this makes apalache faster) + /\ blockchain = [h \in Heights |-> + [height |-> h, + time |-> timestamp[h], + VS |-> vs[h], + NextVS |-> IF h < ULTIMATE_HEIGHT THEN vs[h + 1] ELSE AllNodes, + lastCommit |-> lastCommit[h]] + ] \****** + +(********************* BLOCKCHAIN ACTIONS ********************************) +(* + Advance the clock by zero or more time units. + *) +AdvanceTime == + /\ \E tm \in Int: tm >= refClock /\ refClock' = tm + /\ UNCHANGED <> + +============================================================================= +\* Modification History +\* Last modified Wed Jun 10 14:10:54 CEST 2020 by igor +\* Created Fri Oct 11 15:45:11 CEST 2019 by igor diff --git a/spec/light-client/detection/LCD_MC3_3_faulty.tla b/spec/light-client/detection/LCD_MC3_3_faulty.tla new file mode 100644 index 000000000..cef1df4d3 --- /dev/null +++ b/spec/light-client/detection/LCD_MC3_3_faulty.tla @@ -0,0 +1,27 @@ +------------------------- MODULE LCD_MC3_3_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 3 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +IS_SECONDARY_CORRECT == TRUE +FAULTY_RATIO == <<2, 3>> \* < 1 / 3 faulty validators + +VARIABLES + blockchain, (* the reference blockchain *) + localClock, (* current time in the light client *) + refClock, (* current time in the reference blockchain *) + Faulty, (* the set of faulty validators *) + state, (* the state of the light client detector *) + fetchedLightBlocks1, (* a function from heights to LightBlocks *) + fetchedLightBlocks2, (* a function from heights to LightBlocks *) + fetchedLightBlocks1b, (* a function from heights to LightBlocks *) + commonHeight, (* the height that is trusted in CreateEvidenceForPeer *) + nextHeightToTry, (* the index in CreateEvidenceForPeer *) + evidences + +INSTANCE LCDetector_003_draft +============================================================================ diff --git a/spec/light-client/detection/LCD_MC3_4_faulty.tla b/spec/light-client/detection/LCD_MC3_4_faulty.tla new file mode 100644 index 000000000..06bcdee13 --- /dev/null +++ b/spec/light-client/detection/LCD_MC3_4_faulty.tla @@ -0,0 +1,27 @@ +------------------------- MODULE LCD_MC3_4_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 4 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +IS_SECONDARY_CORRECT == TRUE +FAULTY_RATIO == <<2, 3>> \* < 1 / 3 faulty validators + +VARIABLES + blockchain, (* the reference blockchain *) + localClock, (* current time in the light client *) + refClock, (* current time in the reference blockchain *) + Faulty, (* the set of faulty validators *) + state, (* the state of the light client detector *) + fetchedLightBlocks1, (* a function from heights to LightBlocks *) + fetchedLightBlocks2, (* a function from heights to LightBlocks *) + fetchedLightBlocks1b, (* a function from heights to LightBlocks *) + commonHeight, (* the height that is trusted in CreateEvidenceForPeer *) + nextHeightToTry, (* the index in CreateEvidenceForPeer *) + evidences + +INSTANCE LCDetector_003_draft +============================================================================ diff --git a/spec/light-client/detection/LCD_MC4_4_faulty.tla b/spec/light-client/detection/LCD_MC4_4_faulty.tla new file mode 100644 index 000000000..fdb97d961 --- /dev/null +++ b/spec/light-client/detection/LCD_MC4_4_faulty.tla @@ -0,0 +1,27 @@ +------------------------- MODULE LCD_MC4_4_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 4 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +IS_SECONDARY_CORRECT == TRUE +FAULTY_RATIO == <<2, 3>> \* < 2 / 3 faulty validators + +VARIABLES + blockchain, (* the reference blockchain *) + localClock, (* current time in the light client *) + refClock, (* current time in the reference blockchain *) + Faulty, (* the set of faulty validators *) + state, (* the state of the light client detector *) + fetchedLightBlocks1, (* a function from heights to LightBlocks *) + fetchedLightBlocks2, (* a function from heights to LightBlocks *) + fetchedLightBlocks1b, (* a function from heights to LightBlocks *) + commonHeight, (* the height that is trusted in CreateEvidenceForPeer *) + nextHeightToTry, (* the index in CreateEvidenceForPeer *) + evidences + +INSTANCE LCDetector_003_draft +============================================================================ diff --git a/spec/light-client/detection/LCD_MC5_5_faulty.tla b/spec/light-client/detection/LCD_MC5_5_faulty.tla new file mode 100644 index 000000000..fdbd87b8b --- /dev/null +++ b/spec/light-client/detection/LCD_MC5_5_faulty.tla @@ -0,0 +1,27 @@ +------------------------- MODULE LCD_MC5_5_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4", "n5"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 5 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +IS_SECONDARY_CORRECT == TRUE +FAULTY_RATIO == <<2, 3>> \* < 1 / 3 faulty validators + +VARIABLES + blockchain, (* the reference blockchain *) + localClock, (* current time in the light client *) + refClock, (* current time in the reference blockchain *) + Faulty, (* the set of faulty validators *) + state, (* the state of the light client detector *) + fetchedLightBlocks1, (* a function from heights to LightBlocks *) + fetchedLightBlocks2, (* a function from heights to LightBlocks *) + fetchedLightBlocks1b, (* a function from heights to LightBlocks *) + commonHeight, (* the height that is trusted in CreateEvidenceForPeer *) + nextHeightToTry, (* the index in CreateEvidenceForPeer *) + evidences + +INSTANCE LCDetector_003_draft +============================================================================ diff --git a/spec/light-client/detection/LCDetector_003_draft.tla b/spec/light-client/detection/LCDetector_003_draft.tla new file mode 100644 index 000000000..e2d32e996 --- /dev/null +++ b/spec/light-client/detection/LCDetector_003_draft.tla @@ -0,0 +1,373 @@ +-------------------------- MODULE LCDetector_003_draft ----------------------------- +(** + * This is a specification of the light client detector module. + * It follows the English specification: + * + * https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/detection/detection_003_reviewed.md + * + * The assumptions made in this specification: + * + * - light client connects to one primary and one secondary peer + * + * - the light client has its own local clock that can drift from the reference clock + * within the envelope [refClock - CLOCK_DRIFT, refClock + CLOCK_DRIFT]. + * The local clock may increase as well as decrease in the the envelope + * (similar to clock synchronization). + * + * - the ratio of the faulty validators is set as the parameter. + * + * Igor Konnov, Josef Widder, 2020 + *) + +EXTENDS Integers + +\* the parameters of Light Client +CONSTANTS + AllNodes, + (* a set of all nodes that can act as validators (correct and faulty) *) + TRUSTED_HEIGHT, + (* an index of the block header that the light client trusts by social consensus *) + TARGET_HEIGHT, + (* an index of the block header that the light client tries to verify *) + TRUSTING_PERIOD, + (* the period within which the validators are trusted *) + CLOCK_DRIFT, + (* the assumed precision of the clock *) + REAL_CLOCK_DRIFT, + (* the actual clock drift, which under normal circumstances should not + be larger than CLOCK_DRIFT (otherwise, there will be a bug) *) + FAULTY_RATIO, + (* a pair <> that limits that ratio of faulty validator in the blockchain + from above (exclusive). Tendermint security model prescribes 1 / 3. *) + IS_PRIMARY_CORRECT, + IS_SECONDARY_CORRECT + +VARIABLES + blockchain, (* the reference blockchain *) + localClock, (* the local clock of the light client *) + refClock, (* the reference clock in the reference blockchain *) + Faulty, (* the set of faulty validators *) + state, (* the state of the light client detector *) + fetchedLightBlocks1, (* a function from heights to LightBlocks *) + fetchedLightBlocks2, (* a function from heights to LightBlocks *) + fetchedLightBlocks1b, (* a function from heights to LightBlocks *) + commonHeight, (* the height that is trusted in CreateEvidenceForPeer *) + nextHeightToTry, (* the index in CreateEvidenceForPeer *) + evidences (* a set of evidences *) + +vars == <> + +\* (old) type annotations in Apalache +a <: b == a + + +\* instantiate a reference chain +ULTIMATE_HEIGHT == TARGET_HEIGHT + 1 +BC == INSTANCE Blockchain_003_draft + WITH ULTIMATE_HEIGHT <- (TARGET_HEIGHT + 1) + +\* use the light client API +LC == INSTANCE LCVerificationApi_003_draft + +\* evidence type +ET == [peer |-> STRING, conflictingBlock |-> BC!LBT, commonHeight |-> Int] + +\* is the algorithm in the terminating state +IsTerminated == + state \in { <<"NoEvidence", "PRIMARY">>, + <<"NoEvidence", "SECONDARY">>, + <<"FaultyPeer", "PRIMARY">>, + <<"FaultyPeer", "SECONDARY">>, + <<"FoundEvidence", "PRIMARY">> } + + +(********************************* Initialization ******************************) + +\* initialization for the light blocks data structure +InitLightBlocks(lb, Heights) == + \* BC!LightBlocks is an infinite set, as time is not restricted. + \* Hence, we initialize the light blocks by picking the sets inside. + \E vs, nextVS, lastCommit, commit \in [Heights -> SUBSET AllNodes]: + \* although [Heights -> Int] is an infinite set, + \* Apalache needs just one instance of this set, so it does not complain. + \E timestamp \in [Heights -> Int]: + LET hdr(h) == + [height |-> h, + time |-> timestamp[h], + VS |-> vs[h], + NextVS |-> nextVS[h], + lastCommit |-> lastCommit[h]] + IN + LET lightHdr(h) == + [header |-> hdr(h), Commits |-> commit[h]] + IN + lb = [ h \in Heights |-> lightHdr(h) ] + +\* initialize the detector algorithm +Init == + \* initialize the blockchain to TARGET_HEIGHT + 1 + /\ BC!InitToHeight(FAULTY_RATIO) + /\ \E tm \in Int: + tm >= 0 /\ LC!IsLocalClockWithinDrift(tm, refClock) /\ localClock = tm + \* start with the secondary looking for evidence + /\ state = <<"Init", "SECONDARY">> /\ commonHeight = 0 /\ nextHeightToTry = 0 + /\ evidences = {} <: {ET} + \* Precompute a possible result of light client verification for the primary. + \* It is the input to the detection algorithm. + /\ \E Heights1 \in SUBSET(TRUSTED_HEIGHT..TARGET_HEIGHT): + /\ TRUSTED_HEIGHT \in Heights1 + /\ TARGET_HEIGHT \in Heights1 + /\ InitLightBlocks(fetchedLightBlocks1, Heights1) + \* As we have a non-deterministic scheduler, for every trace that has + \* an unverified block, there is a filtered trace that only has verified + \* blocks. This is a deep observation. + /\ LET status == [h \in Heights1 |-> "StateVerified"] IN + LC!VerifyToTargetPost(blockchain, IS_PRIMARY_CORRECT, + fetchedLightBlocks1, status, + TRUSTED_HEIGHT, TARGET_HEIGHT, "finishedSuccess") + \* initialize the other data structures to the default values + /\ LET trustedBlock == blockchain[TRUSTED_HEIGHT] + trustedLightBlock == [header |-> trustedBlock, Commits |-> AllNodes] + IN + /\ fetchedLightBlocks2 = [h \in {TRUSTED_HEIGHT} |-> trustedLightBlock] + /\ fetchedLightBlocks1b = [h \in {TRUSTED_HEIGHT} |-> trustedLightBlock] + + +(********************************* Transitions ******************************) + +\* a block should contain a copy of the block from the reference chain, +\* with a matching commit +CopyLightBlockFromChain(block, height) == + LET ref == blockchain[height] + lastCommit == + IF height < ULTIMATE_HEIGHT + THEN blockchain[height + 1].lastCommit + \* for the ultimate block, which we never use, + \* as ULTIMATE_HEIGHT = TARGET_HEIGHT + 1 + ELSE blockchain[height].VS + IN + block = [header |-> ref, Commits |-> lastCommit] + +\* Either the primary is correct and the block comes from the reference chain, +\* or the block is produced by a faulty primary. +\* +\* [LCV-FUNC-FETCH.1::TLA.1] +FetchLightBlockInto(isPeerCorrect, block, height) == + IF isPeerCorrect + THEN CopyLightBlockFromChain(block, height) + ELSE BC!IsLightBlockAllowedByDigitalSignatures(height, block) + + +(** + * Pick the next height, for which there is a block. + *) +PickNextHeight(fetchedBlocks, height) == + LET largerHeights == { h \in DOMAIN fetchedBlocks: h > height } IN + IF largerHeights = ({} <: {Int}) + THEN -1 + ELSE CHOOSE h \in largerHeights: + \A h2 \in largerHeights: h <= h2 + + +(** + * Check, whether the target header matches at the secondary and primary. + *) +CompareLast == + /\ state = <<"Init", "SECONDARY">> + \* fetch a block from the secondary: + \* non-deterministically pick a block that matches the constraints + /\ \E latest \in BC!LightBlocks: + \* for the moment, we ignore the possibility of a timeout when fetching a block + /\ FetchLightBlockInto(IS_SECONDARY_CORRECT, latest, TARGET_HEIGHT) + /\ IF latest.header = fetchedLightBlocks1[TARGET_HEIGHT].header + THEN \* if the headers match, CreateEvidence is not called + /\ state' = <<"NoEvidence", "SECONDARY">> + \* save the retrieved block for further analysis + /\ fetchedLightBlocks2' = + [h \in (DOMAIN fetchedLightBlocks2) \union {TARGET_HEIGHT} |-> + IF h = TARGET_HEIGHT THEN latest ELSE fetchedLightBlocks2[h]] + /\ UNCHANGED <> + ELSE \* prepare the parameters for CreateEvidence + /\ commonHeight' = TRUSTED_HEIGHT + /\ nextHeightToTry' = PickNextHeight(fetchedLightBlocks1, TRUSTED_HEIGHT) + /\ state' = IF nextHeightToTry' >= 0 + THEN <<"CreateEvidence", "SECONDARY">> + ELSE <<"FaultyPeer", "SECONDARY">> + /\ UNCHANGED fetchedLightBlocks2 + + /\ UNCHANGED <> + + +\* the actual loop in CreateEvidence +CreateEvidence(peer, isPeerCorrect, refBlocks, targetBlocks) == + /\ state = <<"CreateEvidence", peer>> + \* precompute a possible result of light client verification for the secondary + \* we have to introduce HeightRange, because Apalache can only handle a..b + \* for constant a and b + /\ LET HeightRange == { h \in TRUSTED_HEIGHT..TARGET_HEIGHT: + commonHeight <= h /\ h <= nextHeightToTry } IN + \E HeightsRange \in SUBSET(HeightRange): + /\ commonHeight \in HeightsRange /\ nextHeightToTry \in HeightsRange + /\ InitLightBlocks(targetBlocks, HeightsRange) + \* As we have a non-deterministic scheduler, for every trace that has + \* an unverified block, there is a filtered trace that only has verified + \* blocks. This is a deep observation. + /\ \E result \in {"finishedSuccess", "finishedFailure"}: + LET targetStatus == [h \in HeightsRange |-> "StateVerified"] IN + \* call VerifyToTarget for (commonHeight, nextHeightToTry). + /\ LC!VerifyToTargetPost(blockchain, isPeerCorrect, + targetBlocks, targetStatus, + commonHeight, nextHeightToTry, result) + \* case 1: the peer has failed (or the trusting period has expired) + /\ \/ /\ result /= "finishedSuccess" + /\ state' = <<"FaultyPeer", peer>> + /\ UNCHANGED <> + \* case 2: success + \/ /\ result = "finishedSuccess" + /\ LET block1 == refBlocks[nextHeightToTry] IN + LET block2 == targetBlocks[nextHeightToTry] IN + IF block1.header /= block2.header + THEN \* the target blocks do not match + /\ state' = <<"FoundEvidence", peer>> + /\ evidences' = evidences \union + {[peer |-> peer, + conflictingBlock |-> block1, + commonHeight |-> commonHeight]} + /\ UNCHANGED <> + ELSE \* the target blocks match + /\ nextHeightToTry' = PickNextHeight(refBlocks, nextHeightToTry) + /\ commonHeight' = nextHeightToTry + /\ state' = IF nextHeightToTry' >= 0 + THEN state + ELSE <<"NoEvidence", peer>> + /\ UNCHANGED evidences + +SwitchToPrimary == + /\ state = <<"FoundEvidence", "SECONDARY">> + /\ nextHeightToTry' = PickNextHeight(fetchedLightBlocks2, commonHeight) + /\ state' = <<"CreateEvidence", "PRIMARY">> + /\ UNCHANGED <> + + +CreateEvidenceForSecondary == + /\ CreateEvidence("SECONDARY", IS_SECONDARY_CORRECT, + fetchedLightBlocks1, fetchedLightBlocks2') + /\ UNCHANGED <> + +CreateEvidenceForPrimary == + /\ CreateEvidence("PRIMARY", IS_PRIMARY_CORRECT, + fetchedLightBlocks2, + fetchedLightBlocks1b') + /\ UNCHANGED <> + +(* + The local and global clocks can be updated. They can also drift from each other. + Note that the local clock can actually go backwards in time. + However, it still stays in the drift envelope + of [refClock - REAL_CLOCK_DRIFT, refClock + REAL_CLOCK_DRIFT]. + *) +AdvanceClocks == + /\ \E tm \in Int: + tm >= refClock /\ refClock' = tm + /\ \E tm \in Int: + /\ tm >= localClock + /\ LC!IsLocalClockWithinDrift(tm, refClock') + /\ localClock' = tm + +(** + Execute AttackDetector for one secondary. + + [LCD-FUNC-DETECTOR.2::LOOP.1] + *) +Next == + /\ AdvanceClocks + /\ \/ CompareLast + \/ CreateEvidenceForSecondary + \/ SwitchToPrimary + \/ CreateEvidenceForPrimary + + +\* simple invariants to see the progress of the detector +NeverNoEvidence == state[1] /= "NoEvidence" +NeverFoundEvidence == state[1] /= "FoundEvidence" +NeverFaultyPeer == state[1] /= "FaultyPeer" +NeverCreateEvidence == state[1] /= "CreateEvidence" + +NeverFoundEvidencePrimary == state /= <<"FoundEvidence", "PRIMARY">> + +NeverReachTargetHeight == nextHeightToTry < TARGET_HEIGHT + +EvidenceWhenFaultyInv == + (state[1] = "FoundEvidence") => (~IS_PRIMARY_CORRECT \/ ~IS_SECONDARY_CORRECT) + +NoEvidenceForCorrectInv == + IS_PRIMARY_CORRECT /\ IS_SECONDARY_CORRECT => evidences = {} <: {ET} + +(** + * If we find an evidence by peer A, peer B has ineded given us a corrupted + * header following the common height. Also, we have a verification trace by peer A. + *) +CommonHeightOnEvidenceInv == + \A e \in evidences: + LET conflicting == e.conflictingBlock IN + LET conflictingHeader == conflicting.header IN + \* the evidence by suspectingPeer can be verified by suspectingPeer in one step + LET SoundEvidence(suspectingPeer, peerBlocks) == + \/ e.peer /= suspectingPeer + \* the conflicting block from another peer verifies against the common height + \/ /\ "SUCCESS" = + LC!ValidAndVerifiedUntimed(peerBlocks[e.commonHeight], conflicting) + \* and the headers of the same height by the two peers do not match + /\ peerBlocks[conflictingHeader.height].header /= conflictingHeader + IN + /\ SoundEvidence("PRIMARY", fetchedLightBlocks1b) + /\ SoundEvidence("SECONDARY", fetchedLightBlocks2) + +(** + * If the light client does not find an evidence, + * then there is no attack on the light client. + *) +AccuracyInv == + (LC!InTrustingPeriodLocal(fetchedLightBlocks1[TARGET_HEIGHT].header) + /\ state = <<"NoEvidence", "SECONDARY">>) + => + (fetchedLightBlocks1[TARGET_HEIGHT].header = blockchain[TARGET_HEIGHT] + /\ fetchedLightBlocks2[TARGET_HEIGHT].header = blockchain[TARGET_HEIGHT]) + +(** + * The primary reports a corrupted block at the target height. If the secondary is + * correct and the algorithm has terminated, we should get the evidence. + * This property is violated due to clock drift. VerifyToTarget may fail with + * the correct secondary within the trusting period (due to clock drift, locally + * we think that we are outside of the trusting period). + *) +PrecisionInvGrayZone == + (/\ fetchedLightBlocks1[TARGET_HEIGHT].header /= blockchain[TARGET_HEIGHT] + /\ BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) + /\ IS_SECONDARY_CORRECT + /\ IsTerminated) + => + evidences /= {} <: {ET} + +(** + * The primary reports a corrupted block at the target height. If the secondary is + * correct and the algorithm has terminated, we should get the evidence. + * This invariant does not fail, as we are using the local clock to check the trusting + * period. + *) +PrecisionInvLocal == + (/\ fetchedLightBlocks1[TARGET_HEIGHT].header /= blockchain[TARGET_HEIGHT] + /\ LC!InTrustingPeriodLocalSurely(blockchain[TRUSTED_HEIGHT]) + /\ IS_SECONDARY_CORRECT + /\ IsTerminated) + => + evidences /= {} <: {ET} + +==================================================================================== diff --git a/spec/light-client/detection/LCVerificationApi_003_draft.tla b/spec/light-client/detection/LCVerificationApi_003_draft.tla new file mode 100644 index 000000000..909eab92b --- /dev/null +++ b/spec/light-client/detection/LCVerificationApi_003_draft.tla @@ -0,0 +1,192 @@ +-------------------- MODULE LCVerificationApi_003_draft -------------------------- +(** + * The common interface of the light client verification and detection. + *) +EXTENDS Integers, FiniteSets + +\* the parameters of Light Client +CONSTANTS + TRUSTING_PERIOD, + (* the period within which the validators are trusted *) + CLOCK_DRIFT, + (* the assumed precision of the clock *) + REAL_CLOCK_DRIFT, + (* the actual clock drift, which under normal circumstances should not + be larger than CLOCK_DRIFT (otherwise, there will be a bug) *) + FAULTY_RATIO + (* a pair <> that limits that ratio of faulty validator in the blockchain + from above (exclusive). Tendermint security model prescribes 1 / 3. *) + +VARIABLES + localClock (* current time as measured by the light client *) + +(* the header is still within the trusting period *) +InTrustingPeriodLocal(header) == + \* note that the assumption about the drift reduces the period of trust + localClock < header.time + TRUSTING_PERIOD - CLOCK_DRIFT + +(* the header is still within the trusting period, even if the clock can go backwards *) +InTrustingPeriodLocalSurely(header) == + \* note that the assumption about the drift reduces the period of trust + localClock < header.time + TRUSTING_PERIOD - 2 * CLOCK_DRIFT + +(* ensure that the local clock does not drift far away from the global clock *) +IsLocalClockWithinDrift(local, global) == + /\ global - REAL_CLOCK_DRIFT <= local + /\ local <= global + REAL_CLOCK_DRIFT + +(** + * Check that the commits in an untrusted block form 1/3 of the next validators + * in a trusted header. + *) +SignedByOneThirdOfTrusted(trusted, untrusted) == + LET TP == Cardinality(trusted.header.NextVS) + SP == Cardinality(untrusted.Commits \intersect trusted.header.NextVS) + IN + 3 * SP > TP + +(** + The first part of the precondition of ValidAndVerified, which does not take + the current time into account. + + [LCV-FUNC-VALID.1::TLA-PRE-UNTIMED.1] + *) +ValidAndVerifiedPreUntimed(trusted, untrusted) == + LET thdr == trusted.header + uhdr == untrusted.header + IN + /\ thdr.height < uhdr.height + \* the trusted block has been created earlier + /\ thdr.time < uhdr.time + /\ untrusted.Commits \subseteq uhdr.VS + /\ LET TP == Cardinality(uhdr.VS) + SP == Cardinality(untrusted.Commits) + IN + 3 * SP > 2 * TP + /\ thdr.height + 1 = uhdr.height => thdr.NextVS = uhdr.VS + (* As we do not have explicit hashes we ignore these three checks of the English spec: + + 1. "trusted.Commit is a commit is for the header trusted.Header, + i.e. it contains the correct hash of the header". + 2. untrusted.Validators = hash(untrusted.Header.Validators) + 3. untrusted.NextValidators = hash(untrusted.Header.NextValidators) + *) + +(** + Check the precondition of ValidAndVerified, including the time checks. + + [LCV-FUNC-VALID.1::TLA-PRE.1] + *) +ValidAndVerifiedPre(trusted, untrusted, checkFuture) == + LET thdr == trusted.header + uhdr == untrusted.header + IN + /\ InTrustingPeriodLocal(thdr) + \* The untrusted block is not from the future (modulo clock drift). + \* Do the check, if it is required. + /\ checkFuture => uhdr.time < localClock + CLOCK_DRIFT + /\ ValidAndVerifiedPreUntimed(trusted, untrusted) + + +(** + Check, whether an untrusted block is valid and verifiable w.r.t. a trusted header. + This test does take current time into account, but only looks at the block structure. + + [LCV-FUNC-VALID.1::TLA-UNTIMED.1] + *) +ValidAndVerifiedUntimed(trusted, untrusted) == + IF ~ValidAndVerifiedPreUntimed(trusted, untrusted) + THEN "INVALID" + ELSE IF untrusted.header.height = trusted.header.height + 1 + \/ SignedByOneThirdOfTrusted(trusted, untrusted) + THEN "SUCCESS" + ELSE "NOT_ENOUGH_TRUST" + +(** + Check, whether an untrusted block is valid and verifiable w.r.t. a trusted header. + + [LCV-FUNC-VALID.1::TLA.1] + *) +ValidAndVerified(trusted, untrusted, checkFuture) == + IF ~ValidAndVerifiedPre(trusted, untrusted, checkFuture) + THEN "INVALID" + ELSE IF ~InTrustingPeriodLocal(untrusted.header) + (* We leave the following test for the documentation purposes. + The implementation should do this test, as signature verification may be slow. + In the TLA+ specification, ValidAndVerified happens in no time. + *) + THEN "FAILED_TRUSTING_PERIOD" + ELSE IF untrusted.header.height = trusted.header.height + 1 + \/ SignedByOneThirdOfTrusted(trusted, untrusted) + THEN "SUCCESS" + ELSE "NOT_ENOUGH_TRUST" + + +(** + The invariant of the light store that is not related to the blockchain + *) +LightStoreInv(fetchedLightBlocks, lightBlockStatus) == + \A lh, rh \in DOMAIN fetchedLightBlocks: + \* for every pair of stored headers that have been verified + \/ lh >= rh + \/ lightBlockStatus[lh] /= "StateVerified" + \/ lightBlockStatus[rh] /= "StateVerified" + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh /\ lightBlockStatus[mh] = "StateVerified" + \* or the left header is outside the trusting period, so no guarantees + \/ LET lhdr == fetchedLightBlocks[lh] + rhdr == fetchedLightBlocks[rh] + IN + \* we can verify the right one using the left one + "SUCCESS" = ValidAndVerifiedUntimed(lhdr, rhdr) + +(** + Correctness states that all the obtained headers are exactly like in the blockchain. + + It is always the case that every verified header in LightStore was generated by + an instance of Tendermint consensus. + + [LCV-DIST-SAFE.1::CORRECTNESS-INV.1] + *) +CorrectnessInv(blockchain, fetchedLightBlocks, lightBlockStatus) == + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] = "StateVerified" => + fetchedLightBlocks[h].header = blockchain[h] + +(** + * When the light client terminates, there are no failed blocks. + * (Otherwise, someone lied to us.) + *) +NoFailedBlocksOnSuccessInv(fetchedLightBlocks, lightBlockStatus) == + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] /= "StateFailed" + +(** + The expected post-condition of VerifyToTarget. + *) +VerifyToTargetPost(blockchain, isPeerCorrect, + fetchedLightBlocks, lightBlockStatus, + trustedHeight, targetHeight, finalState) == + LET trustedHeader == fetchedLightBlocks[trustedHeight].header IN + \* The light client is not lying us on the trusted block. + \* It is straightforward to detect. + /\ lightBlockStatus[trustedHeight] = "StateVerified" + /\ trustedHeight \in DOMAIN fetchedLightBlocks + /\ trustedHeader = blockchain[trustedHeight] + \* the invariants we have found in the light client verification + \* there is a problem with trusting period + /\ isPeerCorrect + => CorrectnessInv(blockchain, fetchedLightBlocks, lightBlockStatus) + \* a correct peer should fail the light client, + \* if the trusted block is in the trusting period + /\ isPeerCorrect /\ InTrustingPeriodLocalSurely(trustedHeader) + => finalState = "finishedSuccess" + /\ finalState = "finishedSuccess" => + /\ lightBlockStatus[targetHeight] = "StateVerified" + /\ targetHeight \in DOMAIN fetchedLightBlocks + /\ NoFailedBlocksOnSuccessInv(fetchedLightBlocks, lightBlockStatus) + /\ LightStoreInv(fetchedLightBlocks, lightBlockStatus) + + +================================================================================== diff --git a/spec/light-client/detection/README.md b/spec/light-client/detection/README.md new file mode 100644 index 000000000..50d8ab6fe --- /dev/null +++ b/spec/light-client/detection/README.md @@ -0,0 +1,75 @@ +--- +order: 1 +parent: + title: Fork Detection + order: 2 +--- + +# Tendermint fork detection and IBC fork detection + +## Status + +This is a work in progress. +This directory captures the ongoing work and discussion on fork +detection both in the context of a Tendermint light node and in the +context of IBC. It contains the following files + +### [detection.md](./detection.md) + +a draft of the light node fork detection including "proof of fork" + definition, that is, the data structure to submit evidence to full + nodes. + +### [discussions.md](./discussions.md) + +A collection of ideas and intuitions from recent discussions + +- the outcome of recent discussion +- a sketch of the light client supervisor to provide the context in + which fork detection happens +- a discussion about lightstore semantics + +### [req-ibc-detection.md](./req-ibc-detection.md) + +- a collection of requirements for fork detection in the IBC + context. In particular it contains a section "Required Changes in + ICS 007" with necessary updates to ICS 007 to support Tendermint + fork detection + +### [draft-functions.md](./draft-functions.md) + +In order to address the collected requirements, we started to sketch +some functions that we will need in the future when we specify in more +detail the + +- fork detections +- proof of fork generation +- proof of fork verification + +on the following components. + +- IBC on-chain components +- Relayer + +### TODOs + +We decided to merge the files while there are still open points to +address to record the current state an move forward. In particular, +the following points need to be addressed: + +- + +- + +- + +- + +Most likely we will write a specification on the light client +supervisor along the outcomes of + +- + +that also addresses initialization + +- diff --git a/spec/light-client/detection/detection_001_reviewed.md b/spec/light-client/detection/detection_001_reviewed.md new file mode 100644 index 000000000..db8c29a14 --- /dev/null +++ b/spec/light-client/detection/detection_001_reviewed.md @@ -0,0 +1,788 @@ +# ***This an unfinished draft. Comments are welcome!*** + +**TODO:** We will need to do small adaptations to the verification +spec to reflect the semantics in the LightStore (verified, trusted, +untrusted, etc. not needed anymore). In more detail: + +- The state of the Lightstore needs to go. Functions like `LatestVerified` can +keep the name but will ignore state as it will not exist anymore. + +- verification spec should be adapted to the second parameter of +`VerifyToTarget` +being a lightblock; new version number of function tag; + +- We should clarify what is the expectation of VerifyToTarget +so if it returns TimeoutError it can be assumed faulty. I guess that +VerifyToTarget with correct full node should never terminate with +TimeoutError. + +- We need to introduce a new version number for the new +specification. So we should decide how + to handle that. + +# Light Client Attack Detector + +In this specification, we strengthen the light client to be resistant +against so-called light client attacks. In a light client attack, all +the correct Tendermint full nodes agree on the sequence of generated +blocks (no fork), but a set of faulty full nodes attack a light client +by generating (signing) a block that deviates from the block of the +same height on the blockchain. In order to do so, some of these faulty +full nodes must have been validators before and violate +[[TMBC-FM-2THIRDS]](TMBC-FM-2THIRDS-link), as otherwise, if +[[TMBC-FM-2THIRDS]](TMBC-FM-2THIRDS-link) would hold, +[verification](verification) would satisfy +[[LCV-SEQ-SAFE.1]](LCV-SEQ-SAFE-link). + +An attack detector (or detector for short) is a mechanism that is used +by the light client [supervisor](supervisor) after +[verification](verification) of a new light block +with the primary, to cross-check the newly learned light block with +other peers (secondaries). It expects as input a light block with some +height *root* (that serves as a root of trust), and a verification +trace (a sequence of lightblocks) that the primary provided. + +In case the detector observes a light client attack, it computes +evidence data that can be used by Tendermint full nodes to isolate a +set of faulty full nodes that are still within the unbonding period +(more than 1/3 of the voting power of the validator set at some block of the chain), +and report them via ABCI to the application of a Tendermint blockchain +in order to punish faulty nodes. + +## Context of this document + +The light client [verification](verification) specification is +designed for the Tendermint failure model (1/3 assumption) +[[TMBC-FM-2THIRDS]](TMBC-FM-2THIRDS-link). It is safe under this +assumption, and live if it can reliably (that is, no message loss, no +duplication, and eventually delivered) and timely communicate with a +correct full node. If [[TMBC-FM-2THIRDS]](TMBC-FM-2THIRDS-link) assumption is violated, the light client +can be fooled to trust a light block that was not generated by +Tendermint consensus. + +This specification, the attack detector, is a "second line of +defense", in case the 1/3 assumption is violated. Its goal is to +detect a light client attack (conflicting light blocks) and collect +evidence. However, it is impractical to probe all full nodes. At this +time we consider a simple scheme of maintaining an address book of +known full nodes from which a small subset (e.g., 4) are chosen +initially to communicate with. More involved book keeping with +probabilistic guarantees can be considered at later stages of the +project. + +The light client maintains a simple address book containing addresses +of full nodes that it can pick as primary and secondaries. To obtain +a new light block, the light client first does +[verification](verification) with the primary, and then cross-checks +the light block (and the trace of light blocks that led to it) with +the secondaries using this specification. + +## Tendermint Consensus and Light Client Attacks + +In this section we will give some mathematical definitions of what we +mean by light client attacks (that are considered in this +specification) and how they differ from main-chain forks. To this end +we start by defining some properties of the sequence of blocks that is +decided upon by Tendermint consensus in normal operation (if the +Tendermint failure model holds +[[TMBC-FM-2THIRDS]](TMBC-FM-2THIRDS-link)), +and then define different +deviations that correspond to attack scenarios. + +#### **[TMBC-GENESIS.1]** + +Let *Genesis* be the agreed-upon initial block (file). + +#### **[TMBC-FUNC-SIGN.1]** + +Let *b* and *c* be two light blocks with *b.Header.Height + 1 = +c.Header.Height*. We define the predicate **signs(b,c)** to hold +iff *c.Header.LastCommit* is in *PossibleCommit(b)*. +[[TMBC-SOUND-DISTR-POSS-COMMIT.1]](TMBC-SOUND-DISTR-POSS-COMMIT-link). + +> The above encodes sequential verification, that is, intuitively, +> b.Header.NextValidators = c.Header.Validators and 2/3 of +> these Validators signed c? + +#### **[TMBC-FUNC-SUPPORT.1]** + +Let *b* and *c* be two light blocks. We define the predicate +**supports(b,c,t)** to hold iff + +- *t - trustingPeriod < b.Header.Time < t* +- the voting power in *b.NextValidators* of nodes in *c.Commit* + is more than 1/3 of *TotalVotingPower(b.Header.NextValidators)* + +> That is, if the [Tendermint failure model](TMBC-FM-2THIRDS-link) +> holds, then *c* has been signed by at least one correct full node, cf. +> [[TMBC-VAL-CONTAINS-CORR.1]](TMBC-VAL-CONTAINS-CORR-link). +> The following formalizes that *b* was properly generated by +> Tendermint; *b* can be traced back to genesis + +#### **[TMBC-SEQ-ROOTED.1]** + +Let *b* be a light block. +We define *sequ-rooted(b)* iff for all *i*, *1 <= i < h = b.Header.Height*, +there exist light blocks *a(i)* s.t. + +- *a(1) = Genesis* and +- *a(h) = b* and +- *signs( a(i) , a(i+1) )*. + +> The following formalizes that *c* is trusted based on *b* in +> skipping verification. Observe that we do not require here (yet) +> that *b* was properly generated. + +#### **[TMBC-SKIP-TRACE.1]** + +Let *b* and *c* be light blocks. We define *skip-trace(b,c,t)* if at +time t there exists an *h* and a sequence *a(1)*, ... *a(h)* s.t. + +- *a(1) = b* and +- *a(h) = c* and +- *supports( a(i), a(i+1), t)*, for all i, *1 <= i < h*. + +We call such a sequence *a(1)*, ... *a(h)* a **verification trace**. + +> The following formalizes that two light blocks of the same height +> should agree on the content of the header. Observe that *b* and *c* +> may disagree on the Commit. This is a special case if the canonical +> commit has not been decided on, that is, if b.Header.Height is the +> maximum height of all blocks decided upon by Tendermint at this +> moment. + +#### **[TMBC-SIGN-SKIP-MATCH.1]** + +Let *a*, *b*, *c*, be light blocks and *t* a time, we define +*sign-skip-match(a,b,c,t) = true* iff the following implication +evaluates to true: + +- *sequ-rooted(a)* and +- *b.Header.Height = c.Header.Height* and +- *skip-trace(a,b,t)* +- *skip-trace(a,c,t)* + +implies *b.Header = c.Header*. + +> Observe that *sign-skip-match* is defined via an implication. If it +> evaluates to false this means that the left-hand-side of the +> implication evaluates to true, and the right-hand-side evaluates to +> false. In particular, there are two **different** headers *b* and +> *c* that both can be verified from a common block *a* from the +> chain. Thus, the following describes an attack. + +#### **[TMBC-ATTACK.1]** + +If there exists three light blocks a, b, and c, with +*sign-skip-match(a,b,c,t) = false* then we have an *attack*. We say +we have **an attack at height** *b.Header.Height* and write +*attack(a,b,c,t)*. + +> The lightblock *a* need not be unique, that is, there may be +> several blocks that satisfy the above requirement for the same +> blocks *b* and *c*. + +[[TMBC-ATTACK.1]](#TMBC-ATTACK1) is a formalization of the violation +of the agreement property based on the result of consensus, that is, +the generated blocks. + +**Remark.** +Violation of agreement is only possible if more than 1/3 of the validators (or +next validators) of some previous block deviated from the protocol. The +upcoming "accountability" specification will describe how to compute +a set of at least 1/3 faulty nodes from two conflicting blocks. [] + +There are different ways to characterize forks +and attack scenarios. This specification uses the "node-based +characterization of attacks" which focuses on what kinds of nodes are +affected (light nodes vs. full nodes). For future reference and +discussion we also provide a +"block-based characterization of attacks" below. + +### Node-based characterization of attacks + +#### **[TMBC-MC-FORK.1]** + +We say there is a (main chain) fork at time *t* if + +- there are two correct full nodes *i* and *j* and +- *i* is different from *j* and +- *i* has decided on *b* and +- *j* has decided on *c* and +- there exist *a* such that *attack(a,b,c,t)*. + +#### **[TMBC-LC-ATTACK.1]** + +We say there is a light client attack at time *t*, if + +- there is **no** (main chain) fork [[TMBC-MC-FORK.1]](#TMBC-MC-FORK1), and +- there exist nodes that have computed light blocks *b* and *c* and +- there exist *a* such that *attack(a,b,c,t)*. + +We say the attack is at height *a.Header.Height*. + +> In this specification we consider detection of light client +> attacks. Intuitively, the case we consider is that +> light block *b* is the one from the +> blockchain, and some attacker has computed *c* and tries to wrongly +> convince +> the light client that *c* is the block from the chain. + +#### **[TMBC-LC-ATTACK-EVIDENCE.1]** + +We consider the following case of a light client attack +[[TMBC-LC-ATTACK.1]](#TMBC-LC-ATTACK1): + +- *attack(a,b,c,t)* +- there is a peer p1 that has a sequence *chain* of blocks from *a* to *b* +- *skip-trace(a,c,t)*: by [[TMBC-SKIP-TRACE.1]](#TMBC-SKIP-TRACE1) there is a + verification trace *v* of the form *a = v(1)*, ... *v(h) = c* + +Evidence for p1 (that proves an attack) consists for index i +of v(i) and v(i+1) such that + +- E1(i). v(i) is equal to the block of *chain* at height v(i).Height, and +- E2(i). v(i+1) that is different from the block of *chain* at + height v(i+1).height + +> Observe p1 can +> +> - check that v(i+1) differs from its block at that height, and +> - verify v(i+1) in one step from v(i) as v is a verification trace. + +**Proposition.** In the case of attack, evidence exists. +*Proof.* First observe that + +- (A). (NOT E2(i)) implies E1(i+1) + +Now by contradiction assume there is no evidence. Thus + +- for all i, we have NOT E1(i) or NOT E2(i) +- for i = 1 we have E1(1) and thus NOT E2(1) + thus by induction on i, by (A) we have for all i that **E1(i)** +- from attack we have E2(h-1), and as there is no evidence for + i = h - 1 we get **NOT E1(h-1)**. Contradiction. +QED. + +#### **[TMBC-LC-EVIDENCE-DATA.1]** + +To prove the attack to p1, because of Point E1, it is sufficient to +submit + +- v(i).Height (rather than v(i)). +- v(i+1) + +This information is *evidence for height v(i).Height*. + +### Block-based characterization of attacks + +In this section we provide a different characterization of attacks. It +is not defined on the nodes that are affected but purely on the +content of the blocks. In that sense these definitions are less +operational. + +> They might be relevant for a closer analysis of fork scenarios on the +> chain, which is out of the scope of this specification. + +#### **[TMBC-SIGN-UNIQUE.1]** + +Let *b* and *c* be light blocks, we define the predicate +*sign-unique(b,c)* to evaluate to true iff the following implication +evaluates to true: + +- *b.Header.Height = c.Header.Height* and +- *sequ-rooted(b)* and +- *sequ-rooted(c)* + +implies *b = c*. + +#### **[TMBC-BLOCKS-MCFORK.1]** + +If there exists two light blocks b and c, with *sign-unique(b,c) = +false* then we have a *fork*. + +> The difference of the above definition to +> [[TMBC-MC-FORK.1]](#TMBC-MC-FORK1) is subtle. The latter requires a +> full node being affected by a bad block while +> [[TMBC-BLOCKS-MCFORK.1]](#TMBC-BLOCKS-MCFORK1) just requires that a +> bad block exists, possibly in memory of an attacker. +> The following captures a light client fork. There is no fork up to +> the height of block b. However, c is of that height, is different, +> and passes skipping verification. It is a stricter property than +> [[TMBC-LC-ATTACK.1]](#TMBC-LC-ATTACK1), as +> [[TMBC-LC-ATTACK.1]](#TMBC-LC-ATTACK1) requires that no correct full +> node is affected. + +#### **[TMBC-BLOCKS-LCFORK.1]** + +Let *a*, *b*, *c*, be light blocks and *t* a time. We define +*light-client-fork(a,b,c,t)* iff + +- *sign-skip-match(a,b,c,t) = false* and +- *sequ-rooted(b)* and +- *b* is "unique", that is, for all *d*, *sequ-rooted(d)* and + *d.Header.Height = b.Header.Height* implies *d = b* + +> Finally, let us also define bogus blocks that have no support. +> Observe that bogus is even defined if there is a fork. +> Also, for the definition it would be sufficient to restrict *a* to +> *a.height < b.height* (which is implied by the definitions which +> unfold until *supports()*). + +#### **[TMBC-BOGUS.1]** + +Let *b* be a light block and *t* a time. We define *bogus(b,t)* iff + +- *sequ-rooted(b) = false* and +- for all *a*, *sequ-rooted(a)* implies *skip-trace(a,b,t) = false* + +### Informal Problem statement + +There is no sequential specification: the detector only makes sense +in a distributed systems where some nodes misbehave. + +We work under the assumption that full nodes and validators are +responsible for detecting attacks on the main chain, and the evidence +reactor takes care of broadcasting evidence to communicate +misbehaving nodes via ABCI to the application, and halt the chain in +case of a fork. The point of this specification is to shield a light +clients against attacks that cannot be detected by full nodes, and +are fully addressed at light clients (and consequently IBC relayers, +which use the light client protocols to observe the state of a +blockchain). In order to provide full nodes the incentive to follow +the protocols when communicating with the light client, this +specification also considers the generation of evidence that will +also be processed by the Tendermint blockchain. + +#### **[LCD-IP-MODEL.1]** + +The detector is designed under the assumption that + +- [[TMBC-FM-2THIRDS]](TMBC-FM-2THIRDS-link) may be violated +- there is no fork on the main chain. + +> As a result some faulty full nodes may launch an attack on a light +> client. + +The following requirements are operational in that they describe how +things should be done, rather than what should be done. However, they +do not constitute temporal logic verification conditions. For those, +see [LCD-DIST-*] below. + +The detector is called in the [supervisor](supervisor) as follows + +```go +Evidences := AttackDetector(root_of_trust, verifiedLS);` +``` + +where + +- `root-of-trust` is a light block that is trusted (that is, +except upon initialization, the primary and the secondaries +agreed on in the past), and +- `verifiedLS` is a lightstore that contains a verification trace that + starts from a lightblock that can be verified with the + `root-of-trust` in one step and ends with a lightblock of the height + requested by the user +- `Evidences` is a list of evidences for misbehavior + +#### **[LCD-IP-STATEMENT.1]** + +Whenever AttackDetector is called, the detector should for each +secondary try to replay the verification trace `verifiedLS` with the +secondary + +- in case replaying leads to detection of a light client attack + (one of the lightblocks differ from the one in verifiedLS with + the same height), we should return evidence +- if the secondary cannot provide a verification trace, we have no + proof for an attack. Block *b* may be bogus. In this case the + secondary is faulty and it should be replaced. + +## Assumptions/Incentives/Environment + +It is not in the interest of faulty full nodes to talk to the +detector as long as the detector is connected to at least one +correct full node. This would only increase the likelihood of +misbehavior being detected. Also we cannot punish them easily +(cheaply). The absence of a response need not be the fault of the full +node. + +Correct full nodes have the incentive to respond, because the +detector may help them to understand whether their header is a good +one. We can thus base liveness arguments of the detector on +the assumptions that correct full nodes reliably talk to the +detector. + +### Assumptions + +#### **[LCD-A-CorrFull.1]** + +At all times there is at least one correct full +node among the primary and the secondaries. + +> For this version of the detection we take this assumption. It +> allows us to establish the invariant that the lightblock +> `root-of-trust` is always the one from the blockchain, and we can +> use it as starting point for the evidence computation. Moreover, it +> allows us to establish the invariant at the supervisor that any +> lightblock in the (top-level) lightstore is from the blockchain. +> In the future we might design a lightclient based on the assumption +> that at least in regular intervals the lightclient is connected to a +> correct full node. This will require the detector to reconsider +> `root-of-trust`, and remove lightblocks from the top-level +> lightstore. + +#### **[LCD-A-RelComm.1]** + +Communication between the detector and a correct full node is +reliable and bounded in time. Reliable communication means that +messages are not lost, not duplicated, and eventually delivered. There +is a (known) end-to-end delay *Delta*, such that if a message is sent +at time *t* then it is received and processed by time *t + Delta*. +This implies that we need a timeout of at least *2 Delta* for remote +procedure calls to ensure that the response of a correct peer arrives +before the timeout expires. + +## Definitions + +### Evidence + +Following the definition of +[[TMBC-LC-ATTACK-EVIDENCE.1]](#TMBC-LC-ATTACK-EVIDENCE1), by evidence +we refer to a variable of the following type + +#### **[LC-DATA-EVIDENCE.1]** + +```go +type LightClientAttackEvidence struct { + ConflictingBlock LightBlock + CommonHeight int64 +} +``` + +As the above data is computed for a specific peer, the following +data structure wraps the evidence and adds the peerID. + +#### **[LC-DATA-EVIDENCE-INT.1]** + +```go +type InternalEvidence struct { + Evidence LightClientAttackEvidence + Peer PeerID +} +``` + +#### **[LC-SUMBIT-EVIDENCE.1]** + +```go +func submitEvidence(Evidences []InternalEvidence) +``` + +- Expected postcondition + - for each `ev` in `Evidences`: submit `ev.Evidence` to `ev.Peer` + +--- + +### LightStore + +Lightblocks and LightStores are defined in the verification +specification [LCV-DATA-LIGHTBLOCK.1] and [LCV-DATA-LIGHTSTORE.1]. See +the [verification specification][verification] for details. + +## (Distributed) Problem statement + +> As the attack detector is there to reduce the impact of faulty +> nodes, and faulty nodes imply that there is a distributed system, +> there is no sequential specification to which this distributed +> problem statement may refer to. + +The detector gets as input a trusted lightblock called *root* and an +auxiliary lightstore called *primary_trace* with lightblocks that have +been verified before, and that were provided by the primary. + +#### **[LCD-DIST-INV-ATTACK.1]** + +If the detector returns evidence for height *h* +[[TMBC-LC-EVIDENCE-DATA.1]](#TMBC-LC-EVIDENCE-DATA1), then there is an +attack at height *h*. [[TMBC-LC-ATTACK.1]](#TMBC-LC-ATTACK1) + +#### **[LCD-DIST-INV-STORE.1]** + +If the detector does not return evidence, then *primary_trace* +contains only blocks from the blockchain. + +#### **[LCD-DIST-LIVE.1]** + +The detector eventually terminates. + +#### **[LCD-DIST-TERM-NORMAL.1]** + +If + +- the *primary_trace* contains only blocks from the blockchain, and +- there is no attack, and +- *Secondaries* is always non-empty, and +- the age of *root* is always less than the trusting period, + +then the detector does not return evidence. + +#### **[LCD-DIST-TERM-ATTACK.1]** + +If + +- there is an attack, and +- a secondary reports a block that conflicts + with one of the blocks in *primary_trace*, and +- *Secondaries* is always non-empty, and +- the age of *root* is always less than the trusting period, + +then the detector returns evidence. + +> Observe that above we require that "a secondary reports a block that +> conflicts". If there is an attack, but no secondary tries to launch +> it against the detector (or the message from the secondary is lost +> by the network), then there is nothing to detect for us. + +#### **[LCD-DIST-SAFE-SECONDARY.1]** + +No correct secondary is ever replaced. + +#### **[LCD-DIST-SAFE-BOGUS.1]** + +If + +- a secondary reports a bogus lightblock, +- the age of *root* is always less than the trusting period, + +then the secondary is replaced before the detector terminates. + +> The above property is quite operational ("reports"), but it captures +> quite closely the requirement. As the +> detector only makes sense in a distributed setting, and does +> not have a sequential specification, less "pure" +> specification are acceptable. + +# Protocol + +## Functions and Data defined in other Specifications + +### From the supervisor + +```go +Replace_Secondary(addr Address, root-of-trust LightBlock) +``` + +### From the verifier + +```go +func VerifyToTarget(primary PeerID, root LightBlock, + targetHeight Height) (LightStore, Result) +``` + +> Note: the above differs from the current version in the second +> parameter. verification will be revised. + +Observe that `VerifyToTarget` does communication with the secondaries +via the function [FetchLightBlock](fetch). + +### Shared data of the light client + +- a pool of full nodes *FullNodes* that have not been contacted before +- peer set called *Secondaries* +- primary + +> Note that the lightStore is not needed to be shared. + +## Outline + +The problem laid out is solved by calling the function `AttackDetector` +with a lightstore that contains a light block that has just been +verified by the verifier. + +Then `AttackDetector` downloads headers from the secondaries. In case +a conflicting header is downloaded from a secondary, +`CreateEvidenceForPeer` which computes evidence in the case that +indeed an attack is confirmed. It could be that the secondary reports +a bogus block, which means that there need not be an attack, and the +secondary is replaced. + +## Details of the functions + +#### **[LCD-FUNC-DETECTOR.1]:** + +```go +func AttackDetector(root LightBlock, primary_trace []LightBlock) + ([]InternalEvidence) { + + Evidences := new []InternalEvidence; + + for each secondary in Secondaries { + // we replay the primary trace with the secondary, in + // order to generate evidence that we can submit to the + // secodary. We return the evidence + the trace the + // secondary told us that spans the evidence at its local store + + EvidenceForSecondary, newroot, secondary_trace, result := + CreateEvidenceForPeer(secondary, + root, + primary_trace); + if result == FaultyPeer { + Replace_Secondary(root); + } + else if result == FoundEvidence { + // the conflict is not bogus + Evidences.Add(EvidenceForSecondary); + // we replay the secondary trace with the primary, ... + EvidenceForPrimary, _, result := + CreateEvidenceForPeer(primary, + newroot, + secondary_trace); + if result == FoundEvidence { + Evidences.Add(EvidenceForPrimary); + } + // At this point we do not care about the other error + // codes. We already have generated evidence for an + // attack and need to stop the lightclient. It does not + // help to call replace_primary. Also we will use the + // same primary to check with other secondaries in + // later iterations of the loop + } + // In the case where the secondary reports NoEvidence + // we do nothing + } + return Evidences; +} +``` + +- Expected precondition + - root and primary trace are a verification trace +- Expected postcondition + - solves the problem statement (if attack found, then evidence is reported) +- Error condition + - `ErrorTrustExpired`: fails if root expires (outside trusting + period) [[LCV-INV-TP.1]](LCV-INV-TP1-link) + - `ErrorNoPeers`: if no peers are left to replace secondaries, and + no evidence was found before that happened + +--- + +```go +func CreateEvidenceForPeer(peer PeerID, root LightBlock, trace LightStore) + (Evidence, LightBlock, LightStore, result) { + + common := root; + + for i in 1 .. len(trace) { + auxLS, result := VerifyToTarget(peer, common, trace[i].Header.Height) + + if result != ResultSuccess { + // something went wrong; peer did not provide a verifyable block + return (nil, nil, nil, FaultyPeer) + } + else { + if auxLS.LatestVerified().Header != trace[i].Header { + // the header reported by the peer differs from the + // reference header in trace but both could be + // verified from common in one step. + // we can create evidence for submission to the secondary + ev := new InternalEvidence; + ev.Evidence.ConflictingBlock := trace[i]; + ev.Evidence.CommonHeight := common.Height; + ev.Peer := peer + return (ev, common, auxLS, FoundEvidence) + } + else { + // the peer agrees with the trace, we move common forward + // we could delete auxLS as it will be overwritten in + // the next iteration + common := trace[i] + } + } + } + return (nil, nil, nil, NoEvidence) +} +``` + +- Expected precondition + - root and trace are a verification trace +- Expected postcondition + - finds evidence where trace and peer diverge +- Error condition + - `ErrorTrustExpired`: fails if root expires (outside trusting + period) [[LCV-INV-TP.1]](LCV-INV-TP1-link) + - If `VerifyToTarget` returns error but root is not expired then return + `FaultyPeer` + +--- + +## Correctness arguments + +#### Argument for [[LCD-DIST-INV-ATTACK.1]](#LCD-DIST-INV-ATTACK1) + +Under the assumption that root and trace are a verification trace, +when in `CreateEvidenceForPeer` the detector the detector creates +evidence, then the lightclient has seen two different headers (one via +`trace` and one via `VerifyToTarget` for the same height that can both +be verified in one step. + +#### Argument for [[LCD-DIST-INV-STORE.1]](#LCD-DIST-INV-STORE1) + +We assume that there is at least one correct peer, and there is no +fork. As a result the correct peer has the correct sequence of +blocks. Since the primary_trace is checked block-by-block also against +each secondary, and at no point evidence was generated that means at +no point there were conflicting blocks. + +#### Argument for [[LCD-DIST-LIVE.1]](#LCD-DIST-LIVE1) + +At the latest when [[LCV-INV-TP.1]](LCV-INV-TP1-link) is violated, +`AttackDetector` terminates. + +#### Argument for [[LCD-DIST-TERM-NORMAL.1]](#LCD-DIST-TERM-NORMAL1) + +As there are finitely many peers, eventually the main loop +terminates. As there is no attack no evidence can be generated. + +#### Argument for [[LCD-DIST-TERM-ATTACK.1]](#LCD-DIST-TERM-ATTACK1) + +Argument similar to [[LCD-DIST-TERM-NORMAL.1]](#LCD-DIST-TERM-NORMAL1) + +#### Argument for [[LCD-DIST-SAFE-SECONDARY.1]](#LCD-DIST-SAFE-SECONDARY1) + +Secondaries are only replaced if they time-out or if they report bogus +blocks. The former is ruled out by the timing assumption, the latter +by correct peers only reporting blocks from the chain. + +#### Argument for [[LCD-DIST-SAFE-BOGUS.1]](#LCD-DIST-SAFE-BOGUS1) + +Once a bogus block is recognized as such the secondary is removed. + +# References + +> links to other specifications/ADRs this document refers to + +[[verification]] The specification of the light client verification. + +[[supervisor]] The specification of the light client supervisor. + +[verification]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification.md + +[supervisor]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/supervisor/supervisor.md + +[block]: https://github.com/tendermint/spec/blob/d46cd7f573a2c6a2399fcab2cde981330aa63f37/spec/core/data_structures.md + +[TMBC-FM-2THIRDS-link]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification.md#tmbc-fm-2thirds1 + +[TMBC-SOUND-DISTR-POSS-COMMIT-link]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification.md#tmbc-sound-distr-poss-commit1 + +[LCV-SEQ-SAFE-link]:https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification.md#lcv-seq-safe1 + +[TMBC-VAL-CONTAINS-CORR-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification.md#tmbc-val-contains-corr1 + +[fetch]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification.md#lcv-func-fetch1 + +[LCV-INV-TP1-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification.md#lcv-inv-tp1 diff --git a/spec/light-client/detection/detection_003_reviewed.md b/spec/light-client/detection/detection_003_reviewed.md new file mode 100644 index 000000000..13f6e9716 --- /dev/null +++ b/spec/light-client/detection/detection_003_reviewed.md @@ -0,0 +1,839 @@ +# Light Client Attack Detector + +In this specification, we strengthen the light client to be resistant +against so-called light client attacks. In a light client attack, all +the correct Tendermint full nodes agree on the sequence of generated +blocks (no fork), but a set of faulty full nodes attack a light client +by generating (signing) a block that deviates from the block of the +same height on the blockchain. In order to do so, some of these faulty +full nodes must have been validators before and violate the assumption +of more than two thirds of "correct voting power" +[[TMBC-FM-2THIRDS]][TMBC-FM-2THIRDS-link], as otherwise, if +[[TMBC-FM-2THIRDS]][TMBC-FM-2THIRDS-link] would hold, +[verification][verification] would satisfy +[[LCV-SEQ-SAFE.1]][LCV-SEQ-SAFE-link]. + +An attack detector (or detector for short) is a mechanism that is used +by the light client [supervisor][supervisor] after +[verification][verification] of a new light block +with the primary, to cross-check the newly learned light block with +other peers (secondaries). It expects as input a light block with some +height *root* (that serves as a root of trust), and a verification +trace (a sequence of lightblocks) that the primary provided. + +In case the detector observes a light client attack, it computes +evidence data that can be used by Tendermint full nodes to isolate a +set of faulty full nodes that are still within the unbonding period +(more than 1/3 of the voting power of the validator set at some block +of the chain), and report them via ABCI (application/blockchain +interface) +to the application of a +Tendermint blockchain in order to punish faulty nodes. + +## Context of this document + +The light client [verification][verification] specification is +designed for the Tendermint failure model (1/3 assumption) +[[TMBC-FM-2THIRDS]][TMBC-FM-2THIRDS-link]. It is safe under this +assumption, and live if it can reliably (that is, no message loss, no +duplication, and eventually delivered) and timely communicate with a +correct full node. If [[TMBC-FM-2THIRDS]][TMBC-FM-2THIRDS-link] +assumption is violated, the light client can be fooled to trust a +light block that was not generated by Tendermint consensus. + +This specification, the attack detector, is a "second line of +defense", in case the 1/3 assumption is violated. Its goal is to +detect a light client attack (conflicting light blocks) and collect +evidence. However, it is impractical to probe all full nodes. At this +time we consider a simple scheme of maintaining an address book of +known full nodes from which a small subset (e.g., 4) are chosen +initially to communicate with. More involved book keeping with +probabilistic guarantees can be considered at later stages of the +project. + +The light client maintains a simple address book containing addresses +of full nodes that it can pick as primary and secondaries. To obtain +a new light block, the light client first does +[verification][verification] with the primary, and then cross-checks +the light block (and the trace of light blocks that led to it) with +the secondaries using this specification. + +# Outline + +- [Part I](#part-i---Tendermint-Consensus-and-Light-Client-Attacks): + Formal definitions of lightclient attacks, based on basic + properties of Tendermint consensus. + - [Node-based characterization of + attacks](#Node-based-characterization-of-attacks). The + definition of attacks used in the problem statement of + this specification. + + - [Block-based characterization of attacks](#Block-based-characterization-of-attacks). Alternative definitions + provided for future reference. + +- [Part II](#part-ii---problem-statement): Problem statement of + lightclient attack detection + + - [Informal Problem Statement](#informal-problem-statement) + - [Assumptions](#Assumptions) + - [Definitions](#definitions) + - [Distributed Problem statement](#Distributed-Problem-statement) + +- [Part III](#part-iii---protocol): The protocol + + - [Functions and Data defined in other Specifications](#Functions-and-Data-defined-in-other-Specifications) + - [Outline of Solution](#Outline-of-solution) + - [Details of the functions](#Details-of-the-functions) + - [Correctness arguments](#Correctness-arguments) + +# Part I - Tendermint Consensus and Light Client Attacks + +In this section we will give some mathematical definitions of what we +mean by light client attacks (that are considered in this +specification) and how they differ from main-chain forks. To this end, +we start by defining some properties of the sequence of blocks that is +decided upon by Tendermint consensus in normal operation (if the +Tendermint failure model holds +[[TMBC-FM-2THIRDS]][TMBC-FM-2THIRDS-link]), +and then define different +deviations that correspond to attack scenarios. We consider the notion +of [light blocks][LCV-LB-link] and [headers][LVC-HD-link]. + +#### **[TMBC-GENESIS.1]** + +Let *Genesis* be the agreed-upon initial block (file). + +#### **[TMBC-FUNC-SIGN.1]** + +Let *b* and *c* be two light blocks with *b.Header.Height + 1 = +c.Header.Height*. We define the predicate **signs(b,c)** to hold +iff *c.Header.LastCommit* is in *PossibleCommit(b)*. +[[TMBC-SOUND-DISTR-POSS-COMMIT.1]][TMBC-SOUND-DISTR-POSS-COMMIT-link]. + +> The above encodes sequential verification, that is, intuitively, +> b.Header.NextValidators = c.Header.Validators and 2/3 of +> these Validators signed c. + +#### **[TMBC-FUNC-SUPPORT.1]** + +Let *b* and *c* be two light blocks. We define the predicate +**supports(b,c,t)** to hold iff + +- *t - trustingPeriod < b.Header.Time < t* +- the voting power in *b.NextValidators* of nodes in *c.Commit* + is more than 1/3 of *TotalVotingPower(b.Header.NextValidators)* + +> That is, if the [Tendermint failure model][TMBC-FM-2THIRDS-link] +> holds, then *c* has been signed by at least one correct full node, cf. +> [[TMBC-VAL-CONTAINS-CORR.1]][TMBC-VAL-CONTAINS-CORR-link]. +> The following formalizes that *b* was properly generated by +> Tendermint; *b* can be traced back to genesis. + +#### **[TMBC-SEQ-ROOTED.1]** + +Let *b* be a light block. +We define *sequ-rooted(b)* iff for all *i*, *1 <= i < h = b.Header.Height*, +there exist light blocks *a(i)* s.t. + +- *a(1) = Genesis* and +- *a(h) = b* and +- *signs( a(i) , a(i+1) )*. + +> The following formalizes that *c* is trusted based on *b* in +> skipping verification. Observe that we do not require here (yet) +> that *b* was properly generated. + +#### **[TMBC-SKIP-TRACE.1]** + +Let *b* and *c* be light blocks. We define *skip-trace(b,c,t)* if at +time t there exists an integer *h* and a sequence *a(1)*, ... *a(h)* s.t. + +- *a(1) = b* and +- *a(h) = c* and +- *supports( a(i), a(i+1), t)*, for all i, *1 <= i < h*. + +We call such a sequence *a(1)*, ... *a(h)* a **verification trace**. + +> The following formalizes that two light blocks of the same height +> should agree on the content of the header. Observe that *b* and *c* +> may disagree on the Commit. This is a special case if the canonical +> commit has not been decided on yet, that is, if b.Header.Height is the +> maximum height of all blocks decided upon by Tendermint at this +> moment. + +#### **[TMBC-SIGN-SKIP-MATCH.1]** + +Let *a*, *b*, *c*, be light blocks and *t* a time, we define +*sign-skip-match(a,b,c,t) = true* iff the following implication +evaluates to true: + +- *sequ-rooted(a)* and +- *b.Header.Height = c.Header.Height* and +- *skip-trace(a,b,t)* +- *skip-trace(a,c,t)* + +implies *b.Header = c.Header*. + +> Observe that *sign-skip-match* is defined via an implication. If it +> evaluates to false this means that the left-hand-side of the +> implication evaluates to true, and the right-hand-side evaluates to +> false. In particular, there are two **different** headers *b* and +> *c* that both can be verified from a common block *a* from the +> chain. Thus, the following describes an attack. + +#### **[TMBC-ATTACK.1]** + +If there exists three light blocks a, b, and c, with +*sign-skip-match(a,b,c,t) = false* then we have an *attack*. We say +we have **an attack at height** *b.Header.Height* and write +*attack(a,b,c,t)*. + +> The lightblock *a* need not be unique, that is, there may be +> several blocks that satisfy the above requirement for the same +> blocks *b* and *c*. + +[[TMBC-ATTACK.1]](#TMBC-ATTACK1) is a formalization of the violation +of the agreement property based on the result of consensus, that is, +the generated blocks. + +**Remark.** +Violation of agreement is only possible if more than 1/3 of the validators (or +next validators) of some previous block deviated from the protocol. The +upcoming "accountability" specification will describe how to compute +a set of at least 1/3 faulty nodes from two conflicting blocks. [] + +There are different ways to characterize forks +and attack scenarios. This specification uses the "node-based +characterization of attacks" which focuses on what kinds of nodes are +affected (light nodes vs. full nodes). For future reference and +discussion we also provide a +"block-based characterization of attacks" below. + +## Node-based characterization of attacks + +#### **[TMBC-MC-FORK.1]** + +We say there is a (main chain) fork at time *t* if + +- there are two correct full nodes *i* and *j* and +- *i* is different from *j* and +- *i* has decided on *b* and +- *j* has decided on *c* and +- there exist *a* such that *attack(a,b,c,t)*. + +#### **[TMBC-LC-ATTACK.1]** + +We say there is a light client attack at time *t*, if + +- there is **no** (main chain) fork [[TMBC-MC-FORK.1]](#TMBC-MC-FORK1), and +- there exist nodes that have computed light blocks *b* and *c* and +- there exist *a* such that *attack(a,b,c,t)*. + +We say the attack is at height *a.Header.Height*. + +> In this specification we consider detection of light client +> attacks. Intuitively, the case we consider is that +> light block *b* is the one from the +> blockchain, and some attacker has computed *c* and tries to wrongly +> convince +> the light client that *c* is the block from the chain. + +#### **[TMBC-LC-ATTACK-EVIDENCE.1]** + +We consider the following case of a light client attack +[[TMBC-LC-ATTACK.1]](#TMBC-LC-ATTACK1): + +- *attack(a,b,c,t)* +- there is a peer p1 that has a sequence *chain* of blocks from *a* to *b* +- *skip-trace(a,c,t)*: by [[TMBC-SKIP-TRACE.1]](#TMBC-SKIP-TRACE1) there is a + verification trace *v* of the form *a = v(1)*, ... *v(h) = c* + +Evidence for p1 (that proves an attack to p1) consists for index i +of v(i) and v(i+1) such that + +- E1(i). v(i) is equal to the block of *chain* at height v(i).Height, and +- E2(i). v(i+1) that is different from the block of *chain* at + height v(i+1).height + +> Observe p1 can +> +> - check that v(i+1) differs from its block at that height, and +> - verify v(i+1) in one step from v(i) as v is a verification trace. + +#### **[TMBC-LC-EVIDENCE-DATA.1]** + +To prove the attack to p1, because of Point E1, it is sufficient to +submit + +- v(i).Height (rather than v(i)). +- v(i+1) + +This information is *evidence for height v(i).Height*. + +## Block-based characterization of attacks + +In this section we provide a different characterization of attacks. It +is not defined on the nodes that are affected but purely on the +content of the blocks. In that sense these definitions are less +operational. + +> They might be relevant for a closer analysis of fork scenarios on the +> chain, which is out of the scope of this specification. + +#### **[TMBC-SIGN-UNIQUE.1]** + +Let *b* and *c* be light blocks, we define the predicate +*sign-unique(b,c)* to evaluate to true iff the following implication +evaluates to true: + +- *b.Header.Height = c.Header.Height* and +- *sequ-rooted(b)* and +- *sequ-rooted(c)* + +implies *b = c*. + +#### **[TMBC-BLOCKS-MCFORK.1]** + +If there exists two light blocks b and c, with *sign-unique(b,c) = +false* then we have a *fork*. + +> The difference of the above definition to +> [[TMBC-MC-FORK.1]](#TMBC-MC-FORK1) is subtle. The latter requires a +> full node being affected by a bad block while +> [[TMBC-BLOCKS-MCFORK.1]](#TMBC-BLOCKS-MCFORK1) just requires that a +> bad block exists, possibly in memory of an attacker. +> The following captures a light client fork. There is no fork up to +> the height of block b. However, c is of that height, is different, +> and passes skipping verification. It is a stricter property than +> [[TMBC-LC-ATTACK.1]](#TMBC-LC-ATTACK1), as +> [[TMBC-LC-ATTACK.1]](#TMBC-LC-ATTACK1) requires that no correct full +> node is affected. + +#### **[TMBC-BLOCKS-LCFORK.1]** + +Let *a*, *b*, *c*, be light blocks and *t* a time. We define +*light-client-fork(a,b,c,t)* iff + +- *sign-skip-match(a,b,c,t) = false* and +- *sequ-rooted(b)* and +- *b* is "unique", that is, for all *d*, *sequ-rooted(d)* and + *d.Header.Height = b.Header.Height* implies *d = b* + +> Finally, let us also define bogus blocks that have no support. +> Observe that bogus is even defined if there is a fork. +> Also, for the definition it would be sufficient to restrict *a* to +> *a.height < b.height* (which is implied by the definitions which +> unfold until *supports()*). + +#### **[TMBC-BOGUS.1]** + +Let *b* be a light block and *t* a time. We define *bogus(b,t)* iff + +- *sequ-rooted(b) = false* and +- for all *a*, *sequ-rooted(a)* implies *skip-trace(a,b,t) = false* + +# Part II - Problem Statement + +## Informal Problem statement + +There is no sequential specification: the detector only makes sense +in a distributed systems where some nodes misbehave. + +We work under the assumption that full nodes and validators are +responsible for detecting attacks on the main chain, and the evidence +reactor takes care of broadcasting evidence to communicate +misbehaving nodes via ABCI to the application, and halt the chain in +case of a fork. The point of this specification is to shield a light +clients against attacks that cannot be detected by full nodes, and +are fully addressed at light clients (and consequently IBC relayers, +which use the light client protocols to observe the state of a +blockchain). In order to provide full nodes the incentive to follow +the protocols when communicating with the light client, this +specification also considers the generation of evidence that will +also be processed by the Tendermint blockchain. + +#### **[LCD-IP-MODEL.1]** + +The detector is designed under the assumption that + +- [[TMBC-FM-2THIRDS]][TMBC-FM-2THIRDS-link] may be violated +- there is no fork on the main chain. + +> As a result some faulty full nodes may launch an attack on a light +> client. + +The following requirements are operational in that they describe how +things should be done, rather than what should be done. However, they +do not constitute temporal logic verification conditions. For those, +see [LCD-DIST-*] below. + +The detector is called in the [supervisor][supervisor] as follows + +```go +Evidences := AttackDetector(root_of_trust, verifiedLS);` +``` + +where + +- `root-of-trust` is a light block that is trusted (that is, +except upon initialization, the primary and the secondaries +agreed on in the past), and +- `verifiedLS` is a lightstore that contains a verification trace that + starts from a lightblock that can be verified with the + `root-of-trust` in one step and ends with a lightblock of the height + requested by the user +- `Evidences` is a list of evidences for misbehavior + +#### **[LCD-IP-STATEMENT.1]** + +Whenever AttackDetector is called, the detector should for each +secondary cross check the largest header in verifiedLS with the +corresponding header of the same height provided by the secondary. If +there is a deviation, the detector should +try to replay the verification trace `verifiedLS` with the +secondary + +- in case replaying leads to detection of a light client attack + (one of the lightblocks differ from the one in verifiedLS with + the same height), we should return evidence +- if the secondary cannot provide a verification trace, we have no + proof for an attack. Block *b* may be bogus. In this case the + secondary is faulty and it should be replaced. + +## Assumptions + +It is not in the interest of faulty full nodes to talk to the +detector as long as the detector is connected to at least one +correct full node. This would only increase the likelihood of +misbehavior being detected. Also we cannot punish them easily +(cheaply). The absence of a response need not be the fault of the full +node. + +Correct full nodes have the incentive to respond, because the +detector may help them to understand whether their header is a good +one. We can thus base liveness arguments of the detector on +the assumptions that correct full nodes reliably talk to the +detector. + +#### **[LCD-A-CorrFull.1]** + +At all times there is at least one correct full +node among the primary and the secondaries. + +> For this version of the detection we take this assumption. It +> allows us to establish the invariant that the lightblock +> `root-of-trust` is always the one from the blockchain, and we can +> use it as starting point for the evidence computation. Moreover, it +> allows us to establish the invariant at the supervisor that any +> lightblock in the (top-level) lightstore is from the blockchain. +> In the future we might design a lightclient based on the assumption +> that at least in regular intervals the lightclient is connected to a +> correct full node. This will require the detector to reconsider +> `root-of-trust`, and remove lightblocks from the top-level +> lightstore. + +#### **[LCD-A-RelComm.1]** + +Communication between the detector and a correct full node is +reliable and bounded in time. Reliable communication means that +messages are not lost, not duplicated, and eventually delivered. There +is a (known) end-to-end delay *Delta*, such that if a message is sent +at time *t* then it is received and processed by time *t + Delta*. +This implies that we need a timeout of at least *2 Delta* for remote +procedure calls to ensure that the response of a correct peer arrives +before the timeout expires. + +## Definitions + +### Evidence + +Following the definition of +[[TMBC-LC-ATTACK-EVIDENCE.1]](#TMBC-LC-ATTACK-EVIDENCE1), by evidence +we refer to a variable of the following type + +#### **[LC-DATA-EVIDENCE.1]** + +```go +type LightClientAttackEvidence struct { + ConflictingBlock LightBlock + CommonHeight int64 + + // Evidence also includes application specific data which is not + // part of verification but is sent to the application once the + // evidence gets committed on chain. +} +``` + +As the above data is computed for a specific peer, the following +data structure wraps the evidence and adds the peerID. + +#### **[LC-DATA-EVIDENCE-INT.1]** + +```go +type InternalEvidence struct { + Evidence LightClientAttackEvidence + Peer PeerID +} +``` + +#### **[LC-SUMBIT-EVIDENCE.1]** + +```go +func submitEvidence(Evidences []InternalEvidence) +``` + +- Expected postcondition + - for each `ev` in `Evidences`: submit `ev.Evidence` to `ev.Peer` + +--- + +### LightStore + +Lightblocks and LightStores are defined in the verification +specification [[LCV-DATA-LIGHTBLOCK.1]][LCV-LB-link] +and [[LCV-DATA-LIGHTSTORE.2]][LCV-LS-link]. See +the [verification specification][verification] for details. + +## Distributed Problem statement + +> As the attack detector is there to reduce the impact of faulty +> nodes, and faulty nodes imply that there is a distributed system, +> there is no sequential specification to which this distributed +> problem statement may refer to. + +The detector gets as input a trusted lightblock called *root* and an +auxiliary lightstore called *primary_trace* with lightblocks that have +been verified before, and that were provided by the primary. + +#### **[LCD-DIST-INV-ATTACK.1]** + +If the detector returns evidence for height *h* +[[TMBC-LC-EVIDENCE-DATA.1]](#TMBC-LC-EVIDENCE-DATA1), then there is an +attack at height *h*. [[TMBC-LC-ATTACK.1]](#TMBC-LC-ATTACK1) + +#### **[LCD-DIST-INV-STORE.1]** + +If the detector does not return evidence, then *primary_trace* +contains only blocks from the blockchain. + +#### **[LCD-DIST-LIVE.1]** + +The detector eventually terminates. + +#### **[LCD-DIST-TERM-NORMAL.1]** + +If + +- the *primary_trace* contains only blocks from the blockchain, and +- there is no attack, and +- *Secondaries* is always non-empty, and +- the age of *root* is always less than the trusting period, + +then the detector does not return evidence. + +#### **[LCD-DIST-TERM-ATTACK.1]** + +If + +- there is an attack, and +- a secondary reports a block that conflicts + with one of the blocks in *primary_trace*, and +- *Secondaries* is always non-empty, and +- the age of *root* is always less than the trusting period, + +then the detector returns evidence. + +> Observe that above we require that "a secondary reports a block that +> conflicts". If there is an attack, but no secondary tries to launch +> it against the detector (or the message from the secondary is lost +> by the network), then there is nothing to detect for us. + +#### **[LCD-DIST-SAFE-SECONDARY.1]** + +No correct secondary is ever replaced. + +#### **[LCD-DIST-SAFE-BOGUS.1]** + +If + +- a secondary reports a bogus lightblock, +- the age of *root* is always less than the trusting period, + +then the secondary is replaced before the detector terminates. + +> The above property is quite operational (e.g., the usage of +> "reports"), but it captures closely the requirement. As the +> detector only makes sense in a distributed setting, and does not +> have a sequential specification, a less "pure" specification are +> acceptable. + +# Part III - Protocol + +## Functions and Data defined in other Specifications + +### From the [supervisor][supervisor] + +[[LC-FUNC-REPLACE-SECONDARY.1]][repl] + +```go +Replace_Secondary(addr Address, root-of-trust LightBlock) +``` + +### From the [verifier][verification] + +[[LCV-FUNC-MAIN.2]][vtt] + +```go +func VerifyToTarget(primary PeerID, root LightBlock, + targetHeight Height) (LightStore, Result) +``` + +Observe that `VerifyToTarget` does communication with the secondaries +via the function [FetchLightBlock][fetch]. + +### Shared data of the light client + +- a pool of full nodes *FullNodes* that have not been contacted before +- peer set called *Secondaries* +- primary + +> Note that the lightStore is not needed to be shared. + +## Outline of solution + +The problem laid out is solved by calling the function `AttackDetector` +with a lightstore that contains a light block that has just been +verified by the verifier. + +Then `AttackDetector` downloads headers from the secondaries. In case +a conflicting header is downloaded from a secondary, it calls +`CreateEvidenceForPeer` which computes evidence in the case that +indeed an attack is confirmed. It could be that the secondary reports +a bogus block, which means that there need not be an attack, and the +secondary is replaced. + +## Details of the functions + +#### **[LCD-FUNC-DETECTOR.2]:** + +```go +func AttackDetector(root LightBlock, primary_trace []LightBlock) + ([]InternalEvidence) { + + Evidences := new []InternalEvidence; + + for each secondary in Secondaries { + lb, result := FetchLightBlock(secondary,primary_trace.Latest().Header.Height); + if result != ResultSuccess { + Replace_Secondary(root); + } + else if lb.Header != primary_trace.Latest().Header { + + // we replay the primary trace with the secondary, in + // order to generate evidence that we can submit to the + // secondary. We return the evidence + the trace the + // secondary told us that spans the evidence at its local store + + EvidenceForSecondary, newroot, secondary_trace, result := + CreateEvidenceForPeer(secondary, + root, + primary_trace); + if result == FaultyPeer { + Replace_Secondary(root); + } + else if result == FoundEvidence { + // the conflict is not bogus + Evidences.Add(EvidenceForSecondary); + // we replay the secondary trace with the primary, ... + EvidenceForPrimary, _, result := + CreateEvidenceForPeer(primary, + newroot, + secondary_trace); + if result == FoundEvidence { + Evidences.Add(EvidenceForPrimary); + } + // At this point we do not care about the other error + // codes. We already have generated evidence for an + // attack and need to stop the lightclient. It does not + // help to call replace_primary. Also we will use the + // same primary to check with other secondaries in + // later iterations of the loop + } + // In the case where the secondary reports NoEvidence + // after initially it reported a conflicting header. + // secondary is faulty + Replace_Secondary(root); + } + } + return Evidences; +} +``` + +- Expected precondition + - root and primary trace are a verification trace +- Expected postcondition + - solves the problem statement (if attack found, then evidence is reported) +- Error condition + - `ErrorTrustExpired`: fails if root expires (outside trusting + period) [[LCV-INV-TP.1]][LCV-INV-TP1-link] + - `ErrorNoPeers`: if no peers are left to replace secondaries, and + no evidence was found before that happened + +--- + +```go +func CreateEvidenceForPeer(peer PeerID, root LightBlock, trace LightStore) + (Evidence, LightBlock, LightStore, result) { + + common := root; + + for i in 1 .. len(trace) { + auxLS, result := VerifyToTarget(peer, common, trace[i].Header.Height) + + if result != ResultSuccess { + // something went wrong; peer did not provide a verifiable block + return (nil, nil, nil, FaultyPeer) + } + else { + if auxLS.LatestVerified().Header != trace[i].Header { + // the header reported by the peer differs from the + // reference header in trace but both could be + // verified from common in one step. + // we can create evidence for submission to the secondary + ev := new InternalEvidence; + ev.Evidence.ConflictingBlock := trace[i]; + // CommonHeight is used to indicate the type of attack + // if the CommonHeight != ConflictingBlock.Height this + // is by definition a lunatic attack else it is an + // equivocation attack + ev.Evidence.CommonHeight := common.Height; + ev.Peer := peer + return (ev, common, auxLS, FoundEvidence) + } + else { + // the peer agrees with the trace, we move common forward. + // we could delete auxLS as it will be overwritten in + // the next iteration + common := trace[i] + } + } + } + return (nil, nil, nil, NoEvidence) +} +``` + +- Expected precondition + - root and trace are a verification trace +- Expected postcondition + - finds evidence where trace and peer diverge +- Error condition + - `ErrorTrustExpired`: fails if root expires (outside trusting + period) [[LCV-INV-TP.1]][LCV-INV-TP1-link] + - If `VerifyToTarget` returns error but root is not expired then return + `FaultyPeer` + +--- + +## Correctness arguments + +#### On the existence of evidence + +**Proposition.** In the case of attack, +evidence [[TMBC-LC-ATTACK-EVIDENCE.1]](#TMBC-LC-ATTACK-EVIDENCE1) + exists. +*Proof.* First observe that + +- (A). (NOT E2(i)) implies E1(i+1) + +Now by contradiction assume there is no evidence. Thus + +- for all i, we have NOT E1(i) or NOT E2(i) +- for i = 1 we have E1(1) and thus NOT E2(1) + thus by induction on i, by (A) we have for all i that **E1(i)** +- from attack we have E2(h-1), and as there is no evidence for + i = h - 1 we get **NOT E1(h-1)**. Contradiction. +QED. + +#### Argument for [[LCD-DIST-INV-ATTACK.1]](#LCD-DIST-INV-ATTACK1) + +Under the assumption that root and trace are a verification trace, +when in `CreateEvidenceForPeer` the detector creates +evidence, then the lightclient has seen two different headers (one via +`trace` and one via `VerifyToTarget`) for the same height that can both +be verified in one step. + +#### Argument for [[LCD-DIST-INV-STORE.1]](#LCD-DIST-INV-STORE1) + +We assume that there is at least one correct peer, and there is no +fork. As a result, the correct peer has the correct sequence of +blocks. Since the primary_trace is checked block-by-block also against +each secondary, and at no point evidence was generated that means at +no point there were conflicting blocks. + +#### Argument for [[LCD-DIST-LIVE.1]](#LCD-DIST-LIVE1) + +At the latest when [[LCV-INV-TP.1]][LCV-INV-TP1-link] is violated, +`AttackDetector` terminates. + +#### Argument for [[LCD-DIST-TERM-NORMAL.1]](#LCD-DIST-TERM-NORMAL1) + +As there are finitely many peers, eventually the main loop +terminates. As there is no attack no evidence can be generated. + +#### Argument for [[LCD-DIST-TERM-ATTACK.1]](#LCD-DIST-TERM-ATTACK1) + +Argument similar to [[LCD-DIST-TERM-NORMAL.1]](#LCD-DIST-TERM-NORMAL1) + +#### Argument for [[LCD-DIST-SAFE-SECONDARY.1]](#LCD-DIST-SAFE-SECONDARY1) + +Secondaries are only replaced if they time-out or if they report bogus +blocks. The former is ruled out by the timing assumption, the latter +by correct peers only reporting blocks from the chain. + +#### Argument for [[LCD-DIST-SAFE-BOGUS.1]](#LCD-DIST-SAFE-BOGUS1) + +Once a bogus block is recognized as such the secondary is removed. + +# References + +> links to other specifications/ADRs this document refers to + +[[verification]] The specification of the light client verification. + +[[supervisor]] The specification of the light client supervisor. + +[verification]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md + +[supervisor]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/supervisor/supervisor_001_draft.md + +[block]: https://github.com/tendermint/spec/blob/d46cd7f573a2c6a2399fcab2cde981330aa63f37/spec/core/data_structures.md + +[TMBC-FM-2THIRDS-link]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#tmbc-fm-2thirds1 + +[TMBC-SOUND-DISTR-POSS-COMMIT-link]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#tmbc-sound-distr-poss-commit1 + +[LCV-SEQ-SAFE-link]:https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#lcv-seq-safe1 + +[TMBC-VAL-CONTAINS-CORR-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#tmbc-val-contains-corr1 + +[fetch]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#lcv-func-fetch1 + +[LCV-INV-TP1-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#lcv-inv-tp1 + +[LCV-LB-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#lcv-data-lightblock1 + +[LCV-LS-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#lcv-data-lightstore2 + +[LVC-HD-link]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#tmbc-header-fields2 + +[repl]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/supervisor/supervisor_001_draft.md#lc-func-replace-secondary1 + +[vtt]: +https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/verification/verification_002_draft.md#lcv-func-main2 diff --git a/spec/light-client/detection/discussions.md b/spec/light-client/detection/discussions.md new file mode 100644 index 000000000..82702dd69 --- /dev/null +++ b/spec/light-client/detection/discussions.md @@ -0,0 +1,178 @@ +# Results of Discussions and Decisions + +- Generating a minimal proof of fork (as suggested in [Issue #5083](https://github.com/tendermint/tendermint/issues/5083)) is too costly at the light client + - we do not know all lightblocks from the primary + - therefore there are many scenarios. we might even need to ask + the primary again for additional lightblocks to isolate the + branch. + +> For instance, the light node starts with block at height 1 and the +> primary provides a block of height 10 that the light node can +> verify immediately. In cross-checking, a secondary now provides a +> conflicting header b10 of height 10 that needs another header b5 +> of height 5 to +> verify. Now, in order for the light node to convince the primary: +> +> - The light node cannot just sent b5, as it is not clear whether +> the fork happened before or after 5 +> - The light node cannot just send b10, as the primary would also +> need b5 for verification +> - In order to minimize the evidence, the light node may try to +> figure out where the branch happens, e.g., by asking the primary +> for height 5 (it might be that more queries are required, also +> to the secondary. However, assuming that in this scenario the +> primary is faulty it may not respond. + + As the main goal is to catch misbehavior of the primary, + evidence generation and punishment must not depend on their + cooperation. So the moment we have proof of fork (even if it + contains several light blocks) we should submit right away. + +- decision: "full" proof of fork consists of two traces that originate in the + same lightblock and lead to conflicting headers of the same height. + +- For submission of proof of fork, we may do some optimizations, for + instance, we might just submit a trace of lightblocks that verifies a block + different from the one the full node knows (we do not send the trace + the primary gave us back to the primary) + +- The light client attack is via the primary. Thus we try to + catch if the primary installs a bad light block + - We do not check secondary against secondary + - For each secondary, we check the primary against one secondary + +- Observe that just two blocks for the same height are not +sufficient proof of fork. +One of the blocks may be bogus [TMBC-BOGUS.1] which does +not constitute slashable behavior. +Which leads to the question whether the light node should try to do +fork detection on its initial block (from subjective +initialization). This could be done by doing backwards verification +(with the hashes) until a bifurcation block is found. +While there are scenarios where a +fork could be found, there is also the scenario where a faulty full +node feeds the light node with bogus light blocks and forces the light +node to check hashes until a bogus chain is out of the trusting period. +As a result, the light client +should not try to detect a fork for its initial header. **The initial +header must be trusted as is.** + +# Light Client Sequential Supervisor + +**TODO:** decide where (into which specification) to put the +following: + +We describe the context on which the fork detector is called by giving +a sequential version of the supervisor function. +Roughly, it alternates two phases namely: + +- Light Client Verification. As a result, a header of the required + height has been downloaded from and verified with the primary. +- Light Client Fork Detections. As a result the header has been + cross-checked with the secondaries. In case there is a fork we + submit "proof of fork" and exit. + +#### **[LC-FUNC-SUPERVISOR.1]:** + +```go +func Sequential-Supervisor () (Error) { + loop { + // get the next height + nextHeight := input(); + + // Verify + result := NoResult; + while result != ResultSuccess { + lightStore,result := VerifyToTarget(primary, lightStore, nextHeight); + if result == ResultFailure { + // pick new primary (promote a secondary to primary) + /// and delete all lightblocks above + // LastTrusted (they have not been cross-checked) + Replace_Primary(); + } + } + + // Cross-check + PoFs := Forkdetector(lightStore, PoFs); + if PoFs.Empty { + // no fork detected with secondaries, we trust the new + // lightblock + LightStore.Update(testedLB, StateTrusted); + } + else { + // there is a fork, we submit the proofs and exit + for i, p range PoFs { + SubmitProofOfFork(p); + } + return(ErrorFork); + } + } +} +``` + +**TODO:** finish conditions + +- Implementation remark +- Expected precondition + - *lightStore* initialized with trusted header + - *PoFs* empty +- Expected postcondition + - runs forever, or + - is terminated by user and satisfies LightStore invariant, or **TODO** + - has submitted proof of fork upon detecting a fork +- Error condition + - none + +---- + +# Semantics of the LightStore + +Currently, a lightblock in the lightstore can be in one of the +following states: + +- StateUnverified +- StateVerified +- StateFailed +- StateTrusted + +The intuition is that `StateVerified` captures that the lightblock has +been verified with the primary, and `StateTrusted` is the state after +successful cross-checking with the secondaries. + +Assuming there is **always one correct node among primary and +secondaries**, and there is no fork on the blockchain, lightblocks that +are in `StateTrusted` can be used by the user with the guarantee of +"finality". If a block in `StateVerified` is used, it might be that +detection later finds a fork, and a roll-back might be needed. + +**Remark:** The assumption of one correct node, does not render +verification useless. It is true that if the primary and the +secondaries return the same block we may trust it. However, if there +is a node that provides a different block, the light node still needs +verification to understand whether there is a fork, or whether the +different block is just bogus (without any support of some previous +validator set). + +**Remark:** A light node may choose the full nodes it communicates +with (the light node and the full node might even belong to the same +stakeholder) so the assumption might be justified in some cases. + +In the future, we will do the following changes + +- we assume that only from time to time, the light node is + connected to a correct full node +- this means for some limited time, the light node might have no + means to defend against light client attacks +- as a result we do not have finality +- once the light node reconnects with a correct full node, it + should detect the light client attack and submit evidence. + +Under these assumptions, `StateTrusted` loses its meaning. As a +result, it should be removed from the API. We suggest that we replace +it with a flag "trusted" that can be used + +- internally for efficiency reasons (to maintain + [LCD-INV-TRUSTED-AGREED.1] until a fork is detected) +- by light client based on the "one correct full node" assumption + +---- diff --git a/spec/light-client/detection/draft-functions.md b/spec/light-client/detection/draft-functions.md new file mode 100644 index 000000000..6512d733f --- /dev/null +++ b/spec/light-client/detection/draft-functions.md @@ -0,0 +1,289 @@ +# Draft of Functions for Fork detection and Proof of Fork Submisstion + +This document collects drafts of function for generating and +submitting proof of fork in the IBC context + +- [IBC](#on---chain-ibc-component) + +- [Relayer](#relayer) + +## On-chain IBC Component + +> The following is a suggestions to change the function defined in ICS 007 + +#### [TAG-IBC-MISBEHAVIOR.1] + +```go +func checkMisbehaviorAndUpdateState(cs: ClientState, PoF: LightNodeProofOfFork) +``` + +**TODO:** finish conditions + +- Implementation remark +- Expected precondition + - PoF.TrustedBlock.Header is equal to lightBlock on store with + same height + - both traces end with header of same height + - headers are different + - both traces are supported by PoF.TrustedBlock (`supports` + defined in [TMBC-FUNC]), that is, for `t = currentTimestamp()` (see + ICS 024) + - supports(PoF.TrustedBlock, PoF.PrimaryTrace[1], t) + - supports(PoF.PrimaryTrace[i], PoF.PrimaryTrace[i+1], t) for + *0 < i < length(PoF.PrimaryTrace)* + - supports(PoF.TrustedBlock, PoF.SecondaryTrace[1], t) + - supports(PoF.SecondaryTrace[i], PoF.SecondaryTrace[i+1], t) for + *0 < i < length(PoF.SecondaryTrace)* +- Expected postcondition + - set cs.FrozenHeight to min(cs.FrozenHeight, PoF.TrustedBlock.Header.Height) +- Error condition + - none + +---- + +> The following is a suggestions to add functionality to ICS 002 and 007. +> I suppose the above is the most efficient way to get the required +> information. Another option is to subscribe to "header install" +> events via CosmosSDK + +#### [TAG-IBC-HEIGHTS.1] + +```go +func QueryHeightsRange(id, from, to) ([]Height) +``` + +- Expected postcondition + - returns all heights *h*, with *from <= h <= to* for which the + IBC component has a consensus state. + +---- + +> This function can be used if the relayer has no information about +> the IBC component. This allows late-joining relayers to also +> participate in fork dection and the generation in proof of +> fork. Alternatively, we may also postulate that relayers are not +> responsible to detect forks for heights before they started (and +> subscribed to the transactions reporting fresh headers being +> installed at the IBC component). + +## Relayer + +### Auxiliary Functions to be implemented in the Light Client + +#### [LCV-LS-FUNC-GET-PREV.1] + +```go +func (ls LightStore) GetPreviousVerified(height Height) (LightBlock, bool) +``` + +- Expected postcondition + - returns a verified LightBlock, whose height is maximal among all + verified lightblocks with height smaller than `height` + +---- + +### Relayer Submitting Proof of Fork to the IBC Component + +There are two ways the relayer can detect a fork + +- by the fork detector of one of its lightclients +- be checking the consensus state of the IBC component + +The following function ignores how the proof of fork was generated. +It takes a proof of fork as input and computes a proof of fork that + will be accepted by the IBC component. +The problem addressed here is that both, the relayer's light client + and the IBC component have incomplete light stores, that might + not have all light blocks in common. +Hence the relayer has to figure out what the IBC component knows + (intuitively, a meeting point between the two lightstores + computed in `commonRoot`) and compute a proof of fork + (`extendPoF`) that the IBC component will accept based on its + knowledge. + +The auxiliary functions `commonRoot` and `extendPoF` are +defined below. + +#### [TAG-SUBMIT-POF-IBC.1] + +```go +func SubmitIBCProofOfFork( + lightStore LightStore, + PoF: LightNodeProofOfFork, + ibc IBCComponent) (Error) { + if ibc.queryChainConsensusState(PoF.TrustedBlock.Height) = PoF.TrustedBlock { + // IBC component has root of PoF on store, we can just submit + ibc.submitMisbehaviorToClient(ibc.id,PoF) + return Success + // note sure about the id parameter + } + else { + // the ibc component does not have the TrustedBlock and might + // even be on yet a different branch. We have to compute a PoF + // that the ibc component can verifiy based on its current + // knowledge + + ibcLightBlock, lblock, _, result := commonRoot(lightStore, ibc, PoF.TrustedBlock) + + if result = Success { + newPoF = extendPoF(ibcLightBlock, lblock, lightStore, PoF) + ibc.submitMisbehaviorToClient(ibc.id, newPoF) + return Success + } + else{ + return CouldNotGeneratePoF + } + } +} +``` + +**TODO:** finish conditions + +- Implementation remark +- Expected precondition +- Expected postcondition +- Error condition + - none + +---- + +### Auxiliary Functions at the Relayer + +> If the relayer detects a fork, it has to compute a proof of fork that +> will convince the IBC component. That is it has to compare the +> relayer's local lightstore against the lightstore of the IBC +> component, and find common ancestor lightblocks. + +#### [TAG-COMMON-ROOT.1] + +```go +func commonRoot(lightStore LightStore, ibc IBCComponent, lblock +LightBlock) (LightBlock, LightBlock, LightStore, Result) { + + auxLS.Init + + // first we ask for the heights the ibc component is aware of + ibcHeights = ibc.QueryHeightsRange( + ibc.id, + lightStore.LowestVerified().Height, + lblock.Height - 1); + // this function does not exist yet. Alternatively, we may + // request all transactions that installed headers via CosmosSDK + + + for { + h, result = max(ibcHeights) + if result = Empty { + return (_, _, _, NoRoot) + } + ibcLightBlock = ibc.queryChainConsensusState(h) + auxLS.Update(ibcLightBlock, StateVerified); + connector, result := Connector(lightStore, ibcLightBlock, lblock.Header.Height) + if result = success { + return (ibcLightBlock, connector, auxLS, Success) + } + else{ + ibcHeights.remove(h) + } + } +} +``` + +- Expected postcondition + - returns + - a lightBlock b1 from the IBC component, and + - a lightBlock b2 + from the local lightStore with height less than + lblock.Header.Hight, s.t. b1 supports b2, and + - a lightstore with the blocks downloaded from + the ibc component + +---- + +#### [TAG-LS-FUNC-CONNECT.1] + +```go +func Connector (lightStore LightStore, lb LightBlock, h Height) (LightBlock, bool) +``` + +- Expected postcondition + - returns a verified LightBlock from lightStore with height less + than *h* that can be + verified by lb in one step. + +**TODO:** for the above to work we need an invariant that all verified +lightblocks form a chain of trust. Otherwise, we need a lightblock +that has a chain of trust to height. + +> Once the common root is found, a proof of fork that will be accepted +> by the IBC component needs to be generated. This is done in the +> following function. + +#### [TAG-EXTEND-POF.1] + +```go +func extendPoF (root LightBlock, + connector LightBlock, + lightStore LightStore, + Pof LightNodeProofofFork) (LightNodeProofofFork} +``` + +- Implementation remark + - PoF is not sufficient to convince an IBC component, so we extend + the proof of fork farther in the past +- Expected postcondition + - returns a newPOF: + - newPoF.TrustedBlock = root + - let prefix = + connector + + lightStore.Subtrace(connector.Header.Height, PoF.TrustedBlock.Header.Height-1) + + PoF.TrustedBlock + - newPoF.PrimaryTrace = prefix + PoF.PrimaryTrace + - newPoF.SecondaryTrace = prefix + PoF.SecondaryTrace + +### Detection a fork at the IBC component + +The following functions is assumed to be called regularly to check +that latest consensus state of the IBC component. Alternatively, this +logic can be executed whenever the relayer is informed (via an event) +that a new header has been installed. + +#### [TAG-HANDLER-DETECT-FORK.1] + +```go +func DetectIBCFork(ibc IBCComponent, lightStore LightStore) (LightNodeProofOfFork, Error) { + cs = ibc.queryClientState(ibc); + lb, found := lightStore.Get(cs.Header.Height) + if !found { + **TODO:** need verify to target + lb, result = LightClient.Main(primary, lightStore, cs.Header.Height) + // [LCV-FUNC-IBCMAIN.1] + **TODO** decide what to do following the outcome of Issue #499 + + // I guess here we have to get into the light client + + } + if cs != lb { + // IBC component disagrees with my primary. + // I fetch the + ibcLightBlock, lblock, ibcStore, result := commonRoot(lightStore, ibc, lb) + pof = new LightNodeProofOfFork; + pof.TrustedBlock := ibcLightBlock + pof.PrimaryTrace := ibcStore + cs + pof.SecondaryTrace := lightStore.Subtrace(lblock.Header.Height, + lb.Header.Height); + return(pof, Fork) + } + return(nil , NoFork) +} +``` + +**TODO:** finish conditions + +- Implementation remark + - we ask the handler for the lastest check. Cross-check with the + chain. In case they deviate we generate PoF. + - we assume IBC component is correct. It has verified the + consensus state +- Expected precondition +- Expected postcondition diff --git a/spec/light-client/detection/req-ibc-detection.md b/spec/light-client/detection/req-ibc-detection.md new file mode 100644 index 000000000..9d82a279b --- /dev/null +++ b/spec/light-client/detection/req-ibc-detection.md @@ -0,0 +1,345 @@ +# Requirements for Fork Detection in the IBC Context + +## What you need to know about IBC + +In the following, I distilled what I considered relevant from + + + +### Components and their interface + +#### Tendermint Blockchains + +> I assume you know what that is. + +#### An IBC/Tendermint correspondence + +| IBC Term | Tendermint-RS Spec Term | Comment | +|----------|-------------------------| --------| +| `CommitmentRoot` | AppState | app hash | +| `ConsensusState` | Lightblock | not all fields are there. NextValidator is definitly needed | +| `ClientState` | latest light block + configuration parameters (e.g., trusting period + `frozenHeight` | NextValidators missing; what is `proofSpecs`?| +| `frozenHeight` | height of fork | set when a fork is detected | +| "would-have-been-fooled" | light node fork detection | light node may submit proof of fork to IBC component to halt it | +| `Height` | (no epochs) | (epoch,height) pair in lexicographical order (`compare`) | +| `Header` | ~signed header | validatorSet explicit (no hash); nextValidators missing | +| `Evidence` | t.b.d. | definition unclear "which the light client would have considered valid". Data structure will need to change | +| `verify` | `ValidAndVerified` | signature does not match perfectly (ClientState vs. LightBlock) + in `checkMisbehaviorAndUpdateState` it is unclear whether it uses traces or goes to h1 and h2 in one step | + +#### Some IBC links + +- [QueryConsensusState](https://github.com/cosmos/cosmos-sdk/blob/2651427ab4c6ea9f81d26afa0211757fc76cf747/x/ibc/02-client/client/utils/utils.go#L68) + +#### Required Changes in ICS 007 + +- `assert(height > 0)` in definition of `initialize` doesn't match + definition of `Height` as *(epoch,height)* pair. + +- `initialize` needs to be updated to new data structures + +- `clientState.frozenHeight` semantics seem not totally consistent in + document. E.g., `min` needs to be defined over optional value in + `checkMisbehaviorAndUpdateState`. Also, if you are frozen, why do + you accept more evidence. + +- `checkValidityAndUpdateState` + - `verify`: it needs to be clarified that checkValidityAndUpdateState + does not perform "bisection" (as currently hinted in the text) but + performs a single step of "skipping verification", called, + `ValidAndVerified` + - `assert (header.height > clientState.latestHeight)`: no old + headers can be installed. This might be OK, but we need to check + interplay with misbehavior + - clienstState needs to be updated according to complete data + structure + +- `checkMisbehaviorAndUpdateState`: as evidence will contain a trace + (or two), the assertion that uses verify will need to change. + +- ICS 002 states w.r.t. `queryChainConsensusState` that "Note that + retrieval of past consensus states by height (as opposed to just the + current consensus state) is convenient but not required." For + Tendermint fork detection, this seems to be a necessity. + +- `Header` should become a lightblock + +- `Evidence` should become `LightNodeProofOfFork` [LCV-DATA-POF.1] + +- `upgradeClientState` what is the semantics (in particular what is + `height` doing?). + +- `checkMisbehaviorAndUpdateState(cs: ClientState, PoF: + LightNodeProofOfFork)` needs to be adapted + +#### Handler + +A blockchain runs a **handler** that passively collects information about + other blockchains. It can be thought of a state machine that takes + input events. + +- the state includes a lightstore (I guess called `ConsensusState` + in IBC) + +- The following function is used to pass a header to a handler + +```go +type checkValidityAndUpdateState = (Header) => Void +``` + + For Tendermint, it will perform + `ValidandVerified`, that is, it does the trusting period check and the + +1/3 check (+2/3 for sequential headers). + If it verifies a header, it adds it to its lightstore, + if it does not pass verification it drops it. + Right now it only accepts a header more recent then the latest + header, + and drops older + ones or ones that could not be verified. + +> The above paragraph captures what I believe what is the current + logic of `checkValidityAndUpdateState`. It may be subject to + change. E.g., maintain a lightstore with state (unverified, verified) + +- The following function is used to pass "evidence" (this we + will need to make precise eventually) to a handler + +```go +type checkMisbehaviorAndUpdateState = (bytes) => Void +``` + + We have to design this, and the data that the handler can use to + check that there was some misbehavior (fork) in order react on + it, e.g., flagging a situation and + stop the protocol. + +- The following function is used to query the light store (`ConsensusState`) + +```go +type queryChainConsensusState = (height: uint64) => ConsensusState +``` + +#### Relayer + +- The active components are called **relayer**. + +- a relayer contains light clients to two (or more?) blockchains + +- the relayer send headers and data to the handler to invoke + `checkValidityAndUpdateState` and + `checkMisbehaviorAndUpdateState`. It may also query + `queryChainConsensusState`. + +- multiple relayers may talk to one handler. Some relayers might be + faulty. We assume existence of at least single correct relayer. + +## Informal Problem Statement: Fork detection in IBC + +### Relayer requirement: Evidence for Handler + +- The relayer should provide the handler with + "evidence" that there was a fork. + +- The relayer can read the handler's consensus state. Thus the relayer can + feed the handler precisely the information the handler needs to detect a + fork. + What is this + information needs to be specified. + +- The information depends on the verification the handler does. It + might be necessary to provide a bisection proof (list of + lightblocks) so that the handler can verify based on its local + lightstore a header *h* that is conflicting with a header *h'* in the + local lightstore, that is, *h != h'* and *h.Height = h'.Height* + +### Relayer requirement: Fork detection + +Let's assume there is a fork at chain A. There are two ways the +relayer can figure that out: + +1. as the relayer contains a light client for A, it also includes a fork + detector that can detect a fork. + +2. the relayer may also detect a fork by observing that the + handler for chain A (on chain B) + is on a different branch than the relayer + +- in both detection scenarios, the relayer should submit evidence to + full nodes of chain A where there is a fork. As we assume a fullnode + has a complete list of blocks, it is sufficient to send "Bucky's + evidence" (), + that is, + - two lightblocks from different branches + + - a lightblock (perhaps just a height) from which both blocks + can be verified. + +- in the scenario 2., the relayer must feed the A-handler (on chain B) + a proof of a fork on A so that chain B can react accordingly + +### Handler requirement + +- there are potentially many relayers, some correct some faulty + +- a handler cannot trust the information provided by the relayer, + but must verify + (Доверя́й, но проверя́й) + +- in case of a fork, we accept that the handler temporarily stores + headers (tagged as verified). + +- eventually, a handler should be informed + (`checkMisbehaviorAndUpdateState`) + by some relayer that it has + verified a header from a fork. Then the handler should do what is + required by IBC in this case (stop?) + +### Challenges in the handler requirement + +- handlers and relayers work on different lightstores. In principle + the lightstore need not intersect in any heights a priori + +- if a relayer sees a header *h* it doesn't know at a handler (`queryChainConsensusState`), the + relayer needs to + verify that header. If it cannot do it locally based on downloaded + and verified (trusted?) light blocks, it might need to use + `VerifyToTarget` (bisection). To call `VerifyToTarget` we might keep + *h* in the lightstore. If verification fails, we need to download the + "alternative" header of height *h.Height* to generate evidence for + the handler. + +- we have to specify what precisely `queryChainConsensusState` + returns. It cannot be the complete lightstore. Is the last header enough? + +- we would like to assume that every now and then (smaller than the + trusting period) a correct relayer checks whether the handler is on a + different branch than the relayer. + And we would like that this is enough to achieve + the Handler requirement. + + - here the correctness argument would be easy if a correct relayer is + based on a light client with a *trusted* state, that is, a light + client who never changes its opinion about trusted. Then if such a + correct relayer checks-in with a handler, it will detect a fork, and + act in time. + + - if the light client does not provide this interface, in the case of + a fork, we need some assumption about a correct relayer being on a + different branch than the handler, and we need such a relayer to + check-in not too late. Also + what happens if the relayer's light client is forced to roll-back + its lightstore? + Does it have to re-check all handlers? + +## On the interconnectedness of things + +In the broader discussion of so-called "fork accountability" there are +several subproblems + +- Fork detection + +- Evidence creation and submission + +- Isolating misbehaving nodes (and report them for punishment over abci) + +### Fork detection + +The preliminary specification ./detection.md formalizes the notion of +a fork. Roughly, a fork exists if there are two conflicting headers +for the same height, where both are supported by bonded full nodes +(that have been validators in the near past, that is, within the +trusting period). We distinguish between *fork on the chain* where two +conflicting blocks are signed by +2/3 of the validators of that +height, and a *light client fork* where one of the conflicting headers +is not signed by +2/3 of the current height, but by +1/3 of the +validators of some smaller height. + +In principle everyone can detect a fork + +- ./detection talks about the Tendermint light client with a focus on + light nodes. A relayer runs such light clients and may detect + forks in this way + +- in IBC, a relayer can see that a handler is on a conflicting branch + - the relayer should feed the handler the necessary information so + that it can halt + - the relayer should report the fork to a full node + +### Evidence creation and submission + +- the information sent from the relayer to the handler could be called + evidence, but this is perhaps a bad idea because the information sent to a + full node can also be called evidence. But this evidence might still + not be enough as the full node might need to run the "fork + accountability" protocol to generate evidence in the form of + consensus messages. So perhaps we should + introduce different terms for: + + - proof of fork for the handler (basically consisting of lightblocks) + - proof of fork for a full node (basically consisting of (fewer) lightblocks) + - proof of misbehavior (consensus messages) + +### Isolating misbehaving nodes + +- this is the job of a full node. + +- might be subjective in the future: the protocol depends on what the + full node believes is the "correct" chain. Right now we postulate + that every full node is on the correct chain, that is, there is no + fork on the chain. + +- The full node figures out which nodes are + - lunatic + - double signing + - amnesic; **using the challenge response protocol** + +- We do not punish "phantom" validators + - currently we understand a phantom validator as a node that + - signs a block for a height in which it is not in the + validator set + - the node is not part of the +1/3 of previous validators that + are used to support the header. Whether we call a validator + phantom might be subjective and depend on the header we + check against. Their formalization actually seems not so + clear. + - they can only do something if there are +1/3 faulty validators + that are either lunatic, double signing, or amnesic. + - abci requires that we only report bonded validators. So if a + node is a "phantom", we would need the check whether the node is + bonded, which currently is expensive, as it requires checking + blocks from the last three weeks. + - in the future, with state sync, a correct node might be + convinced by faulty nodes that it is in the validator set. Then + it might appear to be "phantom" although it behaves correctly + +## Next steps + +> The following points are subject to my limited knowledge of the +> state of the work on IBC. Some/most of it might already exist and we +> will just need to bring everything together. + +- "proof of fork for a full node" defines a clean interface between + fork detection and misbehavior isolation. So it should be produced + by protocols (light client, the relayer). So we should fix that + first. + +- Given the problems of not having a light client architecture spec, + for the relayer we should start with this. E.g. + + - the relayer runs light clients for two chains + - the relayer regularly queries consensus state of a handler + - the relayer needs to check the consensus state + - this involves local checks + - this involves calling the light client + - the relayer uses the light client to do IBC business (channels, + packets, connections, etc.) + - the relayer submits proof of fork to handlers and full nodes + +> the list is definitely not complete. I think part of this +> (perhaps all) is +> covered by what Anca presented recently. + +We will need to define what we expect from these components + +- for the parts where the relayer talks to the handler, we need to fix + the interface, and what the handler does + +- we write specs for these components. diff --git a/spec/light-client/experiments.png b/spec/light-client/experiments.png new file mode 100644 index 000000000..94166ffa3 Binary files /dev/null and b/spec/light-client/experiments.png differ diff --git a/spec/light-client/supervisor/supervisor_001_draft.md b/spec/light-client/supervisor/supervisor_001_draft.md new file mode 100644 index 000000000..90add66fc --- /dev/null +++ b/spec/light-client/supervisor/supervisor_001_draft.md @@ -0,0 +1,637 @@ +# Draft of Light Client Supervisor for discussion + +## TODOs + +This specification in done in parallel with updates on the +verification specification. So some hyperlinks have to be placed to +the correct files eventually. + +# Light Client Sequential Supervisor + +The light client implements a read operation of a +[header](TMBC-HEADER-link) from the [blockchain](TMBC-SEQ-link), by +communicating with full nodes, a so-called primary and several +so-called witnesses. As some full nodes may be faulty, this +functionality must be implemented in a fault-tolerant way. + +In the Tendermint blockchain, the validator set may change with every +new block. The staking and unbonding mechanism induces a [security +model](TMBC-FM-2THIRDS-link): starting at time *Time* of the +[header](TMBC-HEADER-link), +more than two-thirds of the next validators of a new block are correct +for the duration of *TrustedPeriod*. + +[Light Client Verification](https://informal.systems) implements the fault-tolerant read +operation designed for this security model. That is, it is safe if the +model assumptions are satisfied and makes progress if it communicates +to a correct primary. + +However, if the [security model](TMBC-FM-2THIRDS-link) is violated, +faulty peers (that have been validators at some point in the past) may +launch attacks on the Tendermint network, and on the light +client. These attacks as well as an axiomatization of blocks in +general are defined in [a document that contains the definitions that +are currently in detection.md](https://informal.systems). + +If there is a light client attack (but no +successful attack on the network), the safety of the verification step +may be violated (as we operate outside its basic assumption). +The light client also +contains a defense mechanism against light clients attacks, called detection. + +[Light Client Detection](https://informal.systems) implements a cross check of the result +of the verification step. If there is a light client attack, and the +light client is connected to a correct peer, the light client as a +whole is safe, that is, it will not operate on invalid +blocks. However, in this case it cannot successfully read, as +inconsistent blocks are in the system. However, in this case the +detection performs a distributed computation that results in so-called +evidence. Evidence can be used to prove +to a correct full node that there has been a +light client attack. + +[Light Client Evidence Accountability](https://informal.systems) is a protocol run on a +full node to check whether submitted evidence indeed proves the +existence of a light client attack. Further, from the evidence and its +own knowledge about the blockchain, the full node computes a set of +bonded full nodes (that at some point had more than one third of the +voting power) that participated in the attack that will be reported +via ABCI to the application. + +In this document we specify + +- Initialization of the Light Client +- The interaction of [verification](https://informal.systems) and [detection](https://informal.systems) + +The details of these two protocols are captured in their own +documents, as is the [accountability](https://informal.systems) protocol. + +> Another related line is IBC attack detection and submission at the +> relayer, as well as attack verification at the IBC handler. This +> will call for yet another spec. + +# Status + +This document is work in progress. In order to develop the +specification step-by-step, +it assumes certain details of [verification](https://informal.systems) and +[detection](https://informal.systems) that are not specified in the respective current +versions yet. This inconsistencies will be addresses over several +upcoming PRs. + +# Part I - Tendermint Blockchain + +See [verification spec](addLinksWhenDone) + +# Part II - Sequential Problem Definition + +#### **[LC-SEQ-INIT-LIVE.1]** + +Upon initialization, the light client gets as input a header of the +blockchain, or the genesis file of the blockchain, and eventually +stores a header of the blockchain. + +#### **[LC-SEQ-LIVE.1]** + +The light client gets a sequence of heights as inputs. For each input +height *targetHeight*, it eventually stores the header of height +*targetHeight*. + +#### **[LC-SEQ-SAFE.1]** + +The light client never stores a header which is not in the blockchain. + +# Part III - Light Client as Distributed System + +## Computational Model + +The light client communicates with remote processes only via the +[verification](TODO) and the [detection](TODO) protocols. The +respective assumptions are given there. + +## Distributed Problem Statement + +### Two Kinds of Liveness + +In case of light client attacks, the sequential problem statement +cannot always be satisfied. The lightclient cannot decide which block +is from the chain and which is not. As a result, the light client just +creates evidence, submits it, and terminates. +For the liveness property, we thus add the +possibility that instead of adding a lightblock, we also might terminate +in case there is an attack. + +#### **[LC-DIST-TERM.1]** + +The light client either runs forever or it *terminates on attack*. + +### Design choices + +#### [LC-DIST-STORE.1] + +The light client has a local data structure called LightStore +that contains light blocks (that contain a header). + +> The light store exposes functions to query and update it. They are +> specified [here](TODO:onceVerificationIsMerged). + +**TODO:** reference light store invariant [LCV-INV-LS-ROOT.2] once +verification is merged + +#### **[LC-DIST-SAFE.1]** + +It is always the case that every header in *LightStore* was +generated by an instance of Tendermint consensus. + +#### **[LC-DIST-LIVE.1]** + +Whenever the light client gets a new height *h* as input, + +- and there is +no light client attack up to height *h*, then the lightclient +eventually puts the lightblock of height *h* in the lightstore and +wait for another input. +- otherwise, that is, if there +is a light client attack on height *h*, then the light client +must perform one of the following: + - it terminates on attack. + - it eventually puts the lightblock of height *h* in the lightstore and +wait for another input. + +> Observe that the "existence of a lightclient attack" just means that some node has generated a conflicting block. It does not necessarily mean that a (faulty) peer sends such a block to "our" lightclient. Thus, even if there is an attack somewhere in the system, our lightclient might still continue to operate normally. + +### Solving the sequential specification + +[LC-DIST-SAFE.1] is guaranteed by the detector; in particular it +follows from +[[LCD-DIST-INV-STORE.1]](TODO) +[[LCD-DIST-LIVE.1]](TODO) + +# Part IV - Light Client Supervisor Protocol + +We provide a specification for a sequential Light Client Supervisor. +The local code for verification is presented by a sequential function +`Sequential-Supervisor` to highlight the control flow of this +functionality. Each lightblock is first verified with a primary, and then +cross-checked with secondaries, and if all goes well, the lightblock +is +added (with the attribute "trusted") to the +lightstore. Intermiate lightblocks that were used to verify the target +block but were not cross-checked are stored as "verified" + +> We note that if a different concurrency model is considered +> for an implementation, the semantics of the lightstore might change: +> In a concurrent implementation, we might do verification for some +> height *h*, add the +> lightblock to the lightstore, and start concurrent threads that +> +> - do verification for the next height *h' != h* +> - do cross-checking for height *h*. If we find an attack, we remove +> *h* from the lightstore. +> - the user might already start to use *h* +> +> Thus, this concurrency model changes the semantics of the +> lightstore (not all lightblocks that are read by the user are +> trusted; they may be removed if +> we find a problem). Whether this is desirable, and whether the gain in +> performance is worth it, we keep for future versions/discussion of +> lightclient protocols. + +## Definitions + +### Peers + +#### **[LC-DATA-PEERS.1]:** + +A fixed set of full nodes is provided in the configuration upon +initialization. Initially this set is partitioned into + +- one full node that is the *primary* (singleton set), +- a set *Secondaries* (of fixed size, e.g., 3), +- a set *FullNodes*; it excludes *primary* and *Secondaries* nodes. +- A set *FaultyNodes* of nodes that the light client suspects of + being faulty; it is initially empty + +#### **[LC-INV-NODES.1]:** + +The detector shall maintain the following invariants: + +- *FullNodes \intersect Secondaries = {}* +- *FullNodes \intersect FaultyNodes = {}* +- *Secondaries \intersect FaultyNodes = {}* + +and the following transition invariant + +- *FullNodes' \union Secondaries' \union FaultyNodes' = FullNodes + \union Secondaries \union FaultyNodes* + +#### **[LC-FUNC-REPLACE-PRIMARY.1]:** + +```go +Replace_Primary(root-of-trust LightBlock) +``` + +- Implementation remark + - the primary is replaced by a secondary + - to maintain a constant size of secondaries, need to + - pick a new secondary *nsec* while ensuring [LC-INV-ROOT-AGREED.1] + - that is, we need to ensure that root-of-trust = FetchLightBlock(nsec, root-of-trust.Header.Height) +- Expected precondition + - *FullNodes* is nonempty +- Expected postcondition + - *primary* is moved to *FaultyNodes* + - a secondary *s* is moved from *Secondaries* to primary +- Error condition + - if precondition is violated + +#### **[LC-FUNC-REPLACE-SECONDARY.1]:** + +```go +Replace_Secondary(addr Address, root-of-trust LightBlock) +``` + +- Implementation remark + - maintain [LC-INV-ROOT-AGREED.1], that is, + ensure root-of-trust = FetchLightBlock(nsec, root-of-trust.Header.Height) +- Expected precondition + - *FullNodes* is nonempty +- Expected postcondition + - addr is moved from *Secondaries* to *FaultyNodes* + - an address *nsec* is moved from *FullNodes* to *Secondaries* +- Error condition + - if precondition is violated + +### Data Types + +The core data structure of the protocol is the LightBlock. + +#### **[LC-DATA-LIGHTBLOCK.1]** + +```go +type LightBlock struct { + Header Header + Commit Commit + Validators ValidatorSet + NextValidators ValidatorSet + Provider PeerID +} +``` + +#### **[LC-DATA-LIGHTSTORE.1]** + +LightBlocks are stored in a structure which stores all LightBlock from +initialization or received from peers. + +```go +type LightStore struct { + ... +} + +``` + +We use the functions that the LightStore exposes, which +are defined in the [verification specification](TODO). + +### Inputs + +The lightclient is initialized with LCInitData + +#### **[LC-DATA-INIT.1]** + +```go +type LCInitData struct { + lightBlock LightBlock + genesisDoc GenesisDoc +} +``` + +where only one of the components must be provided. `GenesisDoc` is +defined in the [Tendermint +Types](https://github.com/tendermint/tendermint/blob/master/types/genesis.go). + +#### **[LC-DATA-GENESIS.1]** + +```go +type GenesisDoc struct { + GenesisTime time.Time `json:"genesis_time"` + ChainID string `json:"chain_id"` + InitialHeight int64 `json:"initial_height"` + ConsensusParams *tmproto.ConsensusParams `json:"consensus_params,omitempty"` + Validators []GenesisValidator `json:"validators,omitempty"` + AppHash tmbytes.HexBytes `json:"app_hash"` + AppState json.RawMessage `json:"app_state,omitempty"` +} +``` + +We use the following function +`makeblock` so that we create a lightblock from the genesis +file in order to do verification based on the data from the genesis +file using the same verification function we use in normal operation. + +#### **[LC-FUNC-MAKEBLOCK.1]** + +```go +func makeblock (genesisDoc GenesisDoc) (lightBlock LightBlock)) +``` + +- Implementation remark + - none +- Expected precondition + - none +- Expected postcondition + - lightBlock.Header.Height = genesisDoc.InitialHeight + - lightBlock.Header.Time = genesisDoc.GenesisTime + - lightBlock.Header.LastBlockID = nil + - lightBlock.Header.LastCommit = nil + - lightBlock.Header.Validators = genesisDoc.Validators + - lightBlock.Header.NextValidators = genesisDoc.Validators + - lightBlock.Header.Data = nil + - lightBlock.Header.AppState = genesisDoc.AppState + - lightBlock.Header.LastResult = nil + - lightBlock.Commit = nil + - lightBlock.Validators = genesisDoc.Validators + - lightBlock.NextValidators = genesisDoc.Validators + - lightBlock.Provider = nil +- Error condition + - none + +---- + +### Configuration Parameters + +#### **[LC-INV-ROOT-AGREED.1]** + +In the Sequential-Supervisor, it is always the case that the primary +and all secondaries agree on lightStore.Latest(). + +### Assumptions + +We have to assume that the initialization data (the lightblock or the +genesis file) are consistent with the blockchain. This is subjective +initialization and it cannot be checked locally. + +### Invariants + +#### **[LC-INV-PEERLIST.1]:** + +The peer list contains a primary and a secondary. + +> If the invariant is violated, the light client does not have enough +> peers to download headers from. As a result, the light client +> needs to terminate in case this invariant is violated. + +## Supervisor + +### Outline + +The supervisor implements the functionality of the lightclient. It is +initialized with a genesis file or with a lightblock the user +trusts. This initialization is subjective, that is, the security of +the lightclient is based on the validity of the input. If the genesis +file or the lightblock deviate from the actual ones on the blockchain, +the lightclient provides no guarantees. + +After initialization, the supervisor awaits an input, that is, the +height of the next lightblock that should be obtained. Then it +downloads, verifies, and cross-checks a lightblock, and if all tests +go through, the light block (and possibly other lightblocks) are added +to the lightstore, which is returned in an output event to the user. + +The following main loop does the interaction with the user (input, +output) and calls the following two functions: + +- `InitLightClient`: it initializes the lightstore either with the + provided lightblock or with the lightblock that corresponds to the + first block generated by the blockchain (by the validators defined + by the genesis file) +- `VerifyAndDetect`: takes as input a lightstore and a height and + returns the updated lightstore. + +#### **[LC-FUNC-SUPERVISOR.1]:** + +```go +func Sequential-Supervisor (initdata LCInitData) (Error) { + + lightStore,result := InitLightClient(initData); + if result != OK { + return result; + } + + loop { + // get the next height + nextHeight := input(); + + lightStore,result := VerifyAndDetect(lightStore, nextHeight); + + if result == OK { + output(LightStore.Get(targetHeight)); + // we only output a trusted lightblock + } + else { + return result + } + // QUESTION: is it OK to generate output event in normal case, + // and terminate with failure in the (light client) attack case? + } +} +``` + +- Implementation remark + - infinite loop unless a light client attack is detected + - In typical implementations (e.g., the one in Rust), + there are mutliple input actions: + `VerifytoLatest`, `LatestTrusted`, and `GetStatus`. The + information can be easily obtained from the lightstore, so that + we do not treat these requests explicitly here but just consider + the request for a block of a given height which requires more + involved computation and communication. +- Expected precondition + - *LCInitData* contains a genesis file or a lightblock. +- Expected postcondition + - if a light client attack is detected: it stops and submits + evidence (in `InitLightClient` or `VerifyAndDetect`) + - otherwise: non. It runs forever. +- Invariant: *lightStore* contains trusted lightblocks only. +- Error condition + - if `InitLightClient` or `VerifyAndDetect` fails (if a attack is + detected, or if [LCV-INV-TP.1] is violated) + +---- + +### Details of the Functions + +#### Initialization + +The light client is based on subjective initialization. It has to +trust the initial data given to it by the user. It cannot do any +detection of attack. So either upon initialization we obtain a +lightblock and just initialize the lightstore with it. Or in case of a +genesis file, we download, verify, and cross-check the first block, to +initialize the lightstore with this first block. The reason is that +we want to maintain [LCV-INV-TP.1] from the beginning. + +> If the lightclient is initialized with a lightblock, one might think +> it may increase trust, when one cross-checks the initial light +> block. However, if a peer provides a conflicting +> lightblock, the question is to distinguish the case of a +> [bogus](https://informal.systems) block (upon which operation should proceed) from a +> [light client attack](https://informal.systems) (upon which operation should stop). In +> case of a bogus block, the lightclient might be forced to do +> backwards verification until the blocks are out of the trusting +> period, to make sure no previous validator set could have generated +> the bogus block, which effectively opens up a DoS attack on the lightclient +> without adding effective robustness. + +#### **[LC-FUNC-INIT.1]:** + +```go +func InitLightClient (initData LCInitData) (LightStore, Error) { + + if LCInitData.LightBlock != nil { + // we trust the provided initial block. + newblock := LCInitData.LightBlock + } + else { + genesisBlock := makeblock(initData.genesisDoc); + + result := NoResult; + while result != ResultSuccess { + current = FetchLightBlock(PeerList.primary(), genesisBlock.Header.Height + 1) + // QUESTION: is the height with "+1" OK? + + if CANNOT_VERIFY = ValidAndVerify(genesisBlock, current) { + Replace_Primary(); + } + else { + result = ResultSuccess + } + } + + // cross-check + auxLS := new LightStore + auxLS.Add(current) + Evidences := AttackDetector(genesisBlock, auxLS) + if Evidences.Empty { + newBlock := current + } + else { + // [LC-SUMBIT-EVIDENCE.1] + submitEvidence(Evidences); + return(nil, ErrorAttack); + } + } + + lightStore := new LightStore; + lightStore.Add(newBlock); + return (lightStore, OK); +} + +``` + +- Implementation remark + - none +- Expected precondition + - *LCInitData* contains either a genesis file of a lightblock + - if genesis it passes `ValidateAndComplete()` see [Tendermint](https://informal.systems) +- Expected postcondition + - *lightStore* initialized with trusted lightblock. It has either been + cross-checked (from genesis) or it has initial trust from the + user. +- Error condition + - if precondition is violated + - empty peerList + +---- + +#### Main verification and detection logic + +#### **[LC-FUNC-MAIN-VERIF-DETECT.1]:** + +```go +func VerifyAndDetect (lightStore LightStore, targetHeight Height) + (LightStore, Result) { + + b1, r1 = lightStore.Get(targetHeight) + if r1 == true { + if b1.State == StateTrusted { + // block already there and trusted + return (lightStore, ResultSuccess) + } + else { + // We have a lightblock in the store, but it has not been + // cross-checked by now. We do that now. + root_of_trust, auxLS := lightstore.TraceTo(b1); + + // Cross-check + Evidences := AttackDetector(root_of_trust, auxLS); + if Evidences.Empty { + // no attack detected, we trust the new lightblock + lightStore.Update(auxLS.Latest(), + StateTrusted, + verfiedLS.Latest().verification-root); + return (lightStore, OK); + } + else { + // there is an attack, we exit + submitEvidence(Evidences); + return(lightStore, ErrorAttack); + } + } + } + + // get the lightblock with maximum height smaller than targetHeight + // would typically be the heighest, if we always move forward + root_of_trust, r2 = lightStore.LatestPrevious(targetHeight); + + if r2 = false { + // there is no lightblock from which we can do forward + // (skipping) verification. Thus we have to go backwards. + // No cross-check needed. We trust hashes. Therefore, we + // directly return the result + return Backwards(primary, lightStore.Lowest(), targetHeight) + } + else { + // Forward verification + detection + result := NoResult; + while result != ResultSuccess { + verifiedLS,result := VerifyToTarget(primary, + root_of_trust, + nextHeight); + if result == ResultFailure { + // pick new primary (promote a secondary to primary) + Replace_Primary(root_of_trust); + } + else if result == ResultExpired { + return (lightStore, result) + } + } + + // Cross-check + Evidences := AttackDetector(root_of_trust, verifiedLS); + if Evidences.Empty { + // no attack detected, we trust the new lightblock + verifiedLS.Update(verfiedLS.Latest(), + StateTrusted, + verfiedLS.Latest().verification-root); + lightStore.store_chain(verifidLS); + return (lightStore, OK); + } + else { + // there is an attack, we exit + return(lightStore, ErrorAttack); + } + } +} +``` + +- Implementation remark + - none +- Expected precondition + - none +- Expected postcondition + - lightblock of height *targetHeight* (and possibly additional blocks) added to *lightStore* +- Error condition + - an attack is detected + - [LC-DATA-PEERLIST-INV.1] is violated + +---- \ No newline at end of file diff --git a/spec/light-client/supervisor/supervisor_001_draft.tla b/spec/light-client/supervisor/supervisor_001_draft.tla new file mode 100644 index 000000000..949a7c020 --- /dev/null +++ b/spec/light-client/supervisor/supervisor_001_draft.tla @@ -0,0 +1,71 @@ +------------------------- MODULE supervisor_001_draft ------------------------ +(* +This is the beginning of a spec that will eventually use verification and detector API +*) + +EXTENDS Integers, FiniteSets + +VARIABLES + state, + output + +vars == <> + +CONSTANT + INITDATA + +Init == + /\ state = "Init" + /\ output = "none" + +NextInit == + /\ state = "Init" + /\ \/ state' = "EnterLoop" + \/ state' = "FailedToInitialize" + /\ UNCHANGED output + +NextVerifyToTarget == + /\ state = "EnterLoop" + /\ \/ state' = "EnterLoop" \* replace primary + \/ state' = "EnterDetect" + \/ state' = "ExhaustedPeersPrimary" + /\ UNCHANGED output + +NextAttackDetector == + /\ state = "EnterDetect" + /\ \/ state' = "NoEvidence" + \/ state' = "EvidenceFound" + \/ state' = "ExhaustedPeersSecondaries" + /\ UNCHANGED output + +NextVerifyAndDetect == + \/ NextVerifyToTarget + \/ NextAttackDetector + +NextOutput == + /\ state = "NoEvidence" + /\ state' = "EnterLoop" + /\ output' = "data" \* to generate a trace + +NextTerminated == + /\ \/ state = "FailedToInitialize" + \/ state = "ExhaustedPeersPrimary" + \/ state = "EvidenceFound" + \/ state = "ExhaustedPeersSecondaries" + /\ UNCHANGED vars + +Next == + \/ NextInit + \/ NextVerifyAndDetect + \/ NextOutput + \/ NextTerminated + +InvEnoughPeers == + /\ state /= "ExhaustedPeersPrimary" + /\ state /= "ExhaustedPeersSecondaries" + + +============================================================================= +\* Modification History +\* Last modified Sun Oct 18 11:48:45 CEST 2020 by widder +\* Created Sun Oct 18 11:18:53 CEST 2020 by widder diff --git a/spec/light-client/supervisor/supervisor_002_draft.md b/spec/light-client/supervisor/supervisor_002_draft.md new file mode 100644 index 000000000..4300b8044 --- /dev/null +++ b/spec/light-client/supervisor/supervisor_002_draft.md @@ -0,0 +1,131 @@ +# Draft of Light Client Supervisor for discussion + +## Modification to the initialization + +The lightclient is initialized with LCInitData + +### **[LC-DATA-INIT.2]** + +```go +type LCInitData struct { + TrustedBlock LightBlock + Genesis GenesisDoc + TrustedHash []byte + TrustedHeight int64 +} +``` + +where only one of the components must be provided. `GenesisDoc` is +defined in the [Tendermint +Types](https://github.com/tendermint/tendermint/blob/master/types/genesis.go). + + +### Initialization + +The light client is based on subjective initialization. It has to +trust the initial data given to it by the user. It cannot perform any +detection of an attack yet instead requires an initial point of trust. +There are three forms of initial data which are used to obtain the +first trusted block: + +- A trusted block from a prior initialization +- A trusted height and hash +- A genesis file + +The golang light client implementation checks this initial data in that +order; first attempting to find a trusted block from the trusted store, +then acquiring a light block from the primary at the trusted height and matching +the hash, or finally checking for a genesis file to verify the initial header. + +The light client doesn't need to check if the trusted block is within the +trusted period because it already trusts it, however, if the light block is +outside the trust period, there is a higher chance the light client won't be +able to verify anything. + +Cross-checking this trusted block with providers upon initialization is helpful +for ensuring that the node is responsive and correctly configured but does not +increase trust since proving a conflicting block is a +[light client attack](https://github.com/tendermint/spec/blob/master/spec/light-client/detection/detection_003_reviewed.md#tmbc-lc-attack1) +and not just a [bogus](https://github.com/tendermint/spec/blob/master/spec/light-client/detection/detection_003_reviewed.md#tmbc-bogus1) block could result in +performing backwards verification beyond the trusted period, thus a fruitless +endeavour. + +However, with the notion of it's better to fail earlier than later, the golang +light client implementation will perform a consistency check on all providers +and will error if one returns a different header, allowing the user +the opportunity to reinitialize. + +#### **[LC-FUNC-INIT.2]:** + +```go +func InitLightClient(initData LCInitData) (LightStore, Error) { + var initialBlock LightBlock + + switch { + case LCInitData.TrustedBlock != nil: + // we trust the block from a prior initialization + initialBlock = LCInitData.TrustedBlock + + case LCInitData.TrustedHash != nil: + untrustedBlock := FetchLightBlock(PeerList.Primary(), LCInitData.TrustedHeight) + + + // verify that the hashes match + if untrustedBlock.Hash() != LCInitData.TrustedHash { + return nil, Error("Primary returned block with different hash") + } + // after checking the hash we now trust the block + initialBlock = untrustedBlock + } + case LCInitData.Genesis != nil: + untrustedBlock := FetchLightBlock(PeerList.Primary(), LCInitData.Genesis.InitialHeight) + + // verify that 2/3+ of the validator set signed the untrustedBlock + if err := VerifyCommitFull(untrustedBlock.Commit, LCInitData.Genesis.Validators); err != nil { + return nil, err + } + + // we can now trust the block + initialBlock = untrustedBlock + default: + return nil, Error("No initial data was provided") + + // This is done in the golang version but is optional and not strictly part of the protocol + if err := CrossCheck(initialBlock, PeerList.Witnesses()); err != nil { + return nil, err + } + + // initialize light store + lightStore := new LightStore; + lightStore.Add(newBlock); + return (lightStore, OK); +} + +func CrossCheck(lb LightBlock, witnesses []Provider) error { + for _, witness := range witnesses { + witnessBlock := FetchLightBlock(witness, lb.Height) + + if witnessBlock.Hash() != lb.Hash() { + return Error("Witness has different block") + } + } + return OK +} + +``` + +- Implementation remark + - none +- Expected precondition + - *LCInitData* contains either a genesis file of a lightblock + - if genesis it passes `ValidateAndComplete()` see [Tendermint](https://informal.systems) +- Expected postcondition + - *lightStore* initialized with trusted lightblock. It has either been + cross-checked (from genesis) or it has initial trust from the + user. +- Error condition + - if precondition is violated + - empty peerList + +---- + diff --git a/spec/light-client/verification/001bmc-apalache.csv b/spec/light-client/verification/001bmc-apalache.csv new file mode 100644 index 000000000..8d5ad8ea3 --- /dev/null +++ b/spec/light-client/verification/001bmc-apalache.csv @@ -0,0 +1,49 @@ +no,filename,tool,timeout,init,inv,next,args +1,MC4_3_correct.tla,apalache,1h,,PositiveBeforeTrustedHeaderExpires,,--length=30 +2,MC4_3_correct.tla,apalache,1h,,CorrectnessInv,,--length=30 +3,MC4_3_correct.tla,apalache,1h,,PrecisionInv,,--length=30 +4,MC4_3_correct.tla,apalache,1h,,SuccessOnCorrectPrimaryAndChainOfTrust,,--length=30 +5,MC4_3_correct.tla,apalache,1h,,NoFailedBlocksOnSuccessInv,,--length=30 +6,MC4_3_correct.tla,apalache,1h,,StoredHeadersAreVerifiedOrNotTrustedInv,,--length=30 +7,MC4_3_correct.tla,apalache,1h,,CorrectPrimaryAndTimeliness,,--length=30 +8,MC4_3_correct.tla,apalache,1h,,Complexity,,--length=30 +9,MC4_3_faulty.tla,apalache,1h,,PositiveBeforeTrustedHeaderExpires,,--length=30 +10,MC4_3_faulty.tla,apalache,1h,,CorrectnessInv,,--length=30 +11,MC4_3_faulty.tla,apalache,1h,,PrecisionInv,,--length=30 +12,MC4_3_faulty.tla,apalache,1h,,SuccessOnCorrectPrimaryAndChainOfTrust,,--length=30 +13,MC4_3_faulty.tla,apalache,1h,,NoFailedBlocksOnSuccessInv,,--length=30 +14,MC4_3_faulty.tla,apalache,1h,,StoredHeadersAreVerifiedOrNotTrustedInv,,--length=30 +15,MC4_3_faulty.tla,apalache,1h,,CorrectPrimaryAndTimeliness,,--length=30 +16,MC4_3_faulty.tla,apalache,1h,,Complexity,,--length=30 +17,MC5_5_correct.tla,apalache,1h,,PositiveBeforeTrustedHeaderExpires,,--length=30 +18,MC5_5_correct.tla,apalache,1h,,CorrectnessInv,,--length=30 +19,MC5_5_correct.tla,apalache,1h,,PrecisionInv,,--length=30 +20,MC5_5_correct.tla,apalache,1h,,SuccessOnCorrectPrimaryAndChainOfTrust,,--length=30 +21,MC5_5_correct.tla,apalache,1h,,NoFailedBlocksOnSuccessInv,,--length=30 +22,MC5_5_correct.tla,apalache,1h,,StoredHeadersAreVerifiedOrNotTrustedInv,,--length=30 +23,MC5_5_correct.tla,apalache,1h,,CorrectPrimaryAndTimeliness,,--length=30 +24,MC5_5_correct.tla,apalache,1h,,Complexity,,--length=30 +25,MC5_5_faulty.tla,apalache,1h,,PositiveBeforeTrustedHeaderExpires,,--length=30 +26,MC5_5_faulty.tla,apalache,1h,,CorrectnessInv,,--length=30 +27,MC5_5_faulty.tla,apalache,1h,,PrecisionInv,,--length=30 +28,MC5_5_faulty.tla,apalache,1h,,SuccessOnCorrectPrimaryAndChainOfTrust,,--length=30 +29,MC5_5_faulty.tla,apalache,1h,,NoFailedBlocksOnSuccessInv,,--length=30 +30,MC5_5_faulty.tla,apalache,1h,,StoredHeadersAreVerifiedOrNotTrustedInv,,--length=30 +31,MC5_5_faulty.tla,apalache,1h,,CorrectPrimaryAndTimeliness,,--length=30 +32,MC5_5_faulty.tla,apalache,1h,,Complexity,,--length=30 +33,MC7_5_faulty.tla,apalache,10h,,PositiveBeforeTrustedHeaderExpires,,--length=30 +34,MC7_5_faulty.tla,apalache,10h,,CorrectnessInv,,--length=30 +35,MC7_5_faulty.tla,apalache,10h,,PrecisionInv,,--length=30 +36,MC7_5_faulty.tla,apalache,10h,,SuccessOnCorrectPrimaryAndChainOfTrust,,--length=30 +37,MC7_5_faulty.tla,apalache,10h,,NoFailedBlocksOnSuccessInv,,--length=30 +38,MC7_5_faulty.tla,apalache,10h,,StoredHeadersAreVerifiedOrNotTrustedInv,,--length=30 +39,MC7_5_faulty.tla,apalache,10h,,CorrectPrimaryAndTimeliness,,--length=30 +40,MC7_5_faulty.tla,apalache,10h,,Complexity,,--length=30 +41,MC4_7_faulty.tla,apalache,10h,,PositiveBeforeTrustedHeaderExpires,,--length=30 +42,MC4_7_faulty.tla,apalache,10h,,CorrectnessInv,,--length=30 +43,MC4_7_faulty.tla,apalache,10h,,PrecisionInv,,--length=30 +44,MC4_7_faulty.tla,apalache,10h,,SuccessOnCorrectPrimaryAndChainOfTrust,,--length=30 +45,MC4_7_faulty.tla,apalache,10h,,NoFailedBlocksOnSuccessInv,,--length=30 +46,MC4_7_faulty.tla,apalache,10h,,StoredHeadersAreVerifiedOrNotTrustedInv,,--length=30 +47,MC4_7_faulty.tla,apalache,10h,,CorrectPrimaryAndTimeliness,,--length=30 +48,MC4_7_faulty.tla,apalache,10h,,Complexity,,--length=30 diff --git a/spec/light-client/verification/002bmc-apalache-ok.csv b/spec/light-client/verification/002bmc-apalache-ok.csv new file mode 100644 index 000000000..eb26aa89e --- /dev/null +++ b/spec/light-client/verification/002bmc-apalache-ok.csv @@ -0,0 +1,55 @@ +no;filename;tool;timeout;init;inv;next;args +1;MC4_3_correct.tla;apalache;1h;;TargetHeightOnSuccessInv;;--length=5 +2;MC4_3_correct.tla;apalache;1h;;StoredHeadersAreVerifiedOrNotTrustedInv;;--length=5 +3;MC4_3_correct.tla;apalache;1h;;CorrectnessInv;;--length=5 +4;MC4_3_correct.tla;apalache;1h;;NoTrustOnFaultyBlockInv;;--length=5 +5;MC4_3_correct.tla;apalache;1h;;ProofOfChainOfTrustInv;;--length=5 +6;MC4_3_correct.tla;apalache;1h;;NoFailedBlocksOnSuccessInv;;--length=5 +7;MC4_3_correct.tla;apalache;1h;;Complexity;;--length=5 +8;MC4_3_correct.tla;apalache;1h;;ApiPostInv;;--length=5 +9;MC4_4_correct.tla;apalache;1h;;TargetHeightOnSuccessInv;;--length=7 +10;MC4_4_correct.tla;apalache;1h;;CorrectnessInv;;--length=7 +11;MC4_4_correct.tla;apalache;1h;;NoTrustOnFaultyBlockInv;;--length=7 +12;MC4_4_correct.tla;apalache;1h;;ProofOfChainOfTrustInv;;--length=7 +13;MC4_4_correct.tla;apalache;1h;;NoFailedBlocksOnSuccessInv;;--length=7 +14;MC4_4_correct.tla;apalache;1h;;Complexity;;--length=7 +15;MC4_4_correct.tla;apalache;1h;;ApiPostInv;;--length=7 +16;MC4_5_correct.tla;apalache;1h;;TargetHeightOnSuccessInv;;--length=11 +17;MC4_5_correct.tla;apalache;1h;;CorrectnessInv;;--length=11 +18;MC4_5_correct.tla;apalache;1h;;NoTrustOnFaultyBlockInv;;--length=11 +19;MC4_5_correct.tla;apalache;1h;;ProofOfChainOfTrustInv;;--length=11 +20;MC4_5_correct.tla;apalache;1h;;NoFailedBlocksOnSuccessInv;;--length=11 +21;MC4_5_correct.tla;apalache;1h;;Complexity;;--length=11 +22;MC4_5_correct.tla;apalache;1h;;ApiPostInv;;--length=11 +23;MC5_5_correct.tla;apalache;1h;;TargetHeightOnSuccessInv;;--length=11 +24;MC5_5_correct.tla;apalache;1h;;CorrectnessInv;;--length=11 +25;MC5_5_correct.tla;apalache;1h;;NoTrustOnFaultyBlockInv;;--length=11 +26;MC5_5_correct.tla;apalache;1h;;ProofOfChainOfTrustInv;;--length=11 +27;MC5_5_correct.tla;apalache;1h;;NoFailedBlocksOnSuccessInv;;--length=11 +28;MC5_5_correct.tla;apalache;1h;;Complexity;;--length=11 +29;MC5_5_correct.tla;apalache;1h;;ApiPostInv;;--length=11 +30;MC4_3_faulty.tla;apalache;1h;;TargetHeightOnSuccessInv;;--length=5 +31;MC4_3_faulty.tla;apalache;1h;;StoredHeadersAreVerifiedOrNotTrustedInv;;--length=5 +32;MC4_3_faulty.tla;apalache;1h;;CorrectnessInv;;--length=5 +33;MC4_3_faulty.tla;apalache;1h;;NoTrustOnFaultyBlockInv;;--length=5 +34;MC4_3_faulty.tla;apalache;1h;;ProofOfChainOfTrustInv;;--length=5 +35;MC4_3_faulty.tla;apalache;1h;;NoFailedBlocksOnSuccessInv;;--length=5 +36;MC4_3_faulty.tla;apalache;1h;;Complexity;;--length=5 +37;MC4_3_faulty.tla;apalache;1h;;ApiPostInv;;--length=5 +38;MC4_4_faulty.tla;apalache;1h;;TargetHeightOnSuccessInv;;--length=7 +39;MC4_4_faulty.tla;apalache;1h;;StoredHeadersAreVerifiedOrNotTrustedInv;;--length=7 +40;MC4_4_faulty.tla;apalache;1h;;CorrectnessInv;;--length=7 +41;MC4_4_faulty.tla;apalache;1h;;NoTrustOnFaultyBlockInv;;--length=7 +42;MC4_4_faulty.tla;apalache;1h;;ProofOfChainOfTrustInv;;--length=7 +43;MC4_4_faulty.tla;apalache;1h;;NoFailedBlocksOnSuccessInv;;--length=7 +44;MC4_4_faulty.tla;apalache;1h;;Complexity;;--length=7 +45;MC4_4_faulty.tla;apalache;1h;;ApiPostInv;;--length=7 +46;MC4_5_faulty.tla;apalache;1h;;TargetHeightOnSuccessInv;;--length=11 +47;MC4_5_faulty.tla;apalache;1h;;StoredHeadersAreVerifiedOrNotTrustedInv;;--length=11 +48;MC4_5_faulty.tla;apalache;1h;;CorrectnessInv;;--length=11 +49;MC4_5_faulty.tla;apalache;1h;;NoTrustOnFaultyBlockInv;;--length=11 +50;MC4_5_faulty.tla;apalache;1h;;ProofOfChainOfTrustInv;;--length=11 +51;MC4_5_faulty.tla;apalache;1h;;NoFailedBlocksOnSuccessInv;;--length=11 +52;MC4_5_faulty.tla;apalache;1h;;Complexity;;--length=11 +53;MC4_5_faulty.tla;apalache;1h;;ApiPostInv;;--length=11 + diff --git a/spec/light-client/verification/003bmc-apalache-error.csv b/spec/light-client/verification/003bmc-apalache-error.csv new file mode 100644 index 000000000..ad5ef9654 --- /dev/null +++ b/spec/light-client/verification/003bmc-apalache-error.csv @@ -0,0 +1,45 @@ +no;filename;tool;timeout;init;inv;next;args +1;MC4_3_correct.tla;apalache;1h;;StoredHeadersAreVerifiedInv;;--length=5 +2;MC4_3_correct.tla;apalache;1h;;PositiveBeforeTrustedHeaderExpires;;--length=5 +3;MC4_3_correct.tla;apalache;1h;;CorrectPrimaryAndTimeliness;;--length=5 +4;MC4_3_correct.tla;apalache;1h;;PrecisionInv;;--length=5 +5;MC4_3_correct.tla;apalache;1h;;PrecisionBuggyInv;;--length=5 +6;MC4_3_correct.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustGlobal;;--length=5 +7;MC4_3_correct.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustLocal;;--length=5 +8;MC4_4_correct.tla;apalache;1h;;StoredHeadersAreVerifiedInv;;--length=7 +9;MC4_4_correct.tla;apalache;1h;;PositiveBeforeTrustedHeaderExpires;;--length=7 +10;MC4_4_correct.tla;apalache;1h;;CorrectPrimaryAndTimeliness;;--length=7 +11;MC4_4_correct.tla;apalache;1h;;PrecisionInv;;--length=7 +12;MC4_4_correct.tla;apalache;1h;;PrecisionBuggyInv;;--length=7 +13;MC4_4_correct.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustGlobal;;--length=7 +14;MC4_4_correct.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustLocal;;--length=7 +15;MC4_5_correct.tla;apalache;1h;;StoredHeadersAreVerifiedInv;;--length=11 +16;MC4_5_correct.tla;apalache;1h;;PositiveBeforeTrustedHeaderExpires;;--length=11 +17;MC4_5_correct.tla;apalache;1h;;CorrectPrimaryAndTimeliness;;--length=11 +18;MC4_5_correct.tla;apalache;1h;;PrecisionInv;;--length=11 +19;MC4_5_correct.tla;apalache;1h;;PrecisionBuggyInv;;--length=11 +20;MC4_5_correct.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustGlobal;;--length=11 +21;MC4_5_correct.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustLocal;;--length=11 +22;MC4_5_correct.tla;apalache;1h;;StoredHeadersAreVerifiedOrNotTrustedInv;;--length=11 +23;MC4_3_faulty.tla;apalache;1h;;StoredHeadersAreVerifiedInv;;--length=5 +24;MC4_3_faulty.tla;apalache;1h;;PositiveBeforeTrustedHeaderExpires;;--length=5 +25;MC4_3_faulty.tla;apalache;1h;;CorrectPrimaryAndTimeliness;;--length=5 +26;MC4_3_faulty.tla;apalache;1h;;PrecisionInv;;--length=5 +27;MC4_3_faulty.tla;apalache;1h;;PrecisionBuggyInv;;--length=5 +28;MC4_3_faulty.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustGlobal;;--length=5 +29;MC4_3_faulty.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustLocal;;--length=5 +30;MC4_4_faulty.tla;apalache;1h;;StoredHeadersAreVerifiedInv;;--length=7 +31;MC4_4_faulty.tla;apalache;1h;;PositiveBeforeTrustedHeaderExpires;;--length=7 +32;MC4_4_faulty.tla;apalache;1h;;CorrectPrimaryAndTimeliness;;--length=7 +33;MC4_4_faulty.tla;apalache;1h;;PrecisionInv;;--length=7 +34;MC4_4_faulty.tla;apalache;1h;;PrecisionBuggyInv;;--length=7 +35;MC4_4_faulty.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustGlobal;;--length=7 +36;MC4_4_faulty.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustLocal;;--length=7 +37;MC4_5_faulty.tla;apalache;1h;;StoredHeadersAreVerifiedInv;;--length=11 +38;MC4_5_faulty.tla;apalache;1h;;PositiveBeforeTrustedHeaderExpires;;--length=11 +39;MC4_5_faulty.tla;apalache;1h;;CorrectPrimaryAndTimeliness;;--length=11 +40;MC4_5_faulty.tla;apalache;1h;;PrecisionInv;;--length=11 +41;MC4_5_faulty.tla;apalache;1h;;PrecisionBuggyInv;;--length=11 +42;MC4_5_faulty.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustGlobal;;--length=11 +43;MC4_5_faulty.tla;apalache;1h;;SuccessOnCorrectPrimaryAndChainOfTrustLocal;;--length=11 +44;MC4_5_faulty.tla;apalache;1h;;StoredHeadersAreVerifiedOrNotTrustedInv;;--length=11 diff --git a/spec/light-client/verification/004bmc-apalache-ok.csv b/spec/light-client/verification/004bmc-apalache-ok.csv new file mode 100644 index 000000000..bf4f53ea2 --- /dev/null +++ b/spec/light-client/verification/004bmc-apalache-ok.csv @@ -0,0 +1,10 @@ +no;filename;tool;timeout;init;inv;next;args +1;LCD_MC3_3_faulty.tla;apalache;1h;;CommonHeightOnEvidenceInv;;--length=10 +2;LCD_MC3_3_faulty.tla;apalache;1h;;AccuracyInv;;--length=10 +3;LCD_MC3_3_faulty.tla;apalache;1h;;PrecisionInvLocal;;--length=10 +4;LCD_MC3_4_faulty.tla;apalache;1h;;CommonHeightOnEvidenceInv;;--length=10 +5;LCD_MC3_4_faulty.tla;apalache;1h;;AccuracyInv;;--length=10 +6;LCD_MC3_4_faulty.tla;apalache;1h;;PrecisionInvLocal;;--length=10 +7;LCD_MC4_4_faulty.tla;apalache;1h;;CommonHeightOnEvidenceInv;;--length=10 +8;LCD_MC4_4_faulty.tla;apalache;1h;;AccuracyInv;;--length=10 +9;LCD_MC4_4_faulty.tla;apalache;1h;;PrecisionInvLocal;;--length=10 diff --git a/spec/light-client/verification/005bmc-apalache-error.csv b/spec/light-client/verification/005bmc-apalache-error.csv new file mode 100644 index 000000000..1b9dd05ca --- /dev/null +++ b/spec/light-client/verification/005bmc-apalache-error.csv @@ -0,0 +1,4 @@ +no;filename;tool;timeout;init;inv;next;args +1;LCD_MC3_3_faulty.tla;apalache;1h;;PrecisionInvGrayZone;;--length=10 +2;LCD_MC3_4_faulty.tla;apalache;1h;;PrecisionInvGrayZone;;--length=10 +3;LCD_MC4_4_faulty.tla;apalache;1h;;PrecisionInvGrayZone;;--length=10 diff --git a/spec/light-client/verification/Blockchain_002_draft.tla b/spec/light-client/verification/Blockchain_002_draft.tla new file mode 100644 index 000000000..f2ca5aba5 --- /dev/null +++ b/spec/light-client/verification/Blockchain_002_draft.tla @@ -0,0 +1,171 @@ +------------------------ MODULE Blockchain_002_draft ----------------------------- +(* + This is a high-level specification of Tendermint blockchain + that is designed specifically for the light client. + Validators have the voting power of one. If you like to model various + voting powers, introduce multiple copies of the same validator + (do not forget to give them unique names though). + *) +EXTENDS Integers, FiniteSets + +Min(a, b) == IF a < b THEN a ELSE b + +CONSTANT + AllNodes, + (* a set of all nodes that can act as validators (correct and faulty) *) + ULTIMATE_HEIGHT, + (* a maximal height that can be ever reached (modelling artifact) *) + TRUSTING_PERIOD + (* the period within which the validators are trusted *) + +Heights == 1..ULTIMATE_HEIGHT (* possible heights *) + +(* A commit is just a set of nodes who have committed the block *) +Commits == SUBSET AllNodes + +(* The set of all block headers that can be on the blockchain. + This is a simplified version of the Block data structure in the actual implementation. *) +BlockHeaders == [ + height: Heights, + \* the block height + time: Int, + \* the block timestamp in some integer units + lastCommit: Commits, + \* the nodes who have voted on the previous block, the set itself instead of a hash + (* in the implementation, only the hashes of V and NextV are stored in a block, + as V and NextV are stored in the application state *) + VS: SUBSET AllNodes, + \* the validators of this bloc. We store the validators instead of the hash. + NextVS: SUBSET AllNodes + \* the validators of the next block. We store the next validators instead of the hash. +] + +(* A signed header is just a header together with a set of commits *) +LightBlocks == [header: BlockHeaders, Commits: Commits] + +VARIABLES + now, + (* the current global time in integer units *) + blockchain, + (* A sequence of BlockHeaders, which gives us a bird view of the blockchain. *) + Faulty + (* A set of faulty nodes, which can act as validators. We assume that the set + of faulty processes is non-decreasing. If a process has recovered, it should + connect using a different id. *) + +(* all variables, to be used with UNCHANGED *) +vars == <> + +(* The set of all correct nodes in a state *) +Corr == AllNodes \ Faulty + +(* APALACHE annotations *) +a <: b == a \* type annotation + +NT == STRING +NodeSet(S) == S <: {NT} +EmptyNodeSet == NodeSet({}) + +BT == [height |-> Int, time |-> Int, lastCommit |-> {NT}, VS |-> {NT}, NextVS |-> {NT}] + +LBT == [header |-> BT, Commits |-> {NT}] +(* end of APALACHE annotations *) + +(****************************** BLOCKCHAIN ************************************) + +(* the header is still within the trusting period *) +InTrustingPeriod(header) == + now < header.time + TRUSTING_PERIOD + +(* + Given a function pVotingPower \in D -> Powers for some D \subseteq AllNodes + and pNodes \subseteq D, test whether the set pNodes \subseteq AllNodes has + more than 2/3 of voting power among the nodes in D. + *) +TwoThirds(pVS, pNodes) == + LET TP == Cardinality(pVS) + SP == Cardinality(pVS \intersect pNodes) + IN + 3 * SP > 2 * TP \* when thinking in real numbers, not integers: SP > 2.0 / 3.0 * TP + +(* + Given a set of FaultyNodes, test whether the voting power of the correct nodes in D + is more than 2/3 of the voting power of the faulty nodes in D. + *) +IsCorrectPower(pFaultyNodes, pVS) == + LET FN == pFaultyNodes \intersect pVS \* faulty nodes in pNodes + CN == pVS \ pFaultyNodes \* correct nodes in pNodes + CP == Cardinality(CN) \* power of the correct nodes + FP == Cardinality(FN) \* power of the faulty nodes + IN + \* CP + FP = TP is the total voting power, so we write CP > 2.0 / 3 * TP as follows: + CP > 2 * FP \* Note: when FP = 0, this implies CP > 0. + +(* This is what we believe is the assumption about failures in Tendermint *) +FaultAssumption(pFaultyNodes, pNow, pBlockchain) == + \A h \in Heights: + pBlockchain[h].time + TRUSTING_PERIOD > pNow => + IsCorrectPower(pFaultyNodes, pBlockchain[h].NextVS) + +(* Can a block be produced by a correct peer, or an authenticated Byzantine peer *) +IsLightBlockAllowedByDigitalSignatures(ht, block) == + \/ block.header = blockchain[ht] \* signed by correct and faulty (maybe) + \/ block.Commits \subseteq Faulty /\ block.header.height = ht /\ block.header.time >= 0 \* signed only by faulty + +(* + Initialize the blockchain to the ultimate height right in the initial states. + We pick the faulty validators statically, but that should not affect the light client. + *) +InitToHeight == + /\ Faulty \in SUBSET AllNodes \* some nodes may fail + \* pick the validator sets and last commits + /\ \E vs, lastCommit \in [Heights -> SUBSET AllNodes]: + \E timestamp \in [Heights -> Int]: + \* now is at least as early as the timestamp in the last block + /\ \E tm \in Int: now = tm /\ tm >= timestamp[ULTIMATE_HEIGHT] + \* the genesis starts on day 1 + /\ timestamp[1] = 1 + /\ vs[1] = AllNodes + /\ lastCommit[1] = EmptyNodeSet + /\ \A h \in Heights \ {1}: + /\ lastCommit[h] \subseteq vs[h - 1] \* the non-validators cannot commit + /\ TwoThirds(vs[h - 1], lastCommit[h]) \* the commit has >2/3 of validator votes + /\ IsCorrectPower(Faulty, vs[h]) \* the correct validators have >2/3 of power + /\ timestamp[h] > timestamp[h - 1] \* the time grows monotonically + /\ timestamp[h] < timestamp[h - 1] + TRUSTING_PERIOD \* but not too fast + \* form the block chain out of validator sets and commits (this makes apalache faster) + /\ blockchain = [h \in Heights |-> + [height |-> h, + time |-> timestamp[h], + VS |-> vs[h], + NextVS |-> IF h < ULTIMATE_HEIGHT THEN vs[h + 1] ELSE AllNodes, + lastCommit |-> lastCommit[h]] + ] \****** + + +(* is the blockchain in the faulty zone where the Tendermint security model does not apply *) +InFaultyZone == + ~FaultAssumption(Faulty, now, blockchain) + +(********************* BLOCKCHAIN ACTIONS ********************************) +(* + Advance the clock by zero or more time units. + *) +AdvanceTime == + \E tm \in Int: tm >= now /\ now' = tm + /\ UNCHANGED <> + +(* + One more process fails. As a result, the blockchain may move into the faulty zone. + The light client is not using this action, as the faults are picked in the initial state. + However, this action may be useful when reasoning about fork detection. + *) +OneMoreFault == + /\ \E n \in AllNodes \ Faulty: + /\ Faulty' = Faulty \cup {n} + /\ Faulty' /= AllNodes \* at least process remains non-faulty + /\ UNCHANGED <> +============================================================================= +\* Modification History +\* Last modified Wed Jun 10 14:10:54 CEST 2020 by igor +\* Created Fri Oct 11 15:45:11 CEST 2019 by igor diff --git a/spec/light-client/verification/Blockchain_003_draft.tla b/spec/light-client/verification/Blockchain_003_draft.tla new file mode 100644 index 000000000..2b37c1b18 --- /dev/null +++ b/spec/light-client/verification/Blockchain_003_draft.tla @@ -0,0 +1,164 @@ +------------------------ MODULE Blockchain_003_draft ----------------------------- +(* + This is a high-level specification of Tendermint blockchain + that is designed specifically for the light client. + Validators have the voting power of one. If you like to model various + voting powers, introduce multiple copies of the same validator + (do not forget to give them unique names though). + *) +EXTENDS Integers, FiniteSets + +Min(a, b) == IF a < b THEN a ELSE b + +CONSTANT + AllNodes, + (* a set of all nodes that can act as validators (correct and faulty) *) + ULTIMATE_HEIGHT, + (* a maximal height that can be ever reached (modelling artifact) *) + TRUSTING_PERIOD + (* the period within which the validators are trusted *) + +Heights == 1..ULTIMATE_HEIGHT (* possible heights *) + +(* A commit is just a set of nodes who have committed the block *) +Commits == SUBSET AllNodes + +(* The set of all block headers that can be on the blockchain. + This is a simplified version of the Block data structure in the actual implementation. *) +BlockHeaders == [ + height: Heights, + \* the block height + time: Int, + \* the block timestamp in some integer units + lastCommit: Commits, + \* the nodes who have voted on the previous block, the set itself instead of a hash + (* in the implementation, only the hashes of V and NextV are stored in a block, + as V and NextV are stored in the application state *) + VS: SUBSET AllNodes, + \* the validators of this bloc. We store the validators instead of the hash. + NextVS: SUBSET AllNodes + \* the validators of the next block. We store the next validators instead of the hash. +] + +(* A signed header is just a header together with a set of commits *) +LightBlocks == [header: BlockHeaders, Commits: Commits] + +VARIABLES + refClock, + (* the current global time in integer units as perceived by the reference chain *) + blockchain, + (* A sequence of BlockHeaders, which gives us a bird view of the blockchain. *) + Faulty + (* A set of faulty nodes, which can act as validators. We assume that the set + of faulty processes is non-decreasing. If a process has recovered, it should + connect using a different id. *) + +(* all variables, to be used with UNCHANGED *) +vars == <> + +(* The set of all correct nodes in a state *) +Corr == AllNodes \ Faulty + +(* APALACHE annotations *) +a <: b == a \* type annotation + +NT == STRING +NodeSet(S) == S <: {NT} +EmptyNodeSet == NodeSet({}) + +BT == [height |-> Int, time |-> Int, lastCommit |-> {NT}, VS |-> {NT}, NextVS |-> {NT}] + +LBT == [header |-> BT, Commits |-> {NT}] +(* end of APALACHE annotations *) + +(****************************** BLOCKCHAIN ************************************) + +(* the header is still within the trusting period *) +InTrustingPeriod(header) == + refClock < header.time + TRUSTING_PERIOD + +(* + Given a function pVotingPower \in D -> Powers for some D \subseteq AllNodes + and pNodes \subseteq D, test whether the set pNodes \subseteq AllNodes has + more than 2/3 of voting power among the nodes in D. + *) +TwoThirds(pVS, pNodes) == + LET TP == Cardinality(pVS) + SP == Cardinality(pVS \intersect pNodes) + IN + 3 * SP > 2 * TP \* when thinking in real numbers, not integers: SP > 2.0 / 3.0 * TP + +(* + Given a set of FaultyNodes, test whether the voting power of the correct nodes in D + is more than 2/3 of the voting power of the faulty nodes in D. + + Parameters: + - pFaultyNodes is a set of nodes that are considered faulty + - pVS is a set of all validators, maybe including Faulty, intersecting with it, etc. + - pMaxFaultRatio is a pair <> that limits the ratio a / b of the faulty + validators from above (exclusive) + *) +FaultyValidatorsFewerThan(pFaultyNodes, pVS, maxRatio) == + LET FN == pFaultyNodes \intersect pVS \* faulty nodes in pNodes + CN == pVS \ pFaultyNodes \* correct nodes in pNodes + CP == Cardinality(CN) \* power of the correct nodes + FP == Cardinality(FN) \* power of the faulty nodes + IN + \* CP + FP = TP is the total voting power + LET TP == CP + FP IN + FP * maxRatio[2] < TP * maxRatio[1] + +(* Can a block be produced by a correct peer, or an authenticated Byzantine peer *) +IsLightBlockAllowedByDigitalSignatures(ht, block) == + \/ block.header = blockchain[ht] \* signed by correct and faulty (maybe) + \/ /\ block.Commits \subseteq Faulty + /\ block.header.height = ht + /\ block.header.time >= 0 \* signed only by faulty + +(* + Initialize the blockchain to the ultimate height right in the initial states. + We pick the faulty validators statically, but that should not affect the light client. + + Parameters: + - pMaxFaultyRatioExclusive is a pair <> that bound the number of + faulty validators in each block by the ratio a / b (exclusive) + *) +InitToHeight(pMaxFaultyRatioExclusive) == + /\ Faulty \in SUBSET AllNodes \* some nodes may fail + \* pick the validator sets and last commits + /\ \E vs, lastCommit \in [Heights -> SUBSET AllNodes]: + \E timestamp \in [Heights -> Int]: + \* refClock is at least as early as the timestamp in the last block + /\ \E tm \in Int: refClock = tm /\ tm >= timestamp[ULTIMATE_HEIGHT] + \* the genesis starts on day 1 + /\ timestamp[1] = 1 + /\ vs[1] = AllNodes + /\ lastCommit[1] = EmptyNodeSet + /\ \A h \in Heights \ {1}: + /\ lastCommit[h] \subseteq vs[h - 1] \* the non-validators cannot commit + /\ TwoThirds(vs[h - 1], lastCommit[h]) \* the commit has >2/3 of validator votes + \* the faulty validators have the power below the threshold + /\ FaultyValidatorsFewerThan(Faulty, vs[h], pMaxFaultyRatioExclusive) + /\ timestamp[h] > timestamp[h - 1] \* the time grows monotonically + /\ timestamp[h] < timestamp[h - 1] + TRUSTING_PERIOD \* but not too fast + \* form the block chain out of validator sets and commits (this makes apalache faster) + /\ blockchain = [h \in Heights |-> + [height |-> h, + time |-> timestamp[h], + VS |-> vs[h], + NextVS |-> IF h < ULTIMATE_HEIGHT THEN vs[h + 1] ELSE AllNodes, + lastCommit |-> lastCommit[h]] + ] \****** + +(********************* BLOCKCHAIN ACTIONS ********************************) +(* + Advance the clock by zero or more time units. + *) +AdvanceTime == + /\ \E tm \in Int: tm >= refClock /\ refClock' = tm + /\ UNCHANGED <> + +============================================================================= +\* Modification History +\* Last modified Wed Jun 10 14:10:54 CEST 2020 by igor +\* Created Fri Oct 11 15:45:11 CEST 2019 by igor diff --git a/spec/light-client/verification/Blockchain_A_1.tla b/spec/light-client/verification/Blockchain_A_1.tla new file mode 100644 index 000000000..70f59bf97 --- /dev/null +++ b/spec/light-client/verification/Blockchain_A_1.tla @@ -0,0 +1,171 @@ +------------------------ MODULE Blockchain_A_1 ----------------------------- +(* + This is a high-level specification of Tendermint blockchain + that is designed specifically for the light client. + Validators have the voting power of one. If you like to model various + voting powers, introduce multiple copies of the same validator + (do not forget to give them unique names though). + *) +EXTENDS Integers, FiniteSets + +Min(a, b) == IF a < b THEN a ELSE b + +CONSTANT + AllNodes, + (* a set of all nodes that can act as validators (correct and faulty) *) + ULTIMATE_HEIGHT, + (* a maximal height that can be ever reached (modelling artifact) *) + TRUSTING_PERIOD + (* the period within which the validators are trusted *) + +Heights == 1..ULTIMATE_HEIGHT (* possible heights *) + +(* A commit is just a set of nodes who have committed the block *) +Commits == SUBSET AllNodes + +(* The set of all block headers that can be on the blockchain. + This is a simplified version of the Block data structure in the actual implementation. *) +BlockHeaders == [ + height: Heights, + \* the block height + time: Int, + \* the block timestamp in some integer units + lastCommit: Commits, + \* the nodes who have voted on the previous block, the set itself instead of a hash + (* in the implementation, only the hashes of V and NextV are stored in a block, + as V and NextV are stored in the application state *) + VS: SUBSET AllNodes, + \* the validators of this bloc. We store the validators instead of the hash. + NextVS: SUBSET AllNodes + \* the validators of the next block. We store the next validators instead of the hash. +] + +(* A signed header is just a header together with a set of commits *) +LightBlocks == [header: BlockHeaders, Commits: Commits] + +VARIABLES + now, + (* the current global time in integer units *) + blockchain, + (* A sequence of BlockHeaders, which gives us a bird view of the blockchain. *) + Faulty + (* A set of faulty nodes, which can act as validators. We assume that the set + of faulty processes is non-decreasing. If a process has recovered, it should + connect using a different id. *) + +(* all variables, to be used with UNCHANGED *) +vars == <> + +(* The set of all correct nodes in a state *) +Corr == AllNodes \ Faulty + +(* APALACHE annotations *) +a <: b == a \* type annotation + +NT == STRING +NodeSet(S) == S <: {NT} +EmptyNodeSet == NodeSet({}) + +BT == [height |-> Int, time |-> Int, lastCommit |-> {NT}, VS |-> {NT}, NextVS |-> {NT}] + +LBT == [header |-> BT, Commits |-> {NT}] +(* end of APALACHE annotations *) + +(****************************** BLOCKCHAIN ************************************) + +(* the header is still within the trusting period *) +InTrustingPeriod(header) == + now <= header.time + TRUSTING_PERIOD + +(* + Given a function pVotingPower \in D -> Powers for some D \subseteq AllNodes + and pNodes \subseteq D, test whether the set pNodes \subseteq AllNodes has + more than 2/3 of voting power among the nodes in D. + *) +TwoThirds(pVS, pNodes) == + LET TP == Cardinality(pVS) + SP == Cardinality(pVS \intersect pNodes) + IN + 3 * SP > 2 * TP \* when thinking in real numbers, not integers: SP > 2.0 / 3.0 * TP + +(* + Given a set of FaultyNodes, test whether the voting power of the correct nodes in D + is more than 2/3 of the voting power of the faulty nodes in D. + *) +IsCorrectPower(pFaultyNodes, pVS) == + LET FN == pFaultyNodes \intersect pVS \* faulty nodes in pNodes + CN == pVS \ pFaultyNodes \* correct nodes in pNodes + CP == Cardinality(CN) \* power of the correct nodes + FP == Cardinality(FN) \* power of the faulty nodes + IN + \* CP + FP = TP is the total voting power, so we write CP > 2.0 / 3 * TP as follows: + CP > 2 * FP \* Note: when FP = 0, this implies CP > 0. + +(* This is what we believe is the assumption about failures in Tendermint *) +FaultAssumption(pFaultyNodes, pNow, pBlockchain) == + \A h \in Heights: + pBlockchain[h].time + TRUSTING_PERIOD > pNow => + IsCorrectPower(pFaultyNodes, pBlockchain[h].NextVS) + +(* Can a block be produced by a correct peer, or an authenticated Byzantine peer *) +IsLightBlockAllowedByDigitalSignatures(ht, block) == + \/ block.header = blockchain[ht] \* signed by correct and faulty (maybe) + \/ block.Commits \subseteq Faulty /\ block.header.height = ht \* signed only by faulty + +(* + Initialize the blockchain to the ultimate height right in the initial states. + We pick the faulty validators statically, but that should not affect the light client. + *) +InitToHeight == + /\ Faulty \in SUBSET AllNodes \* some nodes may fail + \* pick the validator sets and last commits + /\ \E vs, lastCommit \in [Heights -> SUBSET AllNodes]: + \E timestamp \in [Heights -> Int]: + \* now is at least as early as the timestamp in the last block + /\ \E tm \in Int: now = tm /\ tm >= timestamp[ULTIMATE_HEIGHT] + \* the genesis starts on day 1 + /\ timestamp[1] = 1 + /\ vs[1] = AllNodes + /\ lastCommit[1] = EmptyNodeSet + /\ \A h \in Heights \ {1}: + /\ lastCommit[h] \subseteq vs[h - 1] \* the non-validators cannot commit + /\ TwoThirds(vs[h - 1], lastCommit[h]) \* the commit has >2/3 of validator votes + /\ IsCorrectPower(Faulty, vs[h]) \* the correct validators have >2/3 of power + /\ timestamp[h] > timestamp[h - 1] \* the time grows monotonically + /\ timestamp[h] < timestamp[h - 1] + TRUSTING_PERIOD \* but not too fast + \* form the block chain out of validator sets and commits (this makes apalache faster) + /\ blockchain = [h \in Heights |-> + [height |-> h, + time |-> timestamp[h], + VS |-> vs[h], + NextVS |-> IF h < ULTIMATE_HEIGHT THEN vs[h + 1] ELSE AllNodes, + lastCommit |-> lastCommit[h]] + ] \****** + + +(* is the blockchain in the faulty zone where the Tendermint security model does not apply *) +InFaultyZone == + ~FaultAssumption(Faulty, now, blockchain) + +(********************* BLOCKCHAIN ACTIONS ********************************) +(* + Advance the clock by zero or more time units. + *) +AdvanceTime == + \E tm \in Int: tm >= now /\ now' = tm + /\ UNCHANGED <> + +(* + One more process fails. As a result, the blockchain may move into the faulty zone. + The light client is not using this action, as the faults are picked in the initial state. + However, this action may be useful when reasoning about fork detection. + *) +OneMoreFault == + /\ \E n \in AllNodes \ Faulty: + /\ Faulty' = Faulty \cup {n} + /\ Faulty' /= AllNodes \* at least process remains non-faulty + /\ UNCHANGED <> +============================================================================= +\* Modification History +\* Last modified Wed Jun 10 14:10:54 CEST 2020 by igor +\* Created Fri Oct 11 15:45:11 CEST 2019 by igor diff --git a/spec/light-client/verification/LCVerificationApi_003_draft.tla b/spec/light-client/verification/LCVerificationApi_003_draft.tla new file mode 100644 index 000000000..909eab92b --- /dev/null +++ b/spec/light-client/verification/LCVerificationApi_003_draft.tla @@ -0,0 +1,192 @@ +-------------------- MODULE LCVerificationApi_003_draft -------------------------- +(** + * The common interface of the light client verification and detection. + *) +EXTENDS Integers, FiniteSets + +\* the parameters of Light Client +CONSTANTS + TRUSTING_PERIOD, + (* the period within which the validators are trusted *) + CLOCK_DRIFT, + (* the assumed precision of the clock *) + REAL_CLOCK_DRIFT, + (* the actual clock drift, which under normal circumstances should not + be larger than CLOCK_DRIFT (otherwise, there will be a bug) *) + FAULTY_RATIO + (* a pair <> that limits that ratio of faulty validator in the blockchain + from above (exclusive). Tendermint security model prescribes 1 / 3. *) + +VARIABLES + localClock (* current time as measured by the light client *) + +(* the header is still within the trusting period *) +InTrustingPeriodLocal(header) == + \* note that the assumption about the drift reduces the period of trust + localClock < header.time + TRUSTING_PERIOD - CLOCK_DRIFT + +(* the header is still within the trusting period, even if the clock can go backwards *) +InTrustingPeriodLocalSurely(header) == + \* note that the assumption about the drift reduces the period of trust + localClock < header.time + TRUSTING_PERIOD - 2 * CLOCK_DRIFT + +(* ensure that the local clock does not drift far away from the global clock *) +IsLocalClockWithinDrift(local, global) == + /\ global - REAL_CLOCK_DRIFT <= local + /\ local <= global + REAL_CLOCK_DRIFT + +(** + * Check that the commits in an untrusted block form 1/3 of the next validators + * in a trusted header. + *) +SignedByOneThirdOfTrusted(trusted, untrusted) == + LET TP == Cardinality(trusted.header.NextVS) + SP == Cardinality(untrusted.Commits \intersect trusted.header.NextVS) + IN + 3 * SP > TP + +(** + The first part of the precondition of ValidAndVerified, which does not take + the current time into account. + + [LCV-FUNC-VALID.1::TLA-PRE-UNTIMED.1] + *) +ValidAndVerifiedPreUntimed(trusted, untrusted) == + LET thdr == trusted.header + uhdr == untrusted.header + IN + /\ thdr.height < uhdr.height + \* the trusted block has been created earlier + /\ thdr.time < uhdr.time + /\ untrusted.Commits \subseteq uhdr.VS + /\ LET TP == Cardinality(uhdr.VS) + SP == Cardinality(untrusted.Commits) + IN + 3 * SP > 2 * TP + /\ thdr.height + 1 = uhdr.height => thdr.NextVS = uhdr.VS + (* As we do not have explicit hashes we ignore these three checks of the English spec: + + 1. "trusted.Commit is a commit is for the header trusted.Header, + i.e. it contains the correct hash of the header". + 2. untrusted.Validators = hash(untrusted.Header.Validators) + 3. untrusted.NextValidators = hash(untrusted.Header.NextValidators) + *) + +(** + Check the precondition of ValidAndVerified, including the time checks. + + [LCV-FUNC-VALID.1::TLA-PRE.1] + *) +ValidAndVerifiedPre(trusted, untrusted, checkFuture) == + LET thdr == trusted.header + uhdr == untrusted.header + IN + /\ InTrustingPeriodLocal(thdr) + \* The untrusted block is not from the future (modulo clock drift). + \* Do the check, if it is required. + /\ checkFuture => uhdr.time < localClock + CLOCK_DRIFT + /\ ValidAndVerifiedPreUntimed(trusted, untrusted) + + +(** + Check, whether an untrusted block is valid and verifiable w.r.t. a trusted header. + This test does take current time into account, but only looks at the block structure. + + [LCV-FUNC-VALID.1::TLA-UNTIMED.1] + *) +ValidAndVerifiedUntimed(trusted, untrusted) == + IF ~ValidAndVerifiedPreUntimed(trusted, untrusted) + THEN "INVALID" + ELSE IF untrusted.header.height = trusted.header.height + 1 + \/ SignedByOneThirdOfTrusted(trusted, untrusted) + THEN "SUCCESS" + ELSE "NOT_ENOUGH_TRUST" + +(** + Check, whether an untrusted block is valid and verifiable w.r.t. a trusted header. + + [LCV-FUNC-VALID.1::TLA.1] + *) +ValidAndVerified(trusted, untrusted, checkFuture) == + IF ~ValidAndVerifiedPre(trusted, untrusted, checkFuture) + THEN "INVALID" + ELSE IF ~InTrustingPeriodLocal(untrusted.header) + (* We leave the following test for the documentation purposes. + The implementation should do this test, as signature verification may be slow. + In the TLA+ specification, ValidAndVerified happens in no time. + *) + THEN "FAILED_TRUSTING_PERIOD" + ELSE IF untrusted.header.height = trusted.header.height + 1 + \/ SignedByOneThirdOfTrusted(trusted, untrusted) + THEN "SUCCESS" + ELSE "NOT_ENOUGH_TRUST" + + +(** + The invariant of the light store that is not related to the blockchain + *) +LightStoreInv(fetchedLightBlocks, lightBlockStatus) == + \A lh, rh \in DOMAIN fetchedLightBlocks: + \* for every pair of stored headers that have been verified + \/ lh >= rh + \/ lightBlockStatus[lh] /= "StateVerified" + \/ lightBlockStatus[rh] /= "StateVerified" + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh /\ lightBlockStatus[mh] = "StateVerified" + \* or the left header is outside the trusting period, so no guarantees + \/ LET lhdr == fetchedLightBlocks[lh] + rhdr == fetchedLightBlocks[rh] + IN + \* we can verify the right one using the left one + "SUCCESS" = ValidAndVerifiedUntimed(lhdr, rhdr) + +(** + Correctness states that all the obtained headers are exactly like in the blockchain. + + It is always the case that every verified header in LightStore was generated by + an instance of Tendermint consensus. + + [LCV-DIST-SAFE.1::CORRECTNESS-INV.1] + *) +CorrectnessInv(blockchain, fetchedLightBlocks, lightBlockStatus) == + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] = "StateVerified" => + fetchedLightBlocks[h].header = blockchain[h] + +(** + * When the light client terminates, there are no failed blocks. + * (Otherwise, someone lied to us.) + *) +NoFailedBlocksOnSuccessInv(fetchedLightBlocks, lightBlockStatus) == + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] /= "StateFailed" + +(** + The expected post-condition of VerifyToTarget. + *) +VerifyToTargetPost(blockchain, isPeerCorrect, + fetchedLightBlocks, lightBlockStatus, + trustedHeight, targetHeight, finalState) == + LET trustedHeader == fetchedLightBlocks[trustedHeight].header IN + \* The light client is not lying us on the trusted block. + \* It is straightforward to detect. + /\ lightBlockStatus[trustedHeight] = "StateVerified" + /\ trustedHeight \in DOMAIN fetchedLightBlocks + /\ trustedHeader = blockchain[trustedHeight] + \* the invariants we have found in the light client verification + \* there is a problem with trusting period + /\ isPeerCorrect + => CorrectnessInv(blockchain, fetchedLightBlocks, lightBlockStatus) + \* a correct peer should fail the light client, + \* if the trusted block is in the trusting period + /\ isPeerCorrect /\ InTrustingPeriodLocalSurely(trustedHeader) + => finalState = "finishedSuccess" + /\ finalState = "finishedSuccess" => + /\ lightBlockStatus[targetHeight] = "StateVerified" + /\ targetHeight \in DOMAIN fetchedLightBlocks + /\ NoFailedBlocksOnSuccessInv(fetchedLightBlocks, lightBlockStatus) + /\ LightStoreInv(fetchedLightBlocks, lightBlockStatus) + + +================================================================================== diff --git a/spec/light-client/verification/Lightclient_002_draft.tla b/spec/light-client/verification/Lightclient_002_draft.tla new file mode 100644 index 000000000..32c807f6e --- /dev/null +++ b/spec/light-client/verification/Lightclient_002_draft.tla @@ -0,0 +1,465 @@ +-------------------------- MODULE Lightclient_002_draft ---------------------------- +(** + * A state-machine specification of the lite client, following the English spec: + * + * https://github.com/informalsystems/tendermint-rs/blob/master/docs/spec/lightclient/verification.md + *) + +EXTENDS Integers, FiniteSets + +\* the parameters of Light Client +CONSTANTS + TRUSTED_HEIGHT, + (* an index of the block header that the light client trusts by social consensus *) + TARGET_HEIGHT, + (* an index of the block header that the light client tries to verify *) + TRUSTING_PERIOD, + (* the period within which the validators are trusted *) + IS_PRIMARY_CORRECT + (* is primary correct? *) + +VARIABLES (* see TypeOK below for the variable types *) + state, (* the current state of the light client *) + nextHeight, (* the next height to explore by the light client *) + nprobes (* the lite client iteration, or the number of block tests *) + +(* the light store *) +VARIABLES + fetchedLightBlocks, (* a function from heights to LightBlocks *) + lightBlockStatus, (* a function from heights to block statuses *) + latestVerified (* the latest verified block *) + +(* the variables of the lite client *) +lcvars == <> + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevNow, + prevVerdict + +InitMonitor(verified, current, now, verdict) == + /\ prevVerified = verified + /\ prevCurrent = current + /\ prevNow = now + /\ prevVerdict = verdict + +NextMonitor(verified, current, now, verdict) == + /\ prevVerified' = verified + /\ prevCurrent' = current + /\ prevNow' = now + /\ prevVerdict' = verdict + + +(******************* Blockchain instance ***********************************) + +\* the parameters that are propagated into Blockchain +CONSTANTS + AllNodes + (* a set of all nodes that can act as validators (correct and faulty) *) + +\* the state variables of Blockchain, see Blockchain.tla for the details +VARIABLES now, blockchain, Faulty + +\* All the variables of Blockchain. For some reason, BC!vars does not work +bcvars == <> + +(* Create an instance of Blockchain. + We could write EXTENDS Blockchain, but then all the constants and state variables + would be hidden inside the Blockchain module. + *) +ULTIMATE_HEIGHT == TARGET_HEIGHT + 1 + +BC == INSTANCE Blockchain_002_draft WITH + now <- now, blockchain <- blockchain, Faulty <- Faulty + +(************************** Lite client ************************************) + +(* the heights on which the light client is working *) +HEIGHTS == TRUSTED_HEIGHT..TARGET_HEIGHT + +(* the control states of the lite client *) +States == { "working", "finishedSuccess", "finishedFailure" } + +(** + Check the precondition of ValidAndVerified. + + [LCV-FUNC-VALID.1::TLA-PRE.1] + *) +ValidAndVerifiedPre(trusted, untrusted) == + LET thdr == trusted.header + uhdr == untrusted.header + IN + /\ BC!InTrustingPeriod(thdr) + /\ thdr.height < uhdr.height + \* the trusted block has been created earlier (no drift here) + /\ thdr.time < uhdr.time + \* the untrusted block is not from the future + /\ uhdr.time < now + /\ untrusted.Commits \subseteq uhdr.VS + /\ LET TP == Cardinality(uhdr.VS) + SP == Cardinality(untrusted.Commits) + IN + 3 * SP > 2 * TP + /\ thdr.height + 1 = uhdr.height => thdr.NextVS = uhdr.VS + (* As we do not have explicit hashes we ignore these three checks of the English spec: + + 1. "trusted.Commit is a commit is for the header trusted.Header, + i.e. it contains the correct hash of the header". + 2. untrusted.Validators = hash(untrusted.Header.Validators) + 3. untrusted.NextValidators = hash(untrusted.Header.NextValidators) + *) + +(** + * Check that the commits in an untrusted block form 1/3 of the next validators + * in a trusted header. + *) +SignedByOneThirdOfTrusted(trusted, untrusted) == + LET TP == Cardinality(trusted.header.NextVS) + SP == Cardinality(untrusted.Commits \intersect trusted.header.NextVS) + IN + 3 * SP > TP + +(** + Check, whether an untrusted block is valid and verifiable w.r.t. a trusted header. + + [LCV-FUNC-VALID.1::TLA.1] + *) +ValidAndVerified(trusted, untrusted) == + IF ~ValidAndVerifiedPre(trusted, untrusted) + THEN "INVALID" + ELSE IF ~BC!InTrustingPeriod(untrusted.header) + (* We leave the following test for the documentation purposes. + The implementation should do this test, as signature verification may be slow. + In the TLA+ specification, ValidAndVerified happens in no time. + *) + THEN "FAILED_TRUSTING_PERIOD" + ELSE IF untrusted.header.height = trusted.header.height + 1 + \/ SignedByOneThirdOfTrusted(trusted, untrusted) + THEN "SUCCESS" + ELSE "NOT_ENOUGH_TRUST" + +(* + Initial states of the light client. + Initially, only the trusted light block is present. + *) +LCInit == + /\ state = "working" + /\ nextHeight = TARGET_HEIGHT + /\ nprobes = 0 \* no tests have been done so far + /\ LET trustedBlock == blockchain[TRUSTED_HEIGHT] + trustedLightBlock == [header |-> trustedBlock, Commits |-> AllNodes] + IN + \* initially, fetchedLightBlocks is a function of one element, i.e., TRUSTED_HEIGHT + /\ fetchedLightBlocks = [h \in {TRUSTED_HEIGHT} |-> trustedLightBlock] + \* initially, lightBlockStatus is a function of one element, i.e., TRUSTED_HEIGHT + /\ lightBlockStatus = [h \in {TRUSTED_HEIGHT} |-> "StateVerified"] + \* the latest verified block the the trusted block + /\ latestVerified = trustedLightBlock + /\ InitMonitor(trustedLightBlock, trustedLightBlock, now, "SUCCESS") + +\* block should contain a copy of the block from the reference chain, with a matching commit +CopyLightBlockFromChain(block, height) == + LET ref == blockchain[height] + lastCommit == + IF height < ULTIMATE_HEIGHT + THEN blockchain[height + 1].lastCommit + \* for the ultimate block, which we never use, as ULTIMATE_HEIGHT = TARGET_HEIGHT + 1 + ELSE blockchain[height].VS + IN + block = [header |-> ref, Commits |-> lastCommit] + +\* Either the primary is correct and the block comes from the reference chain, +\* or the block is produced by a faulty primary. +\* +\* [LCV-FUNC-FETCH.1::TLA.1] +FetchLightBlockInto(block, height) == + IF IS_PRIMARY_CORRECT + THEN CopyLightBlockFromChain(block, height) + ELSE BC!IsLightBlockAllowedByDigitalSignatures(height, block) + +\* add a block into the light store +\* +\* [LCV-FUNC-UPDATE.1::TLA.1] +LightStoreUpdateBlocks(lightBlocks, block) == + LET ht == block.header.height IN + [h \in DOMAIN lightBlocks \union {ht} |-> + IF h = ht THEN block ELSE lightBlocks[h]] + +\* update the state of a light block +\* +\* [LCV-FUNC-UPDATE.1::TLA.1] +LightStoreUpdateStates(statuses, ht, blockState) == + [h \in DOMAIN statuses \union {ht} |-> + IF h = ht THEN blockState ELSE statuses[h]] + +\* Check, whether newHeight is a possible next height for the light client. +\* +\* [LCV-FUNC-SCHEDULE.1::TLA.1] +CanScheduleTo(newHeight, pLatestVerified, pNextHeight, pTargetHeight) == + LET ht == pLatestVerified.header.height IN + \/ /\ ht = pNextHeight + /\ ht < pTargetHeight + /\ pNextHeight < newHeight + /\ newHeight <= pTargetHeight + \/ /\ ht < pNextHeight + /\ ht < pTargetHeight + /\ ht < newHeight + /\ newHeight < pNextHeight + \/ /\ ht = pTargetHeight + /\ newHeight = pTargetHeight + +\* The loop of VerifyToTarget. +\* +\* [LCV-FUNC-MAIN.1::TLA-LOOP.1] +VerifyToTargetLoop == + \* the loop condition is true + /\ latestVerified.header.height < TARGET_HEIGHT + \* pick a light block, which will be constrained later + /\ \E current \in BC!LightBlocks: + \* Get next LightBlock for verification + /\ IF nextHeight \in DOMAIN fetchedLightBlocks + THEN \* copy the block from the light store + /\ current = fetchedLightBlocks[nextHeight] + /\ UNCHANGED fetchedLightBlocks + ELSE \* retrieve a light block and save it in the light store + /\ FetchLightBlockInto(current, nextHeight) + /\ fetchedLightBlocks' = LightStoreUpdateBlocks(fetchedLightBlocks, current) + \* Record that one more probe has been done (for complexity and model checking) + /\ nprobes' = nprobes + 1 + \* Verify the current block + /\ LET verdict == ValidAndVerified(latestVerified, current) IN + NextMonitor(latestVerified, current, now, verdict) /\ + \* Decide whether/how to continue + CASE verdict = "SUCCESS" -> + /\ lightBlockStatus' = LightStoreUpdateStates(lightBlockStatus, nextHeight, "StateVerified") + /\ latestVerified' = current + /\ state' = + IF latestVerified'.header.height < TARGET_HEIGHT + THEN "working" + ELSE "finishedSuccess" + /\ \E newHeight \in HEIGHTS: + /\ CanScheduleTo(newHeight, current, nextHeight, TARGET_HEIGHT) + /\ nextHeight' = newHeight + + [] verdict = "NOT_ENOUGH_TRUST" -> + (* + do nothing: the light block current passed validation, but the validator + set is too different to verify it. We keep the state of + current at StateUnverified. For a later iteration, Schedule + might decide to try verification of that light block again. + *) + /\ lightBlockStatus' = LightStoreUpdateStates(lightBlockStatus, nextHeight, "StateUnverified") + /\ \E newHeight \in HEIGHTS: + /\ CanScheduleTo(newHeight, latestVerified, nextHeight, TARGET_HEIGHT) + /\ nextHeight' = newHeight + /\ UNCHANGED <> + + [] OTHER -> + \* verdict is some error code + /\ lightBlockStatus' = LightStoreUpdateStates(lightBlockStatus, nextHeight, "StateFailed") + /\ state' = "finishedFailure" + /\ UNCHANGED <> + +\* The terminating condition of VerifyToTarget. +\* +\* [LCV-FUNC-MAIN.1::TLA-LOOPCOND.1] +VerifyToTargetDone == + /\ latestVerified.header.height >= TARGET_HEIGHT + /\ state' = "finishedSuccess" + /\ UNCHANGED <> + /\ UNCHANGED <> + +(********************* Lite client + Blockchain *******************) +Init == + \* the blockchain is initialized immediately to the ULTIMATE_HEIGHT + /\ BC!InitToHeight + \* the light client starts + /\ LCInit + +(* + The system step is very simple. + The light client is either executing VerifyToTarget, or it has terminated. + (In the latter case, a model checker reports a deadlock.) + Simultaneously, the global clock may advance. + *) +Next == + /\ state = "working" + /\ VerifyToTargetLoop \/ VerifyToTargetDone + /\ BC!AdvanceTime \* the global clock is advanced by zero or more time units + +(************************* Types ******************************************) +TypeOK == + /\ state \in States + /\ nextHeight \in HEIGHTS + /\ latestVerified \in BC!LightBlocks + /\ \E HS \in SUBSET HEIGHTS: + /\ fetchedLightBlocks \in [HS -> BC!LightBlocks] + /\ lightBlockStatus + \in [HS -> {"StateVerified", "StateUnverified", "StateFailed"}] + +(************************* Properties ******************************************) + +(* The properties to check *) +\* this invariant candidate is false +NeverFinish == + state = "working" + +\* this invariant candidate is false +NeverFinishNegative == + state /= "finishedFailure" + +\* This invariant holds true, when the primary is correct. +\* This invariant candidate is false when the primary is faulty. +NeverFinishNegativeWhenTrusted == + (*(minTrustedHeight <= TRUSTED_HEIGHT)*) + BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) + => state /= "finishedFailure" + +\* this invariant candidate is false +NeverFinishPositive == + state /= "finishedSuccess" + +(** + Correctness states that all the obtained headers are exactly like in the blockchain. + + It is always the case that every verified header in LightStore was generated by + an instance of Tendermint consensus. + + [LCV-DIST-SAFE.1::CORRECTNESS-INV.1] + *) +CorrectnessInv == + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] = "StateVerified" => + fetchedLightBlocks[h].header = blockchain[h] + +(** + Check that the sequence of the headers in storedLightBlocks satisfies ValidAndVerified = "SUCCESS" pairwise + This property is easily violated, whenever a header cannot be trusted anymore. + *) +StoredHeadersAreVerifiedInv == + state = "finishedSuccess" + => + \A lh, rh \in DOMAIN fetchedLightBlocks: \* for every pair of different stored headers + \/ lh >= rh + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh + \* or we can verify the right one using the left one + \/ "SUCCESS" = ValidAndVerified(fetchedLightBlocks[lh], fetchedLightBlocks[rh]) + +\* An improved version of StoredHeadersAreSound, assuming that a header may be not trusted. +\* This invariant candidate is also violated, +\* as there may be some unverified blocks left in the middle. +StoredHeadersAreVerifiedOrNotTrustedInv == + state = "finishedSuccess" + => + \A lh, rh \in DOMAIN fetchedLightBlocks: \* for every pair of different stored headers + \/ lh >= rh + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh + \* or we can verify the right one using the left one + \/ "SUCCESS" = ValidAndVerified(fetchedLightBlocks[lh], fetchedLightBlocks[rh]) + \* or the left header is outside the trusting period, so no guarantees + \/ ~BC!InTrustingPeriod(fetchedLightBlocks[lh].header) + +(** + * An improved version of StoredHeadersAreSoundOrNotTrusted, + * checking the property only for the verified headers. + * This invariant holds true. + *) +ProofOfChainOfTrustInv == + state = "finishedSuccess" + => + \A lh, rh \in DOMAIN fetchedLightBlocks: + \* for every pair of stored headers that have been verified + \/ lh >= rh + \/ lightBlockStatus[lh] = "StateUnverified" + \/ lightBlockStatus[rh] = "StateUnverified" + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh /\ lightBlockStatus[mh] = "StateVerified" + \* or the left header is outside the trusting period, so no guarantees + \/ ~(BC!InTrustingPeriod(fetchedLightBlocks[lh].header)) + \* or we can verify the right one using the left one + \/ "SUCCESS" = ValidAndVerified(fetchedLightBlocks[lh], fetchedLightBlocks[rh]) + +(** + * When the light client terminates, there are no failed blocks. (Otherwise, someone lied to us.) + *) +NoFailedBlocksOnSuccessInv == + state = "finishedSuccess" => + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] /= "StateFailed" + +\* This property states that whenever the light client finishes with a positive outcome, +\* the trusted header is still within the trusting period. +\* We expect this property to be violated. And Apalache shows us a counterexample. +PositiveBeforeTrustedHeaderExpires == + (state = "finishedSuccess") => BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) + +\* If the primary is correct and the initial trusted block has not expired, +\* then whenever the algorithm terminates, it reports "success" +CorrectPrimaryAndTimeliness == + (BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) + /\ state /= "working" /\ IS_PRIMARY_CORRECT) => + state = "finishedSuccess" + +(** + If the primary is correct and there is a trusted block that has not expired, + then whenever the algorithm terminates, it reports "success". + + [LCV-DIST-LIVE.1::SUCCESS-CORR-PRIMARY-CHAIN-OF-TRUST.1] + *) +SuccessOnCorrectPrimaryAndChainOfTrust == + (\E h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] = "StateVerified" /\ BC!InTrustingPeriod(blockchain[h]) + /\ state /= "working" /\ IS_PRIMARY_CORRECT) => + state = "finishedSuccess" + +\* Lite Client Completeness: If header h was correctly generated by an instance +\* of Tendermint consensus (and its age is less than the trusting period), +\* then the lite client should eventually set trust(h) to true. +\* +\* Note that Completeness assumes that the lite client communicates with a correct full node. +\* +\* We decompose completeness into Termination (liveness) and Precision (safety). +\* Once again, Precision is an inverse version of the safety property in Completeness, +\* as A => B is logically equivalent to ~B => ~A. +PrecisionInv == + (state = "finishedFailure") + => \/ ~BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) \* outside of the trusting period + \/ \E h \in DOMAIN fetchedLightBlocks: + LET lightBlock == fetchedLightBlocks[h] IN + \* the full node lied to the lite client about the block header + \/ lightBlock.header /= blockchain[h] + \* the full node lied to the lite client about the commits + \/ lightBlock.Commits /= lightBlock.header.VS + +\* the old invariant that was found to be buggy by TLC +PrecisionBuggyInv == + (state = "finishedFailure") + => \/ ~BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) \* outside of the trusting period + \/ \E h \in DOMAIN fetchedLightBlocks: + LET lightBlock == fetchedLightBlocks[h] IN + \* the full node lied to the lite client about the block header + lightBlock.header /= blockchain[h] + +\* the worst complexity +Complexity == + LET N == TARGET_HEIGHT - TRUSTED_HEIGHT + 1 IN + state /= "working" => + (2 * nprobes <= N * (N - 1)) + +(* + We omit termination, as the algorithm deadlocks in the end. + So termination can be demonstrated by finding a deadlock. + Of course, one has to analyze the deadlocked state and see that + the algorithm has indeed terminated there. +*) +============================================================================= +\* Modification History +\* Last modified Fri Jun 26 12:08:28 CEST 2020 by igor +\* Created Wed Oct 02 16:39:42 CEST 2019 by igor diff --git a/spec/light-client/verification/Lightclient_003_draft.tla b/spec/light-client/verification/Lightclient_003_draft.tla new file mode 100644 index 000000000..e17a88491 --- /dev/null +++ b/spec/light-client/verification/Lightclient_003_draft.tla @@ -0,0 +1,493 @@ +-------------------------- MODULE Lightclient_003_draft ---------------------------- +(** + * A state-machine specification of the lite client verification, + * following the English spec: + * + * https://github.com/informalsystems/tendermint-rs/blob/master/docs/spec/lightclient/verification.md + *) + +EXTENDS Integers, FiniteSets + +\* the parameters of Light Client +CONSTANTS + TRUSTED_HEIGHT, + (* an index of the block header that the light client trusts by social consensus *) + TARGET_HEIGHT, + (* an index of the block header that the light client tries to verify *) + TRUSTING_PERIOD, + (* the period within which the validators are trusted *) + CLOCK_DRIFT, + (* the assumed precision of the clock *) + REAL_CLOCK_DRIFT, + (* the actual clock drift, which under normal circumstances should not + be larger than CLOCK_DRIFT (otherwise, there will be a bug) *) + IS_PRIMARY_CORRECT, + (* is primary correct? *) + FAULTY_RATIO + (* a pair <> that limits that ratio of faulty validator in the blockchain + from above (exclusive). Tendermint security model prescribes 1 / 3. *) + +VARIABLES (* see TypeOK below for the variable types *) + localClock, (* the local clock of the light client *) + state, (* the current state of the light client *) + nextHeight, (* the next height to explore by the light client *) + nprobes (* the lite client iteration, or the number of block tests *) + +(* the light store *) +VARIABLES + fetchedLightBlocks, (* a function from heights to LightBlocks *) + lightBlockStatus, (* a function from heights to block statuses *) + latestVerified (* the latest verified block *) + +(* the variables of the lite client *) +lcvars == <> + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +InitMonitor(verified, current, pLocalClock, verdict) == + /\ prevVerified = verified + /\ prevCurrent = current + /\ prevLocalClock = pLocalClock + /\ prevVerdict = verdict + +NextMonitor(verified, current, pLocalClock, verdict) == + /\ prevVerified' = verified + /\ prevCurrent' = current + /\ prevLocalClock' = pLocalClock + /\ prevVerdict' = verdict + + +(******************* Blockchain instance ***********************************) + +\* the parameters that are propagated into Blockchain +CONSTANTS + AllNodes + (* a set of all nodes that can act as validators (correct and faulty) *) + +\* the state variables of Blockchain, see Blockchain.tla for the details +VARIABLES refClock, blockchain, Faulty + +\* All the variables of Blockchain. For some reason, BC!vars does not work +bcvars == <> + +(* Create an instance of Blockchain. + We could write EXTENDS Blockchain, but then all the constants and state variables + would be hidden inside the Blockchain module. + *) +ULTIMATE_HEIGHT == TARGET_HEIGHT + 1 + +BC == INSTANCE Blockchain_003_draft WITH + refClock <- refClock, blockchain <- blockchain, Faulty <- Faulty + +(************************** Lite client ************************************) + +(* the heights on which the light client is working *) +HEIGHTS == TRUSTED_HEIGHT..TARGET_HEIGHT + +(* the control states of the lite client *) +States == { "working", "finishedSuccess", "finishedFailure" } + +\* The verification functions are implemented in the API +API == INSTANCE LCVerificationApi_003_draft + + +(* + Initial states of the light client. + Initially, only the trusted light block is present. + *) +LCInit == + /\ \E tm \in Int: + tm >= 0 /\ API!IsLocalClockWithinDrift(tm, refClock) /\ localClock = tm + /\ state = "working" + /\ nextHeight = TARGET_HEIGHT + /\ nprobes = 0 \* no tests have been done so far + /\ LET trustedBlock == blockchain[TRUSTED_HEIGHT] + trustedLightBlock == [header |-> trustedBlock, Commits |-> AllNodes] + IN + \* initially, fetchedLightBlocks is a function of one element, i.e., TRUSTED_HEIGHT + /\ fetchedLightBlocks = [h \in {TRUSTED_HEIGHT} |-> trustedLightBlock] + \* initially, lightBlockStatus is a function of one element, i.e., TRUSTED_HEIGHT + /\ lightBlockStatus = [h \in {TRUSTED_HEIGHT} |-> "StateVerified"] + \* the latest verified block the the trusted block + /\ latestVerified = trustedLightBlock + /\ InitMonitor(trustedLightBlock, trustedLightBlock, localClock, "SUCCESS") + +\* block should contain a copy of the block from the reference chain, with a matching commit +CopyLightBlockFromChain(block, height) == + LET ref == blockchain[height] + lastCommit == + IF height < ULTIMATE_HEIGHT + THEN blockchain[height + 1].lastCommit + \* for the ultimate block, which we never use, as ULTIMATE_HEIGHT = TARGET_HEIGHT + 1 + ELSE blockchain[height].VS + IN + block = [header |-> ref, Commits |-> lastCommit] + +\* Either the primary is correct and the block comes from the reference chain, +\* or the block is produced by a faulty primary. +\* +\* [LCV-FUNC-FETCH.1::TLA.1] +FetchLightBlockInto(block, height) == + IF IS_PRIMARY_CORRECT + THEN CopyLightBlockFromChain(block, height) + ELSE BC!IsLightBlockAllowedByDigitalSignatures(height, block) + +\* add a block into the light store +\* +\* [LCV-FUNC-UPDATE.1::TLA.1] +LightStoreUpdateBlocks(lightBlocks, block) == + LET ht == block.header.height IN + [h \in DOMAIN lightBlocks \union {ht} |-> + IF h = ht THEN block ELSE lightBlocks[h]] + +\* update the state of a light block +\* +\* [LCV-FUNC-UPDATE.1::TLA.1] +LightStoreUpdateStates(statuses, ht, blockState) == + [h \in DOMAIN statuses \union {ht} |-> + IF h = ht THEN blockState ELSE statuses[h]] + +\* Check, whether newHeight is a possible next height for the light client. +\* +\* [LCV-FUNC-SCHEDULE.1::TLA.1] +CanScheduleTo(newHeight, pLatestVerified, pNextHeight, pTargetHeight) == + LET ht == pLatestVerified.header.height IN + \/ /\ ht = pNextHeight + /\ ht < pTargetHeight + /\ pNextHeight < newHeight + /\ newHeight <= pTargetHeight + \/ /\ ht < pNextHeight + /\ ht < pTargetHeight + /\ ht < newHeight + /\ newHeight < pNextHeight + \/ /\ ht = pTargetHeight + /\ newHeight = pTargetHeight + +\* The loop of VerifyToTarget. +\* +\* [LCV-FUNC-MAIN.1::TLA-LOOP.1] +VerifyToTargetLoop == + \* the loop condition is true + /\ latestVerified.header.height < TARGET_HEIGHT + \* pick a light block, which will be constrained later + /\ \E current \in BC!LightBlocks: + \* Get next LightBlock for verification + /\ IF nextHeight \in DOMAIN fetchedLightBlocks + THEN \* copy the block from the light store + /\ current = fetchedLightBlocks[nextHeight] + /\ UNCHANGED fetchedLightBlocks + ELSE \* retrieve a light block and save it in the light store + /\ FetchLightBlockInto(current, nextHeight) + /\ fetchedLightBlocks' = LightStoreUpdateBlocks(fetchedLightBlocks, current) + \* Record that one more probe has been done (for complexity and model checking) + /\ nprobes' = nprobes + 1 + \* Verify the current block + /\ LET verdict == API!ValidAndVerified(latestVerified, current, TRUE) IN + NextMonitor(latestVerified, current, localClock, verdict) /\ + \* Decide whether/how to continue + CASE verdict = "SUCCESS" -> + /\ lightBlockStatus' = LightStoreUpdateStates(lightBlockStatus, nextHeight, "StateVerified") + /\ latestVerified' = current + /\ state' = + IF latestVerified'.header.height < TARGET_HEIGHT + THEN "working" + ELSE "finishedSuccess" + /\ \E newHeight \in HEIGHTS: + /\ CanScheduleTo(newHeight, current, nextHeight, TARGET_HEIGHT) + /\ nextHeight' = newHeight + + [] verdict = "NOT_ENOUGH_TRUST" -> + (* + do nothing: the light block current passed validation, but the validator + set is too different to verify it. We keep the state of + current at StateUnverified. For a later iteration, Schedule + might decide to try verification of that light block again. + *) + /\ lightBlockStatus' = LightStoreUpdateStates(lightBlockStatus, nextHeight, "StateUnverified") + /\ \E newHeight \in HEIGHTS: + /\ CanScheduleTo(newHeight, latestVerified, nextHeight, TARGET_HEIGHT) + /\ nextHeight' = newHeight + /\ UNCHANGED <> + + [] OTHER -> + \* verdict is some error code + /\ lightBlockStatus' = LightStoreUpdateStates(lightBlockStatus, nextHeight, "StateFailed") + /\ state' = "finishedFailure" + /\ UNCHANGED <> + +\* The terminating condition of VerifyToTarget. +\* +\* [LCV-FUNC-MAIN.1::TLA-LOOPCOND.1] +VerifyToTargetDone == + /\ latestVerified.header.height >= TARGET_HEIGHT + /\ state' = "finishedSuccess" + /\ UNCHANGED <> + /\ UNCHANGED <> + +(* + The local and global clocks can be updated. They can also drift from each other. + Note that the local clock can actually go backwards in time. + However, it still stays in the drift envelope + of [refClock - REAL_CLOCK_DRIFT, refClock + REAL_CLOCK_DRIFT]. + *) +AdvanceClocks == + /\ BC!AdvanceTime + /\ \E tm \in Int: + /\ tm >= 0 + /\ API!IsLocalClockWithinDrift(tm, refClock') + /\ localClock' = tm + \* if you like the clock to always grow monotonically, uncomment the next line: + \*/\ localClock' > localClock + +(********************* Lite client + Blockchain *******************) +Init == + \* the blockchain is initialized immediately to the ULTIMATE_HEIGHT + /\ BC!InitToHeight(FAULTY_RATIO) + \* the light client starts + /\ LCInit + +(* + The system step is very simple. + The light client is either executing VerifyToTarget, or it has terminated. + (In the latter case, a model checker reports a deadlock.) + Simultaneously, the global clock may advance. + *) +Next == + /\ state = "working" + /\ VerifyToTargetLoop \/ VerifyToTargetDone + /\ AdvanceClocks + +(************************* Types ******************************************) +TypeOK == + /\ state \in States + /\ localClock \in Nat + /\ refClock \in Nat + /\ nextHeight \in HEIGHTS + /\ latestVerified \in BC!LightBlocks + /\ \E HS \in SUBSET HEIGHTS: + /\ fetchedLightBlocks \in [HS -> BC!LightBlocks] + /\ lightBlockStatus + \in [HS -> {"StateVerified", "StateUnverified", "StateFailed"}] + +(************************* Properties ******************************************) + +(* The properties to check *) +\* this invariant candidate is false +NeverFinish == + state = "working" + +\* this invariant candidate is false +NeverFinishNegative == + state /= "finishedFailure" + +\* This invariant holds true, when the primary is correct. +\* This invariant candidate is false when the primary is faulty. +NeverFinishNegativeWhenTrusted == + BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) + => state /= "finishedFailure" + +\* this invariant candidate is false +NeverFinishPositive == + state /= "finishedSuccess" + + +(** + Check that the target height has been reached upon successful termination. + *) +TargetHeightOnSuccessInv == + state = "finishedSuccess" => + /\ TARGET_HEIGHT \in DOMAIN fetchedLightBlocks + /\ lightBlockStatus[TARGET_HEIGHT] = "StateVerified" + +(** + Correctness states that all the obtained headers are exactly like in the blockchain. + + It is always the case that every verified header in LightStore was generated by + an instance of Tendermint consensus. + + [LCV-DIST-SAFE.1::CORRECTNESS-INV.1] + *) +CorrectnessInv == + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] = "StateVerified" => + fetchedLightBlocks[h].header = blockchain[h] + +(** + No faulty block was used to construct a proof. This invariant holds, + only if FAULTY_RATIO < 1/3. + *) +NoTrustOnFaultyBlockInv == + (state = "finishedSuccess" + /\ fetchedLightBlocks[TARGET_HEIGHT].header = blockchain[TARGET_HEIGHT]) + => CorrectnessInv + +(** + Check that the sequence of the headers in storedLightBlocks satisfies ValidAndVerified = "SUCCESS" pairwise + This property is easily violated, whenever a header cannot be trusted anymore. + *) +StoredHeadersAreVerifiedInv == + state = "finishedSuccess" + => + \A lh, rh \in DOMAIN fetchedLightBlocks: \* for every pair of different stored headers + \/ lh >= rh + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh + \* or we can verify the right one using the left one + \/ "SUCCESS" = API!ValidAndVerified(fetchedLightBlocks[lh], + fetchedLightBlocks[rh], FALSE) + +\* An improved version of StoredHeadersAreVerifiedInv, +\* assuming that a header may be not trusted. +\* This invariant candidate is also violated, +\* as there may be some unverified blocks left in the middle. +\* This property is violated under two conditions: +\* (1) the primary is faulty and there are at least 4 blocks, +\* (2) the primary is correct and there are at least 5 blocks. +StoredHeadersAreVerifiedOrNotTrustedInv == + state = "finishedSuccess" + => + \A lh, rh \in DOMAIN fetchedLightBlocks: \* for every pair of different stored headers + \/ lh >= rh + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh + \* or we can verify the right one using the left one + \/ "SUCCESS" = API!ValidAndVerified(fetchedLightBlocks[lh], + fetchedLightBlocks[rh], FALSE) + \* or the left header is outside the trusting period, so no guarantees + \/ ~API!InTrustingPeriodLocal(fetchedLightBlocks[lh].header) + +(** + * An improved version of StoredHeadersAreSoundOrNotTrusted, + * checking the property only for the verified headers. + * This invariant holds true if CLOCK_DRIFT <= REAL_CLOCK_DRIFT. + *) +ProofOfChainOfTrustInv == + state = "finishedSuccess" + => + \A lh, rh \in DOMAIN fetchedLightBlocks: + \* for every pair of stored headers that have been verified + \/ lh >= rh + \/ lightBlockStatus[lh] = "StateUnverified" + \/ lightBlockStatus[rh] = "StateUnverified" + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh /\ lightBlockStatus[mh] = "StateVerified" + \* or the left header is outside the trusting period, so no guarantees + \/ ~(API!InTrustingPeriodLocal(fetchedLightBlocks[lh].header)) + \* or we can verify the right one using the left one + \/ "SUCCESS" = API!ValidAndVerified(fetchedLightBlocks[lh], + fetchedLightBlocks[rh], FALSE) + +(** + * When the light client terminates, there are no failed blocks. (Otherwise, someone lied to us.) + *) +NoFailedBlocksOnSuccessInv == + state = "finishedSuccess" => + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] /= "StateFailed" + +\* This property states that whenever the light client finishes with a positive outcome, +\* the trusted header is still within the trusting period. +\* We expect this property to be violated. And Apalache shows us a counterexample. +PositiveBeforeTrustedHeaderExpires == + (state = "finishedSuccess") => + BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) + +\* If the primary is correct and the initial trusted block has not expired, +\* then whenever the algorithm terminates, it reports "success". +\* This property fails. +CorrectPrimaryAndTimeliness == + (BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) + /\ state /= "working" /\ IS_PRIMARY_CORRECT) => + state = "finishedSuccess" + +(** + If the primary is correct and there is a trusted block that has not expired, + then whenever the algorithm terminates, it reports "success". + This property only holds true, if the local clock is always growing monotonically. + If the local clock can go backwards in the envelope + [refClock - CLOCK_DRIFT, refClock + CLOCK_DRIFT], then the property fails. + + [LCV-DIST-LIVE.1::SUCCESS-CORR-PRIMARY-CHAIN-OF-TRUST.1] + *) +SuccessOnCorrectPrimaryAndChainOfTrustLocal == + (\E h \in DOMAIN fetchedLightBlocks: + /\ lightBlockStatus[h] = "StateVerified" + /\ API!InTrustingPeriodLocal(blockchain[h]) + /\ state /= "working" /\ IS_PRIMARY_CORRECT) => + state = "finishedSuccess" + +(** + Similar to SuccessOnCorrectPrimaryAndChainOfTrust, but using the blockchain clock. + It fails because the local clock of the client drifted away, so it rejects a block + that has not expired yet (according to the local clock). + *) +SuccessOnCorrectPrimaryAndChainOfTrustGlobal == + (\E h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] = "StateVerified" /\ BC!InTrustingPeriod(blockchain[h]) + /\ state /= "working" /\ IS_PRIMARY_CORRECT) => + state = "finishedSuccess" + +\* Lite Client Completeness: If header h was correctly generated by an instance +\* of Tendermint consensus (and its age is less than the trusting period), +\* then the lite client should eventually set trust(h) to true. +\* +\* Note that Completeness assumes that the lite client communicates with a correct full node. +\* +\* We decompose completeness into Termination (liveness) and Precision (safety). +\* Once again, Precision is an inverse version of the safety property in Completeness, +\* as A => B is logically equivalent to ~B => ~A. +\* +\* This property holds only when CLOCK_DRIFT = 0 and REAL_CLOCK_DRIFT = 0. +PrecisionInv == + (state = "finishedFailure") + => \/ ~BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) \* outside of the trusting period + \/ \E h \in DOMAIN fetchedLightBlocks: + LET lightBlock == fetchedLightBlocks[h] IN + \* the full node lied to the lite client about the block header + \/ lightBlock.header /= blockchain[h] + \* the full node lied to the lite client about the commits + \/ lightBlock.Commits /= lightBlock.header.VS + +\* the old invariant that was found to be buggy by TLC +PrecisionBuggyInv == + (state = "finishedFailure") + => \/ ~BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) \* outside of the trusting period + \/ \E h \in DOMAIN fetchedLightBlocks: + LET lightBlock == fetchedLightBlocks[h] IN + \* the full node lied to the lite client about the block header + lightBlock.header /= blockchain[h] + +\* the worst complexity +Complexity == + LET N == TARGET_HEIGHT - TRUSTED_HEIGHT + 1 IN + state /= "working" => + (2 * nprobes <= N * (N - 1)) + +(** + If the light client has terminated, then the expected postcondition holds true. + *) +ApiPostInv == + state /= "working" => + API!VerifyToTargetPost(blockchain, IS_PRIMARY_CORRECT, + fetchedLightBlocks, lightBlockStatus, + TRUSTED_HEIGHT, TARGET_HEIGHT, state) + +(* + We omit termination, as the algorithm deadlocks in the end. + So termination can be demonstrated by finding a deadlock. + Of course, one has to analyze the deadlocked state and see that + the algorithm has indeed terminated there. +*) +============================================================================= +\* Modification History +\* Last modified Fri Jun 26 12:08:28 CEST 2020 by igor +\* Created Wed Oct 02 16:39:42 CEST 2019 by igor diff --git a/spec/light-client/verification/Lightclient_A_1.tla b/spec/light-client/verification/Lightclient_A_1.tla new file mode 100644 index 000000000..70e6caf00 --- /dev/null +++ b/spec/light-client/verification/Lightclient_A_1.tla @@ -0,0 +1,440 @@ +-------------------------- MODULE Lightclient_A_1 ---------------------------- +(** + * A state-machine specification of the lite client, following the English spec: + * + * ./verification_001_published.md + *) + +EXTENDS Integers, FiniteSets + +\* the parameters of Light Client +CONSTANTS + TRUSTED_HEIGHT, + (* an index of the block header that the light client trusts by social consensus *) + TARGET_HEIGHT, + (* an index of the block header that the light client tries to verify *) + TRUSTING_PERIOD, + (* the period within which the validators are trusted *) + IS_PRIMARY_CORRECT + (* is primary correct? *) + +VARIABLES (* see TypeOK below for the variable types *) + state, (* the current state of the light client *) + nextHeight, (* the next height to explore by the light client *) + nprobes (* the lite client iteration, or the number of block tests *) + +(* the light store *) +VARIABLES + fetchedLightBlocks, (* a function from heights to LightBlocks *) + lightBlockStatus, (* a function from heights to block statuses *) + latestVerified (* the latest verified block *) + +(* the variables of the lite client *) +lcvars == <> + +(******************* Blockchain instance ***********************************) + +\* the parameters that are propagated into Blockchain +CONSTANTS + AllNodes + (* a set of all nodes that can act as validators (correct and faulty) *) + +\* the state variables of Blockchain, see Blockchain.tla for the details +VARIABLES now, blockchain, Faulty + +\* All the variables of Blockchain. For some reason, BC!vars does not work +bcvars == <> + +(* Create an instance of Blockchain. + We could write EXTENDS Blockchain, but then all the constants and state variables + would be hidden inside the Blockchain module. + *) +ULTIMATE_HEIGHT == TARGET_HEIGHT + 1 + +BC == INSTANCE Blockchain_A_1 WITH + now <- now, blockchain <- blockchain, Faulty <- Faulty + +(************************** Lite client ************************************) + +(* the heights on which the light client is working *) +HEIGHTS == TRUSTED_HEIGHT..TARGET_HEIGHT + +(* the control states of the lite client *) +States == { "working", "finishedSuccess", "finishedFailure" } + +(** + Check the precondition of ValidAndVerified. + + [LCV-FUNC-VALID.1::TLA-PRE.1] + *) +ValidAndVerifiedPre(trusted, untrusted) == + LET thdr == trusted.header + uhdr == untrusted.header + IN + /\ BC!InTrustingPeriod(thdr) + /\ thdr.height < uhdr.height + \* the trusted block has been created earlier (no drift here) + /\ thdr.time <= uhdr.time + /\ untrusted.Commits \subseteq uhdr.VS + /\ LET TP == Cardinality(uhdr.VS) + SP == Cardinality(untrusted.Commits) + IN + 3 * SP > 2 * TP + /\ thdr.height + 1 = uhdr.height => thdr.NextVS = uhdr.VS + (* As we do not have explicit hashes we ignore these three checks of the English spec: + + 1. "trusted.Commit is a commit is for the header trusted.Header, + i.e. it contains the correct hash of the header". + 2. untrusted.Validators = hash(untrusted.Header.Validators) + 3. untrusted.NextValidators = hash(untrusted.Header.NextValidators) + *) + +(** + * Check that the commits in an untrusted block form 1/3 of the next validators + * in a trusted header. + *) +SignedByOneThirdOfTrusted(trusted, untrusted) == + LET TP == Cardinality(trusted.header.NextVS) + SP == Cardinality(untrusted.Commits \intersect trusted.header.NextVS) + IN + 3 * SP > TP + +(** + Check, whether an untrusted block is valid and verifiable w.r.t. a trusted header. + + [LCV-FUNC-VALID.1::TLA.1] + *) +ValidAndVerified(trusted, untrusted) == + IF ~ValidAndVerifiedPre(trusted, untrusted) + THEN "FAILED_VERIFICATION" + ELSE IF ~BC!InTrustingPeriod(untrusted.header) + (* We leave the following test for the documentation purposes. + The implementation should do this test, as signature verification may be slow. + In the TLA+ specification, ValidAndVerified happens in no time. + *) + THEN "FAILED_TRUSTING_PERIOD" + ELSE IF untrusted.header.height = trusted.header.height + 1 + \/ SignedByOneThirdOfTrusted(trusted, untrusted) + THEN "OK" + ELSE "CANNOT_VERIFY" + +(* + Initial states of the light client. + Initially, only the trusted light block is present. + *) +LCInit == + /\ state = "working" + /\ nextHeight = TARGET_HEIGHT + /\ nprobes = 0 \* no tests have been done so far + /\ LET trustedBlock == blockchain[TRUSTED_HEIGHT] + trustedLightBlock == [header |-> trustedBlock, Commits |-> AllNodes] + IN + \* initially, fetchedLightBlocks is a function of one element, i.e., TRUSTED_HEIGHT + /\ fetchedLightBlocks = [h \in {TRUSTED_HEIGHT} |-> trustedLightBlock] + \* initially, lightBlockStatus is a function of one element, i.e., TRUSTED_HEIGHT + /\ lightBlockStatus = [h \in {TRUSTED_HEIGHT} |-> "StateVerified"] + \* the latest verified block the the trusted block + /\ latestVerified = trustedLightBlock + +\* block should contain a copy of the block from the reference chain, with a matching commit +CopyLightBlockFromChain(block, height) == + LET ref == blockchain[height] + lastCommit == + IF height < ULTIMATE_HEIGHT + THEN blockchain[height + 1].lastCommit + \* for the ultimate block, which we never use, as ULTIMATE_HEIGHT = TARGET_HEIGHT + 1 + ELSE blockchain[height].VS + IN + block = [header |-> ref, Commits |-> lastCommit] + +\* Either the primary is correct and the block comes from the reference chain, +\* or the block is produced by a faulty primary. +\* +\* [LCV-FUNC-FETCH.1::TLA.1] +FetchLightBlockInto(block, height) == + IF IS_PRIMARY_CORRECT + THEN CopyLightBlockFromChain(block, height) + ELSE BC!IsLightBlockAllowedByDigitalSignatures(height, block) + +\* add a block into the light store +\* +\* [LCV-FUNC-UPDATE.1::TLA.1] +LightStoreUpdateBlocks(lightBlocks, block) == + LET ht == block.header.height IN + [h \in DOMAIN lightBlocks \union {ht} |-> + IF h = ht THEN block ELSE lightBlocks[h]] + +\* update the state of a light block +\* +\* [LCV-FUNC-UPDATE.1::TLA.1] +LightStoreUpdateStates(statuses, ht, blockState) == + [h \in DOMAIN statuses \union {ht} |-> + IF h = ht THEN blockState ELSE statuses[h]] + +\* Check, whether newHeight is a possible next height for the light client. +\* +\* [LCV-FUNC-SCHEDULE.1::TLA.1] +CanScheduleTo(newHeight, pLatestVerified, pNextHeight, pTargetHeight) == + LET ht == pLatestVerified.header.height IN + \/ /\ ht = pNextHeight + /\ ht < pTargetHeight + /\ pNextHeight < newHeight + /\ newHeight <= pTargetHeight + \/ /\ ht < pNextHeight + /\ ht < pTargetHeight + /\ ht < newHeight + /\ newHeight < pNextHeight + \/ /\ ht = pTargetHeight + /\ newHeight = pTargetHeight + +\* The loop of VerifyToTarget. +\* +\* [LCV-FUNC-MAIN.1::TLA-LOOP.1] +VerifyToTargetLoop == + \* the loop condition is true + /\ latestVerified.header.height < TARGET_HEIGHT + \* pick a light block, which will be constrained later + /\ \E current \in BC!LightBlocks: + \* Get next LightBlock for verification + /\ IF nextHeight \in DOMAIN fetchedLightBlocks + THEN \* copy the block from the light store + /\ current = fetchedLightBlocks[nextHeight] + /\ UNCHANGED fetchedLightBlocks + ELSE \* retrieve a light block and save it in the light store + /\ FetchLightBlockInto(current, nextHeight) + /\ fetchedLightBlocks' = LightStoreUpdateBlocks(fetchedLightBlocks, current) + \* Record that one more probe has been done (for complexity and model checking) + /\ nprobes' = nprobes + 1 + \* Verify the current block + /\ LET verdict == ValidAndVerified(latestVerified, current) IN + \* Decide whether/how to continue + CASE verdict = "OK" -> + /\ lightBlockStatus' = LightStoreUpdateStates(lightBlockStatus, nextHeight, "StateVerified") + /\ latestVerified' = current + /\ state' = + IF latestVerified'.header.height < TARGET_HEIGHT + THEN "working" + ELSE "finishedSuccess" + /\ \E newHeight \in HEIGHTS: + /\ CanScheduleTo(newHeight, current, nextHeight, TARGET_HEIGHT) + /\ nextHeight' = newHeight + + [] verdict = "CANNOT_VERIFY" -> + (* + do nothing: the light block current passed validation, but the validator + set is too different to verify it. We keep the state of + current at StateUnverified. For a later iteration, Schedule + might decide to try verification of that light block again. + *) + /\ lightBlockStatus' = LightStoreUpdateStates(lightBlockStatus, nextHeight, "StateUnverified") + /\ \E newHeight \in HEIGHTS: + /\ CanScheduleTo(newHeight, latestVerified, nextHeight, TARGET_HEIGHT) + /\ nextHeight' = newHeight + /\ UNCHANGED <> + + [] OTHER -> + \* verdict is some error code + /\ lightBlockStatus' = LightStoreUpdateStates(lightBlockStatus, nextHeight, "StateFailed") + /\ state' = "finishedFailure" + /\ UNCHANGED <> + +\* The terminating condition of VerifyToTarget. +\* +\* [LCV-FUNC-MAIN.1::TLA-LOOPCOND.1] +VerifyToTargetDone == + /\ latestVerified.header.height >= TARGET_HEIGHT + /\ state' = "finishedSuccess" + /\ UNCHANGED <> + +(********************* Lite client + Blockchain *******************) +Init == + \* the blockchain is initialized immediately to the ULTIMATE_HEIGHT + /\ BC!InitToHeight + \* the light client starts + /\ LCInit + +(* + The system step is very simple. + The light client is either executing VerifyToTarget, or it has terminated. + (In the latter case, a model checker reports a deadlock.) + Simultaneously, the global clock may advance. + *) +Next == + /\ state = "working" + /\ VerifyToTargetLoop \/ VerifyToTargetDone + /\ BC!AdvanceTime \* the global clock is advanced by zero or more time units + +(************************* Types ******************************************) +TypeOK == + /\ state \in States + /\ nextHeight \in HEIGHTS + /\ latestVerified \in BC!LightBlocks + /\ \E HS \in SUBSET HEIGHTS: + /\ fetchedLightBlocks \in [HS -> BC!LightBlocks] + /\ lightBlockStatus + \in [HS -> {"StateVerified", "StateUnverified", "StateFailed"}] + +(************************* Properties ******************************************) + +(* The properties to check *) +\* this invariant candidate is false +NeverFinish == + state = "working" + +\* this invariant candidate is false +NeverFinishNegative == + state /= "finishedFailure" + +\* This invariant holds true, when the primary is correct. +\* This invariant candidate is false when the primary is faulty. +NeverFinishNegativeWhenTrusted == + (*(minTrustedHeight <= TRUSTED_HEIGHT)*) + BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) + => state /= "finishedFailure" + +\* this invariant candidate is false +NeverFinishPositive == + state /= "finishedSuccess" + +(** + Correctness states that all the obtained headers are exactly like in the blockchain. + + It is always the case that every verified header in LightStore was generated by + an instance of Tendermint consensus. + + [LCV-DIST-SAFE.1::CORRECTNESS-INV.1] + *) +CorrectnessInv == + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] = "StateVerified" => + fetchedLightBlocks[h].header = blockchain[h] + +(** + Check that the sequence of the headers in storedLightBlocks satisfies ValidAndVerified = "OK" pairwise + This property is easily violated, whenever a header cannot be trusted anymore. + *) +StoredHeadersAreVerifiedInv == + state = "finishedSuccess" + => + \A lh, rh \in DOMAIN fetchedLightBlocks: \* for every pair of different stored headers + \/ lh >= rh + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh + \* or we can verify the right one using the left one + \/ "OK" = ValidAndVerified(fetchedLightBlocks[lh], fetchedLightBlocks[rh]) + +\* An improved version of StoredHeadersAreSound, assuming that a header may be not trusted. +\* This invariant candidate is also violated, +\* as there may be some unverified blocks left in the middle. +StoredHeadersAreVerifiedOrNotTrustedInv == + state = "finishedSuccess" + => + \A lh, rh \in DOMAIN fetchedLightBlocks: \* for every pair of different stored headers + \/ lh >= rh + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh + \* or we can verify the right one using the left one + \/ "OK" = ValidAndVerified(fetchedLightBlocks[lh], fetchedLightBlocks[rh]) + \* or the left header is outside the trusting period, so no guarantees + \/ ~BC!InTrustingPeriod(fetchedLightBlocks[lh].header) + +(** + * An improved version of StoredHeadersAreSoundOrNotTrusted, + * checking the property only for the verified headers. + * This invariant holds true. + *) +ProofOfChainOfTrustInv == + state = "finishedSuccess" + => + \A lh, rh \in DOMAIN fetchedLightBlocks: + \* for every pair of stored headers that have been verified + \/ lh >= rh + \/ lightBlockStatus[lh] = "StateUnverified" + \/ lightBlockStatus[rh] = "StateUnverified" + \* either there is a header between them + \/ \E mh \in DOMAIN fetchedLightBlocks: + lh < mh /\ mh < rh /\ lightBlockStatus[mh] = "StateVerified" + \* or the left header is outside the trusting period, so no guarantees + \/ ~(BC!InTrustingPeriod(fetchedLightBlocks[lh].header)) + \* or we can verify the right one using the left one + \/ "OK" = ValidAndVerified(fetchedLightBlocks[lh], fetchedLightBlocks[rh]) + +(** + * When the light client terminates, there are no failed blocks. (Otherwise, someone lied to us.) + *) +NoFailedBlocksOnSuccessInv == + state = "finishedSuccess" => + \A h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] /= "StateFailed" + +\* This property states that whenever the light client finishes with a positive outcome, +\* the trusted header is still within the trusting period. +\* We expect this property to be violated. And Apalache shows us a counterexample. +PositiveBeforeTrustedHeaderExpires == + (state = "finishedSuccess") => BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) + +\* If the primary is correct and the initial trusted block has not expired, +\* then whenever the algorithm terminates, it reports "success" +CorrectPrimaryAndTimeliness == + (BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) + /\ state /= "working" /\ IS_PRIMARY_CORRECT) => + state = "finishedSuccess" + +(** + If the primary is correct and there is a trusted block that has not expired, + then whenever the algorithm terminates, it reports "success". + + [LCV-DIST-LIVE.1::SUCCESS-CORR-PRIMARY-CHAIN-OF-TRUST.1] + *) +SuccessOnCorrectPrimaryAndChainOfTrust == + (\E h \in DOMAIN fetchedLightBlocks: + lightBlockStatus[h] = "StateVerified" /\ BC!InTrustingPeriod(blockchain[h]) + /\ state /= "working" /\ IS_PRIMARY_CORRECT) => + state = "finishedSuccess" + +\* Lite Client Completeness: If header h was correctly generated by an instance +\* of Tendermint consensus (and its age is less than the trusting period), +\* then the lite client should eventually set trust(h) to true. +\* +\* Note that Completeness assumes that the lite client communicates with a correct full node. +\* +\* We decompose completeness into Termination (liveness) and Precision (safety). +\* Once again, Precision is an inverse version of the safety property in Completeness, +\* as A => B is logically equivalent to ~B => ~A. +PrecisionInv == + (state = "finishedFailure") + => \/ ~BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) \* outside of the trusting period + \/ \E h \in DOMAIN fetchedLightBlocks: + LET lightBlock == fetchedLightBlocks[h] IN + \* the full node lied to the lite client about the block header + \/ lightBlock.header /= blockchain[h] + \* the full node lied to the lite client about the commits + \/ lightBlock.Commits /= lightBlock.header.VS + +\* the old invariant that was found to be buggy by TLC +PrecisionBuggyInv == + (state = "finishedFailure") + => \/ ~BC!InTrustingPeriod(blockchain[TRUSTED_HEIGHT]) \* outside of the trusting period + \/ \E h \in DOMAIN fetchedLightBlocks: + LET lightBlock == fetchedLightBlocks[h] IN + \* the full node lied to the lite client about the block header + lightBlock.header /= blockchain[h] + +\* the worst complexity +Complexity == + LET N == TARGET_HEIGHT - TRUSTED_HEIGHT + 1 IN + state /= "working" => + (2 * nprobes <= N * (N - 1)) + +(* + We omit termination, as the algorithm deadlocks in the end. + So termination can be demonstrated by finding a deadlock. + Of course, one has to analyze the deadlocked state and see that + the algorithm has indeed terminated there. +*) +============================================================================= +\* Modification History +\* Last modified Fri Jun 26 12:08:28 CEST 2020 by igor +\* Created Wed Oct 02 16:39:42 CEST 2019 by igor diff --git a/spec/light-client/verification/MC4_3_correct.tla b/spec/light-client/verification/MC4_3_correct.tla new file mode 100644 index 000000000..a27d8de05 --- /dev/null +++ b/spec/light-client/verification/MC4_3_correct.tla @@ -0,0 +1,26 @@ +---------------------------- MODULE MC4_3_correct --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 3 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == TRUE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================== diff --git a/spec/light-client/verification/MC4_3_faulty.tla b/spec/light-client/verification/MC4_3_faulty.tla new file mode 100644 index 000000000..74b278900 --- /dev/null +++ b/spec/light-client/verification/MC4_3_faulty.tla @@ -0,0 +1,26 @@ +---------------------------- MODULE MC4_3_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 3 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================== diff --git a/spec/light-client/verification/MC4_4_correct.tla b/spec/light-client/verification/MC4_4_correct.tla new file mode 100644 index 000000000..0a8d96b59 --- /dev/null +++ b/spec/light-client/verification/MC4_4_correct.tla @@ -0,0 +1,26 @@ +------------------------- MODULE MC4_4_correct --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 4 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == TRUE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC4_4_correct_drifted.tla b/spec/light-client/verification/MC4_4_correct_drifted.tla new file mode 100644 index 000000000..7fefe349e --- /dev/null +++ b/spec/light-client/verification/MC4_4_correct_drifted.tla @@ -0,0 +1,26 @@ +---------------------- MODULE MC4_4_correct_drifted --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 4 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 30 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == TRUE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================== diff --git a/spec/light-client/verification/MC4_4_faulty.tla b/spec/light-client/verification/MC4_4_faulty.tla new file mode 100644 index 000000000..167fa61fb --- /dev/null +++ b/spec/light-client/verification/MC4_4_faulty.tla @@ -0,0 +1,26 @@ +---------------------------- MODULE MC4_4_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 4 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================== diff --git a/spec/light-client/verification/MC4_4_faulty_drifted.tla b/spec/light-client/verification/MC4_4_faulty_drifted.tla new file mode 100644 index 000000000..e37c3cb40 --- /dev/null +++ b/spec/light-client/verification/MC4_4_faulty_drifted.tla @@ -0,0 +1,26 @@ +---------------------- MODULE MC4_4_faulty_drifted --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 4 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 30 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================== diff --git a/spec/light-client/verification/MC4_5_correct.tla b/spec/light-client/verification/MC4_5_correct.tla new file mode 100644 index 000000000..cffb22cc8 --- /dev/null +++ b/spec/light-client/verification/MC4_5_correct.tla @@ -0,0 +1,26 @@ +------------------------- MODULE MC4_5_correct --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 5 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == TRUE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC4_5_faulty.tla b/spec/light-client/verification/MC4_5_faulty.tla new file mode 100644 index 000000000..3d3a00290 --- /dev/null +++ b/spec/light-client/verification/MC4_5_faulty.tla @@ -0,0 +1,26 @@ +------------------------- MODULE MC4_5_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 5 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +IS_PRICLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +MARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC4_6_faulty.tla b/spec/light-client/verification/MC4_6_faulty.tla new file mode 100644 index 000000000..64f164854 --- /dev/null +++ b/spec/light-client/verification/MC4_6_faulty.tla @@ -0,0 +1,26 @@ +------------------------- MODULE MC4_6_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 6 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +IS_PRCLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IMARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC4_7_faulty.tla b/spec/light-client/verification/MC4_7_faulty.tla new file mode 100644 index 000000000..dc6a94eb1 --- /dev/null +++ b/spec/light-client/verification/MC4_7_faulty.tla @@ -0,0 +1,26 @@ +------------------------- MODULE MC4_7_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 7 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC5_5_correct.tla b/spec/light-client/verification/MC5_5_correct.tla new file mode 100644 index 000000000..00b4151f7 --- /dev/null +++ b/spec/light-client/verification/MC5_5_correct.tla @@ -0,0 +1,26 @@ +------------------------- MODULE MC5_5_correct --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4", "n5"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 5 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == TRUE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC5_5_correct_peer_two_thirds_faulty.tla b/spec/light-client/verification/MC5_5_correct_peer_two_thirds_faulty.tla new file mode 100644 index 000000000..d4212032f --- /dev/null +++ b/spec/light-client/verification/MC5_5_correct_peer_two_thirds_faulty.tla @@ -0,0 +1,26 @@ +------------------- MODULE MC5_5_correct_peer_two_thirds_faulty ---------------------- + +AllNodes == {"n1", "n2", "n3", "n4", "n5"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 5 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == TRUE +FAULTY_RATIO == <<2, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC5_5_faulty.tla b/spec/light-client/verification/MC5_5_faulty.tla new file mode 100644 index 000000000..f63d175a1 --- /dev/null +++ b/spec/light-client/verification/MC5_5_faulty.tla @@ -0,0 +1,26 @@ +----------------- MODULE MC5_5_faulty --------------------- + +AllNodes == {"n1", "n2", "n3", "n4", "n5"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 5 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +FAULTY_RATIO == <<2, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC5_5_faulty_peer_two_thirds_faulty.tla b/spec/light-client/verification/MC5_5_faulty_peer_two_thirds_faulty.tla new file mode 100644 index 000000000..ef9974d06 --- /dev/null +++ b/spec/light-client/verification/MC5_5_faulty_peer_two_thirds_faulty.tla @@ -0,0 +1,26 @@ +----------------- MODULE MC5_5_faulty_peer_two_thirds_faulty --------------------- + +AllNodes == {"n1", "n2", "n3", "n4", "n5"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 5 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +FAULTY_RATIO == <<2, 3>> \* < 2 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC5_7_faulty.tla b/spec/light-client/verification/MC5_7_faulty.tla new file mode 100644 index 000000000..63461b0c8 --- /dev/null +++ b/spec/light-client/verification/MC5_7_faulty.tla @@ -0,0 +1,26 @@ +------------------------- MODULE MC5_7_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4", "n5"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 7 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC7_5_faulty.tla b/spec/light-client/verification/MC7_5_faulty.tla new file mode 100644 index 000000000..860f9c0aa --- /dev/null +++ b/spec/light-client/verification/MC7_5_faulty.tla @@ -0,0 +1,26 @@ +------------------------- MODULE MC7_5_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4", "n5", "n6", "n7"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 5 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/MC7_7_faulty.tla b/spec/light-client/verification/MC7_7_faulty.tla new file mode 100644 index 000000000..79e328f14 --- /dev/null +++ b/spec/light-client/verification/MC7_7_faulty.tla @@ -0,0 +1,26 @@ +------------------------- MODULE MC7_7_faulty --------------------------- + +AllNodes == {"n1", "n2", "n3", "n4", "n5", "n6", "n7"} +TRUSTED_HEIGHT == 1 +TARGET_HEIGHT == 7 +TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-) +CLOCK_DRIFT == 10 \* how much we assume the local clock is drifting +REAL_CLOCK_DRIFT == 3 \* how much the local clock is actually drifting +IS_PRIMARY_CORRECT == FALSE +FAULTY_RATIO == <<1, 3>> \* < 1 / 3 faulty validators + +VARIABLES + state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified, + nprobes, + localClock, + refClock, blockchain, Faulty + +(* the light client previous state components, used for monitoring *) +VARIABLES + prevVerified, + prevCurrent, + prevLocalClock, + prevVerdict + +INSTANCE Lightclient_003_draft +============================================================================ diff --git a/spec/light-client/verification/README.md b/spec/light-client/verification/README.md new file mode 100644 index 000000000..dd9e0bb21 --- /dev/null +++ b/spec/light-client/verification/README.md @@ -0,0 +1,577 @@ +--- +order: 1 +parent: + title: Verification + order: 2 +--- +# Core Verification + +## Problem statement + +We assume that the light client knows a (base) header `inithead` it trusts (by social consensus or because +the light client has decided to trust the header before). The goal is to check whether another header +`newhead` can be trusted based on the data in `inithead`. + +The correctness of the protocol is based on the assumption that `inithead` was generated by an instance of +Tendermint consensus. + +### Failure Model + +For the purpose of the following definitions we assume that there exists a function +`validators` that returns the corresponding validator set for the given hash. + +The light client protocol is defined with respect to the following failure model: + +Given a known bound `TRUSTED_PERIOD`, and a block `b` with header `h` generated at time `Time` +(i.e. `h.Time = Time`), a set of validators that hold more than 2/3 of the voting power +in `validators(b.Header.NextValidatorsHash)` is correct until time `b.Header.Time + TRUSTED_PERIOD`. + +*Assumption*: "correct" is defined w.r.t. realtime (some Newtonian global notion of time, i.e., wall time), +while `Header.Time` corresponds to the [BFT time](../consensus/bft-time.md). In this note, we assume that clocks of correct processes +are synchronized (for example using NTP), and therefore there is bounded clock drift (`CLOCK_DRIFT`) between local clocks and +BFT time. More precisely, for every correct light client process and every `header.Time` (i.e. BFT Time, for a header correctly +generated by the Tendermint consensus), the following inequality holds: `Header.Time < now + CLOCK_DRIFT`, +where `now` corresponds to the system clock at the light client process. + +Furthermore, we assume that `TRUSTED_PERIOD` is (several) order of magnitude bigger than `CLOCK_DRIFT` (`TRUSTED_PERIOD >> CLOCK_DRIFT`), +as `CLOCK_DRIFT` (using NTP) is in the order of milliseconds and `TRUSTED_PERIOD` is in the order of weeks. + +We expect a light client process defined in this document to be used in the context in which there is some +larger period during which misbehaving validators can be detected and punished (we normally refer to it as `UNBONDING_PERIOD` +due to the "bonding" mechanism in modern proof of stake systems). Furthermore, we assume that +`TRUSTED_PERIOD < UNBONDING_PERIOD` and that they are normally of the same order of magnitude, for example +`TRUSTED_PERIOD = UNBONDING_PERIOD / 2`. + +The specification in this document considers an implementation of the light client under the Failure Model defined above. +Mechanisms like `fork accountability` and `evidence submission` are defined in the context of `UNBONDING_PERIOD` and +they incentivize validators to follow the protocol specification defined in this document. If they don't, +and we have 1/3 (or more) faulty validators, safety may be violated. Our approach then is +to *detect* these cases (after the fact), and take suitable repair actions (automatic and social). +This is discussed in document on [Fork accountability](./accountability.md). + +The term "trusted" above indicates that the correctness of the protocol depends on +this assumption. It is in the responsibility of the user that runs the light client to make sure that the risk +of trusting a corrupted/forged `inithead` is negligible. + +*Remark*: This failure model might change to a hybrid version that takes heights into account in the future. + +### High Level Solution + +Upon initialization, the light client is given a header `inithead` it trusts (by +social consensus). When a light clients sees a new signed header `snh`, it has to decide whether to trust the new +header. Trust can be obtained by (possibly) the combination of three methods. + +1. **Uninterrupted sequence of headers.** Given a trusted header `h` and an untrusted header `h1`, +the light client trusts a header `h1` if it trusts all headers in between `h` and `h1`. + +2. **Trusted period.** Given a trusted header `h`, an untrusted header `h1 > h` and `TRUSTED_PERIOD` during which +the failure model holds, we can check whether at least one validator, that has been continuously correct +from `h.Time` until now, has signed `h1`. If this is the case, we can trust `h1`. + +3. **Bisection.** If a check according to 2. (trusted period) fails, the light client can try to +obtain a header `hp` whose height lies between `h` and `h1` in order to check whether `h` can be used to +get trust for `hp`, and `hp` can be used to get trust for `snh`. If this is the case we can trust `h1`; +if not, we continue recursively until either we found set of headers that can build (transitively) trust relation +between `h` and `h1`, or we failed as two consecutive headers don't verify against each other. + +## Definitions + +### Data structures + +In the following, only the details of the data structures needed for this specification are given. + + ```go + type Header struct { + Height int64 + Time Time // the chain time when the header (block) was generated + + LastBlockID BlockID // prev block info + ValidatorsHash []byte // hash of the validators for the current block + NextValidatorsHash []byte // hash of the validators for the next block + } + + type SignedHeader struct { + Header Header + Commit Commit // commit for the given header + } + + type ValidatorSet struct { + Validators []Validator + TotalVotingPower int64 + } + + type Validator struct { + Address Address // validator address (we assume validator's addresses are unique) + VotingPower int64 // validator's voting power + } + + type TrustedState { + SignedHeader SignedHeader + ValidatorSet ValidatorSet + } + ``` + +### Functions + +For the purpose of this light client specification, we assume that the Tendermint Full Node +exposes the following functions over Tendermint RPC: + +```go + // returns signed header: Header with Commit, for the given height + func Commit(height int64) (SignedHeader, error) + + // returns validator set for the given height + func Validators(height int64) (ValidatorSet, error) +``` + +Furthermore, we assume the following auxiliary functions: + +```go + // returns true if the commit is for the header, ie. if it contains + // the correct hash of the header; otherwise false + func matchingCommit(header Header, commit Commit) bool + + // returns the set of validators from the given validator set that + // committed the block (that correctly signed the block) + // it assumes signature verification so it can be computationally expensive + func signers(commit Commit, validatorSet ValidatorSet) []Validator + + // returns the voting power the validators in v1 have according to their voting power in set v2 + // it does not assume signature verification + func votingPowerIn(v1 []Validator, v2 ValidatorSet) int64 + + // returns hash of the given validator set + func hash(v2 ValidatorSet) []byte +``` + +In the functions below we will be using `trustThreshold` as a parameter. For simplicity +we assume that `trustThreshold` is a float between `1/3` and `2/3` and we will not be checking it +in the pseudo-code. + +**VerifySingle.** The function `VerifySingle` attempts to validate given untrusted header and the corresponding validator sets +based on a given trusted state. It ensures that the trusted state is still within its trusted period, +and that the untrusted header is within assumed `clockDrift` bound of the passed time `now`. +Note that this function is not making external (RPC) calls to the full node; the whole logic is +based on the local (given) state. This function is supposed to be used by the IBC handlers. + +```go +func VerifySingle(untrustedSh SignedHeader, + untrustedVs ValidatorSet, + untrustedNextVs ValidatorSet, + trustedState TrustedState, + trustThreshold float, + trustingPeriod Duration, + clockDrift Duration, + now Time) (TrustedState, error) { + + if untrustedSh.Header.Time > now + clockDrift { + return (trustedState, ErrInvalidHeaderTime) + } + + trustedHeader = trustedState.SignedHeader.Header + if !isWithinTrustedPeriod(trustedHeader, trustingPeriod, now) { + return (state, ErrHeaderNotWithinTrustedPeriod) + } + + // we assume that time it takes to execute verifySingle function + // is several order of magnitudes smaller than trustingPeriod + error = verifySingle( + trustedState, + untrustedSh, + untrustedVs, + untrustedNextVs, + trustThreshold) + + if error != nil return (state, error) + + // the untrusted header is now trusted + newTrustedState = TrustedState(untrustedSh, untrustedNextVs) + return (newTrustedState, nil) +} + +// return true if header is within its light client trusted period; otherwise returns false +func isWithinTrustedPeriod(header Header, + trustingPeriod Duration, + now Time) bool { + + return header.Time + trustedPeriod > now +} +``` + +Note that in case `VerifySingle` returns without an error (untrusted header +is successfully verified) then we have a guarantee that the transition of the trust +from `trustedState` to `newTrustedState` happened during the trusted period of +`trustedState.SignedHeader.Header`. + +TODO: Explain what happens in case `VerifySingle` returns with an error. + +**verifySingle.** The function `verifySingle` verifies a single untrusted header +against a given trusted state. It includes all validations and signature verification. +It is not publicly exposed since it does not check for header expiry (time constraints) +and hence it's possible to use it incorrectly. + +```go +func verifySingle(trustedState TrustedState, + untrustedSh SignedHeader, + untrustedVs ValidatorSet, + untrustedNextVs ValidatorSet, + trustThreshold float) error { + + untrustedHeader = untrustedSh.Header + untrustedCommit = untrustedSh.Commit + + trustedHeader = trustedState.SignedHeader.Header + trustedVs = trustedState.ValidatorSet + + if trustedHeader.Height >= untrustedHeader.Height return ErrNonIncreasingHeight + if trustedHeader.Time >= untrustedHeader.Time return ErrNonIncreasingTime + + // validate the untrusted header against its commit, vals, and next_vals + error = validateSignedHeaderAndVals(untrustedSh, untrustedVs, untrustedNextVs) + if error != nil return error + + // check for adjacent headers + if untrustedHeader.Height == trustedHeader.Height + 1 { + if trustedHeader.NextValidatorsHash != untrustedHeader.ValidatorsHash { + return ErrInvalidAdjacentHeaders + } + } else { + error = verifyCommitTrusting(trustedVs, untrustedCommit, untrustedVs, trustThreshold) + if error != nil return error + } + + // verify the untrusted commit + return verifyCommitFull(untrustedVs, untrustedCommit) +} + +// returns nil if header and validator sets are consistent; otherwise returns error +func validateSignedHeaderAndVals(signedHeader SignedHeader, vs ValidatorSet, nextVs ValidatorSet) error { + header = signedHeader.Header + if hash(vs) != header.ValidatorsHash return ErrInvalidValidatorSet + if hash(nextVs) != header.NextValidatorsHash return ErrInvalidNextValidatorSet + if !matchingCommit(header, signedHeader.Commit) return ErrInvalidCommitValue + return nil +} + +// returns nil if at least single correst signer signed the commit; otherwise returns error +func verifyCommitTrusting(trustedVs ValidatorSet, + commit Commit, + untrustedVs ValidatorSet, + trustLevel float) error { + + totalPower := trustedVs.TotalVotingPower + signedPower := votingPowerIn(signers(commit, untrustedVs), trustedVs) + + // check that the signers account for more than max(1/3, trustLevel) of the voting power + // this ensures that there is at least single correct validator in the set of signers + if signedPower < max(1/3, trustLevel) * totalPower return ErrInsufficientVotingPower + return nil +} + +// returns nil if commit is signed by more than 2/3 of voting power of the given validator set +// return error otherwise +func verifyCommitFull(vs ValidatorSet, commit Commit) error { + totalPower := vs.TotalVotingPower; + signedPower := votingPowerIn(signers(commit, vs), vs) + + // check the signers account for +2/3 of the voting power + if signedPower * 3 <= totalPower * 2 return ErrInvalidCommit + return nil +} +``` + +**VerifyHeaderAtHeight.** The function `VerifyHeaderAtHeight` captures high level +logic, i.e., application call to the light client module to download and verify header +for some height. + +```go +func VerifyHeaderAtHeight(untrustedHeight int64, + trustedState TrustedState, + trustThreshold float, + trustingPeriod Duration, + clockDrift Duration) (TrustedState, error)) { + + trustedHeader := trustedState.SignedHeader.Header + + now := System.Time() + if !isWithinTrustedPeriod(trustedHeader, trustingPeriod, now) { + return (trustedState, ErrHeaderNotWithinTrustedPeriod) + } + + newTrustedState, err := VerifyBisection(untrustedHeight, + trustedState, + trustThreshold, + trustingPeriod, + clockDrift, + now) + + if err != nil return (trustedState, err) + + now = System.Time() + if !isWithinTrustedPeriod(trustedHeader, trustingPeriod, now) { + return (trustedState, ErrHeaderNotWithinTrustedPeriod) + } + + return (newTrustedState, err) +} +``` + +Note that in case `VerifyHeaderAtHeight` returns without an error (untrusted header +is successfully verified) then we have a guarantee that the transition of the trust +from `trustedState` to `newTrustedState` happened during the trusted period of +`trustedState.SignedHeader.Header`. + +In case `VerifyHeaderAtHeight` returns with an error, then either (i) the full node we are talking to is faulty +or (ii) the trusted header has expired (it is outside its trusted period). In case (i) the full node is faulty so +light client should disconnect and reinitialize with new peer. In the case (ii) as the trusted header has expired, +we need to reinitialize light client with a new trusted header (that is within its trusted period), +but we don't necessarily need to disconnect from the full node we are talking to (as we haven't observed full node misbehavior in this case). + +**VerifyBisection.** The function `VerifyBisection` implements +recursive logic for checking if it is possible building trust +relationship between `trustedState` and untrusted header at the given height over +finite set of (downloaded and verified) headers. + +```go +func VerifyBisection(untrustedHeight int64, + trustedState TrustedState, + trustThreshold float, + trustingPeriod Duration, + clockDrift Duration, + now Time) (TrustedState, error) { + + untrustedSh, error := Commit(untrustedHeight) + if error != nil return (trustedState, ErrRequestFailed) + + untrustedHeader = untrustedSh.Header + + // note that we pass now during the recursive calls. This is fine as + // all other untrusted headers we download during recursion will be + // for a smaller heights, and therefore should happen before. + if untrustedHeader.Time > now + clockDrift { + return (trustedState, ErrInvalidHeaderTime) + } + + untrustedVs, error := Validators(untrustedHeight) + if error != nil return (trustedState, ErrRequestFailed) + + untrustedNextVs, error := Validators(untrustedHeight + 1) + if error != nil return (trustedState, ErrRequestFailed) + + error = verifySingle( + trustedState, + untrustedSh, + untrustedVs, + untrustedNextVs, + trustThreshold) + + if fatalError(error) return (trustedState, error) + + if error == nil { + // the untrusted header is now trusted. + newTrustedState = TrustedState(untrustedSh, untrustedNextVs) + return (newTrustedState, nil) + } + + // at this point in time we need to do bisection + pivotHeight := ceil((trustedHeader.Height + untrustedHeight) / 2) + + error, newTrustedState = VerifyBisection(pivotHeight, + trustedState, + trustThreshold, + trustingPeriod, + clockDrift, + now) + if error != nil return (newTrustedState, error) + + return VerifyBisection(untrustedHeight, + newTrustedState, + trustThreshold, + trustingPeriod, + clockDrift, + now) +} + +func fatalError(err) bool { + return err == ErrHeaderNotWithinTrustedPeriod OR + err == ErrInvalidAdjacentHeaders OR + err == ErrNonIncreasingHeight OR + err == ErrNonIncreasingTime OR + err == ErrInvalidValidatorSet OR + err == ErrInvalidNextValidatorSet OR + err == ErrInvalidCommitValue OR + err == ErrInvalidCommit +} +``` + +### The case `untrustedHeader.Height < trustedHeader.Height` + +In the use case where someone tells the light client that application data that is relevant for it +can be read in the block of height `k` and the light client trusts a more recent header, we can use the +hashes to verify headers "down the chain." That is, we iterate down the heights and check the hashes in each step. + +*Remark.* For the case were the light client trusts two headers `i` and `j` with `i < k < j`, we should +discuss/experiment whether the forward or the backward method is more effective. + +```go +func VerifyHeaderBackwards(trustedHeader Header, + untrustedHeader Header, + trustingPeriod Duration, + clockDrift Duration) error { + + if untrustedHeader.Height >= trustedHeader.Height return ErrErrNonDecreasingHeight + if untrustedHeader.Time >= trustedHeader.Time return ErrNonDecreasingTime + + now := System.Time() + if !isWithinTrustedPeriod(trustedHeader, trustingPeriod, now) { + return ErrHeaderNotWithinTrustedPeriod + } + + old := trustedHeader + for i := trustedHeader.Height - 1; i > untrustedHeader.Height; i-- { + untrustedSh, error := Commit(i) + if error != nil return ErrRequestFailed + + if (hash(untrustedSh.Header) != old.LastBlockID.Hash) { + return ErrInvalidAdjacentHeaders + } + + old := untrustedSh.Header + } + + if hash(untrustedHeader) != old.LastBlockID.Hash { + return ErrInvalidAdjacentHeaders + } + + now := System.Time() + if !isWithinTrustedPeriod(trustedHeader, trustingPeriod, now) { + return ErrHeaderNotWithinTrustedPeriod + } + + return nil + } +``` + +*Assumption*: In the following, we assume that *untrusted_h.Header.height > trusted_h.Header.height*. We will quickly discuss the other case in the next section. + +We consider the following set-up: + +- the light client communicates with one full node +- the light client locally stores all the headers that has passed basic verification and that are within light client trust period. In the pseudo code below we +write *Store.Add(header)* for this. If a header failed to verify, then +the full node we are talking to is faulty and we should disconnect from it and reinitialize with new peer. +- If `CanTrust` returns *error*, then the light client has seen a forged header or the trusted header has expired (it is outside its trusted period). + - In case of forged header, the full node is faulty so light client should disconnect and reinitialize with new peer. If the trusted header has expired, + we need to reinitialize light client with new trusted header (that is within its trusted period), but we don't necessarily need to disconnect from the full node + we are talking to (as we haven't observed full node misbehavior in this case). + +## Correctness of the Light Client Protocols + +### Definitions + +- `TRUSTED_PERIOD`: trusted period +- for realtime `t`, the predicate `correct(v,t)` is true if the validator `v` + follows the protocol until time `t` (we will see about recovery later). +- Validator fields. We will write a validator as a tuple `(v,p)` such that + - `v` is the identifier (i.e., validator address; we assume identifiers are unique in each validator set) + - `p` is its voting power +- For each header `h`, we write `trust(h) = true` if the light client trusts `h`. + +### Failure Model + +If a block `b` with a header `h` is generated at time `Time` (i.e. `h.Time = Time`), then a set of validators that +hold more than `2/3` of the voting power in `validators(h.NextValidatorsHash)` is correct until time +`h.Time + TRUSTED_PERIOD`. + +Formally, +\[ +\sum_{(v,p) \in validators(h.NextValidatorsHash) \wedge correct(v,h.Time + TRUSTED_PERIOD)} p > +2/3 \sum_{(v,p) \in validators(h.NextValidatorsHash)} p +\] + +The light client communicates with a full node and learns new headers. The goal is to locally decide whether to trust a header. Our implementation needs to ensure the following two properties: + +- *Light Client Completeness*: If a header `h` was correctly generated by an instance of Tendermint consensus (and its age is less than the trusted period), +then the light client should eventually set `trust(h)` to `true`. + +- *Light Client Accuracy*: If a header `h` was *not generated* by an instance of Tendermint consensus, then the light client should never set `trust(h)` to true. + +*Remark*: If in the course of the computation, the light client obtains certainty that some headers were forged by adversaries +(that is were not generated by an instance of Tendermint consensus), it may submit (a subset of) the headers it has seen as evidence of misbehavior. + +*Remark*: In Completeness we use "eventually", while in practice `trust(h)` should be set to true before `h.Time + TRUSTED_PERIOD`. If not, the header +cannot be trusted because it is too old. + +*Remark*: If a header `h` is marked with `trust(h)`, but it is too old at some point in time we denote with `now` (`h.Time + TRUSTED_PERIOD < now`), +then the light client should set `trust(h)` to `false` again at time `now`. + +*Assumption*: Initially, the light client has a header `inithead` that it trusts, that is, `inithead` was correctly generated by the Tendermint consensus. + +To reason about the correctness, we may prove the following invariant. + +*Verification Condition: light Client Invariant.* + For each light client `l` and each header `h`: +if `l` has set `trust(h) = true`, + then validators that are correct until time `h.Time + TRUSTED_PERIOD` have more than two thirds of the voting power in `validators(h.NextValidatorsHash)`. + + Formally, + \[ + \sum_{(v,p) \in validators(h.NextValidatorsHash) \wedge correct(v,h.Time + TRUSTED_PERIOD)} p > + 2/3 \sum_{(v,p) \in validators(h.NextValidatorsHash)} p + \] + +*Remark.* To prove the invariant, we will have to prove that the light client only trusts headers that were correctly generated by Tendermint consensus. +Then the formula above follows from the failure model. + +## Details + +**Observation 1.** If `h.Time + TRUSTED_PERIOD > now`, we trust the validator set `validators(h.NextValidatorsHash)`. + +When we say we trust `validators(h.NextValidatorsHash)` we do `not` trust that each individual validator in `validators(h.NextValidatorsHash)` +is correct, but we only trust the fact that less than `1/3` of them are faulty (more precisely, the faulty ones have less than `1/3` of the total voting power). + +*`VerifySingle` correctness arguments* + +Light Client Accuracy: + +- Assume by contradiction that `untrustedHeader` was not generated correctly and the light client sets trust to true because `verifySingle` returns without error. +- `trustedState` is trusted and sufficiently new +- by the Failure Model, less than `1/3` of the voting power held by faulty validators => at least one correct validator `v` has signed `untrustedHeader`. +- as `v` is correct up to now, it followed the Tendermint consensus protocol at least up to signing `untrustedHeader` => `untrustedHeader` was correctly generated. +We arrive at the required contradiction. + +Light Client Completeness: + +- The check is successful if sufficiently many validators of `trustedState` are still validators in the height `untrustedHeader.Height` and signed `untrustedHeader`. +- If `untrustedHeader.Height = trustedHeader.Height + 1`, and both headers were generated correctly, the test passes. + +*Verification Condition:* We may need a Tendermint invariant stating that if `untrustedSignedHeader.Header.Height = trustedHeader.Height + 1` then +`signers(untrustedSignedHeader.Commit) \subseteq validators(trustedHeader.NextValidatorsHash)`. + +*Remark*: The variable `trustThreshold` can be used if the user believes that relying on one correct validator is not sufficient. +However, in case of (frequent) changes in the validator set, the higher the `trustThreshold` is chosen, the more unlikely it becomes that +`verifySingle` returns with an error for non-adjacent headers. + +- `VerifyBisection` correctness arguments (sketch)* + +Light Client Accuracy: + +- Assume by contradiction that the header at `untrustedHeight` obtained from the full node was not generated correctly and +the light client sets trust to true because `VerifyBisection` returns without an error. +- `VerifyBisection` returns without error only if all calls to `verifySingle` in the recursion return without error (return `nil`). +- Thus we have a sequence of headers that all satisfied the `verifySingle` +- again a contradiction + +light Client Completeness: + +This is only ensured if upon `Commit(pivot)` the light client is always provided with a correctly generated header. + +*Stalling* + +With `VerifyBisection`, a faulty full node could stall a light client by creating a long sequence of headers that are queried one-by-one by the light client and look OK, +before the light client eventually detects a problem. There are several ways to address this: + +- Each call to `Commit` could be issued to a different full node +- Instead of querying header by header, the light client tells a full node which header it trusts, and the height of the header it needs. The full node responds with +the header along with a proof consisting of intermediate headers that the light client can use to verify. Roughly, `VerifyBisection` would then be executed at the full node. +- We may set a timeout how long `VerifyBisection` may take. diff --git a/spec/light-client/verification/verification_001_published.md b/spec/light-client/verification/verification_001_published.md new file mode 100644 index 000000000..90dacc749 --- /dev/null +++ b/spec/light-client/verification/verification_001_published.md @@ -0,0 +1,1178 @@ +# Light Client Verification + +The light client implements a read operation of a +[header][TMBC-HEADER-link] from the [blockchain][TMBC-SEQ-link], by +communicating with full nodes. As some full nodes may be faulty, this +functionality must be implemented in a fault-tolerant way. + +In the Tendermint blockchain, the validator set may change with every +new block. The staking and unbonding mechanism induces a [security +model][TMBC-FM-2THIRDS-link]: starting at time *Time* of the +[header][TMBC-HEADER-link], +more than two-thirds of the next validators of a new block are correct +for the duration of *TrustedPeriod*. The fault-tolerant read +operation is designed for this security model. + +The challenge addressed here is that the light client might have a +block of height *h1* and needs to read the block of height *h2* +greater than *h1*. Checking all headers of heights from *h1* to *h2* +might be too costly (e.g., in terms of energy for mobile devices). +This specification tries to reduce the number of intermediate blocks +that need to be checked, by exploiting the guarantees provided by the +[security model][TMBC-FM-2THIRDS-link]. + +# Status + +This document is thoroughly reviewed, and the protocol has been +formalized in TLA+ and model checked. + +## Issues that need to be addressed + +As it is part of the larger light node, its data structures and +functions interact with the fork dectection functionality of the light +client. As a result of the work on +[Pull Request 479](https://github.com/informalsystems/tendermint-rs/pull/479) we +established the need for an update in the data structures in [Issue 499](https://github.com/informalsystems/tendermint-rs/issues/499). This +will not change the verification logic, but it will record information +about verification that can be used in fork detection (in particular +in computing more efficiently the proof of fork). + +# Outline + +- [Part I](#part-i---tendermint-blockchain): Introduction of + relevant terms of the Tendermint +blockchain. + +- [Part II](#part-ii---sequential-definition-of-the-verification-problem): Introduction +of the problem addressed by the Lightclient Verification protocol. + - [Verification Informal Problem + statement](#Verification-Informal-Problem-statement): For the general + audience, that is, engineers who want to get an overview over what + the component is doing from a bird's eye view. + - [Sequential Problem statement](#Sequential-Problem-statement): + Provides a mathematical definition of the problem statement in + its sequential form, that is, ignoring the distributed aspect of + the implementation of the blockchain. + +- [Part III](#part-iii---light-client-as-distributed-system): Distributed + aspects of the light client, system assumptions and temporal + logic specifications. + + - [Incentives](#incentives): how faulty full nodes may benefit from + misbehaving and how correct full nodes benefit from cooperating. + + - [Computational Model](#Computational-Model): + timing and correctness assumptions. + + - [Distributed Problem Statement](#Distributed-Problem-Statement): + temporal properties that formalize safety and liveness + properties in the distributed setting. + +- [Part IV](#part-iv---light-client-verification-protocol): + Specification of the protocols. + + - [Definitions](#Definitions): Describes inputs, outputs, + variables used by the protocol, auxiliary functions + + - [Core Verification](#core-verification): gives an outline of the solution, + and details of the functions used (with preconditions, + postconditions, error conditions). + + - [Liveness Scenarios](#liveness-scenarios): when the light + client makes progress depends heavily on the changes in the + validator sets of the blockchain. We discuss some typical scenarios. + +- [Part V](#part-v---supporting-the-ibc-relayer): The above parts + focus on a common case where the last verified block has height *h1* + and the + requested height *h2* satisfies *h2 > h1*. For IBC, there are + scenarios where this might not be the case. In this part, we provide + some preliminaries for supporting this. As not all details of the + IBC requirements are clear by now, we do not provide a complete + specification at this point. We mark with "Open Question" points + that need to be addressed in order to finalize this specification. + It should be noted that the technically + most challenging case is the one specified in Part IV. + +In this document we quite extensively use tags in order to be able to +reference assumptions, invariants, etc. in future communication. In +these tags we frequently use the following short forms: + +- TMBC: Tendermint blockchain +- SEQ: for sequential specifications +- LCV: Lightclient Verification +- LIVE: liveness +- SAFE: safety +- FUNC: function +- INV: invariant +- A: assumption + +# Part I - Tendermint Blockchain + +## Header Fields necessary for the Light Client + +#### **[TMBC-HEADER.1]** + +A set of blockchain transactions is stored in a data structure called +*block*, which contains a field called *header*. (The data structure +*block* is defined [here][block]). As the header contains hashes to +the relevant fields of the block, for the purpose of this +specification, we will assume that the blockchain is a list of +headers, rather than a list of blocks. + +#### **[TMBC-HASH-UNIQUENESS.1]** + +We assume that every hash in the header identifies the data it hashes. +Therefore, in this specification, we do not distinguish between hashes and the +data they represent. + +#### **[TMBC-HEADER-FIELDS.1]** + +A header contains the following fields: + +- `Height`: non-negative integer +- `Time`: time (integer) +- `LastBlockID`: Hashvalue +- `LastCommit` DomainCommit +- `Validators`: DomainVal +- `NextValidators`: DomainVal +- `Data`: DomainTX +- `AppState`: DomainApp +- `LastResults`: DomainRes + +#### **[TMBC-SEQ.1]** + +The Tendermint blockchain is a list *chain* of headers. + +#### **[TMBC-VALIDATOR-PAIR.1]** + +Given a full node, a +*validator pair* is a pair *(peerID, voting_power)*, where + +- *peerID* is the PeerID (public key) of a full node, +- *voting_power* is an integer (representing the full node's + voting power in a certain consensus instance). + +> In the Golang implementation the data type for *validator +pair* is called `Validator` + +#### **[TMBC-VALIDATOR-SET.1]** + +A *validator set* is a set of validator pairs. For a validator set +*vs*, we write *TotalVotingPower(vs)* for the sum of the voting powers +of its validator pairs. + +#### **[TMBC-VOTE.1]** + +A *vote* contains a `prevote` or `precommit` message sent and signed by +a validator node during the execution of [consensus][arXiv]. Each +message contains the following fields + +- `Type`: prevote or precommit +- `Height`: positive integer +- `Round` a positive integer +- `BlockID` a Hashvalue of a block (not necessarily a block of the chain) + +#### **[TMBC-COMMIT.1]** + +A commit is a set of `precommit` message. + +## Tendermint Failure Model + +#### **[TMBC-AUTH-BYZ.1]** + +We assume the authenticated Byzantine fault model in which no node (faulty or +correct) may break digital signatures, but otherwise, no additional +assumption is made about the internal behavior of faulty +nodes. That is, faulty nodes are only limited in that they cannot forge +messages. + +#### **[TMBC-TIME-PARAMS.1]** + +A Tendermint blockchain has the following configuration parameters: + +- *unbondingPeriod*: a time duration. +- *trustingPeriod*: a time duration smaller than *unbondingPeriod*. + +#### **[TMBC-CORRECT.1]** + +We define a predicate *correctUntil(n, t)*, where *n* is a node and *t* is a +time point. +The predicate *correctUntil(n, t)* is true if and only if the node *n* +follows all the protocols (at least) until time *t*. + +#### **[TMBC-FM-2THIRDS.1]** + +If a block *h* is in the chain, +then there exists a subset *CorrV* +of *h.NextValidators*, such that: + +- *TotalVotingPower(CorrV) > 2/3 + TotalVotingPower(h.NextValidators)*; cf. [TMBC-VALIDATOR-SET.1] +- For every validator pair *(n,p)* in *CorrV*, it holds *correctUntil(n, + h.Time + trustingPeriod)*; cf. [TMBC-CORRECT.1] + +> The definition of correct +> [**[TMBC-CORRECT.1]**][TMBC-CORRECT-link] refers to realtime, while it +> is used here with *Time* and *trustingPeriod*, which are "hardware +> times". We do not make a distinction here. + +#### **[TMBC-CORR-FULL.1]** + +Every correct full node locally stores a prefix of the +current list of headers from [**[TMBC-SEQ.1]**][TMBC-SEQ-link]. + +## What the Light Client Checks + +> From [TMBC-FM-2THIRDS.1] we directly derive the following observation: + +#### **[TMBC-VAL-CONTAINS-CORR.1]** + +Given a (trusted) block *tb* of the blockchain, a given set of full nodes +*N* contains a correct node at a real-time *t*, if + +- *t - trustingPeriod < tb.Time < t* +- the voting power in tb.NextValidators of nodes in *N* is more + than 1/3 of *TotalVotingPower(tb.NextValidators)* + +> The following describes how a commit for a given block *b* must look +> like. + +#### **[TMBC-SOUND-DISTR-POSS-COMMIT.1]** + +For a block *b*, each element *pc* of *PossibleCommit(b)* satisfies: + +- *pc* contains only votes (cf. [TMBC-VOTE.1]) + by validators from *b.Validators* +- the sum of the voting powers in *pc* is greater than 2/3 + *TotalVotingPower(b.Validators)* +- and there is an *r* such that each vote *v* in *pc* satisfies + - v.Type = precommit + - v.Height = b.Height + - v.Round = r + - v.blockID = hash(b) + +> The following property comes from the validity of the [consensus][arXiv]: A +> correct validator node only sends `prevote` or `precommit`, if +> `BlockID` of the new (to-be-decided) block is equal to the hash of +> the last block. + +#### **[TMBC-VAL-COMMIT.1]** + +If for a block *b*, a commit *c* + +- contains at least one validator pair *(v,p)* such that *v* is a + **correct** validator node, and +- is contained in *PossibleCommit(b)* + +then the block *b* is on the blockchain. + +## Context of this document + +In this document we specify the light client verification component, +called *Core Verification*. The *Core Verification* communicates with +a full node. As full nodes may be faulty, it cannot trust the +received information, but the light client has to check whether the +header it receives coincides with the one generated by Tendermint +consensus. + +The two + properties [[TMBC-VAL-CONTAINS-CORR.1]][TMBC-VAL-CONTAINS-CORR-link] and +[[TMBC-VAL-COMMIT]][TMBC-VAL-COMMIT-link] formalize the checks done + by this specification: +Given a trusted block *tb* and an untrusted block *ub* with a commit *cub*, +one has to check that *cub* is in *PossibleCommit(ub)*, and that *cub* +contains a correct node using *tb*. + +# Part II - Sequential Definition of the Verification Problem + +## Verification Informal Problem statement + +Given a height *targetHeight* as an input, the *Verifier* eventually +stores a header *h* of height *targetHeight* locally. This header *h* +is generated by the Tendermint [blockchain][block]. In +particular, a header that was not generated by the blockchain should +never be stored. + +## Sequential Problem statement + +#### **[LCV-SEQ-LIVE.1]** + +The *Verifier* gets as input a height *targetHeight*, and eventually stores the +header of height *targetHeight* of the blockchain. + +#### **[LCV-SEQ-SAFE.1]** + +The *Verifier* never stores a header which is not in the blockchain. + +# Part III - Light Client as Distributed System + +## Incentives + +Faulty full nodes may benefit from lying to the light client, by making the +light client accept a block that deviates (e.g., contains additional +transactions) from the one generated by Tendermint consensus. +Users using the light client might be harmed by accepting a forged header. + +The [fork detector][fork-detector] of the light client may help the +correct full nodes to understand whether their header is a good one. +Hence, in combination with the light client detector, the correct full +nodes have the incentive to respond. We can thus base liveness +arguments on the assumption that correct full nodes reliably talk to +the light client. + +## Computational Model + +#### **[LCV-A-PEER.1]** + +The verifier communicates with a full node called *primary*. No assumption is made about the full node (it may be correct or faulty). + +#### **[LCV-A-COMM.1]** + +Communication between the light client and a correct full node is +reliable and bounded in time. Reliable communication means that +messages are not lost, not duplicated, and eventually delivered. There +is a (known) end-to-end delay *Delta*, such that if a message is sent +at time *t* then it is received and processes by time *t + Delta*. +This implies that we need a timeout of at least *2 Delta* for remote +procedure calls to ensure that the response of a correct peer arrives +before the timeout expires. + +#### **[LCV-A-TFM.1]** + +The Tendermint blockchain satisfies the Tendermint failure model [**[TMBC-FM-2THIRDS.1]**][TMBC-FM-2THIRDS-link]. + +#### **[LCV-A-VAL.1]** + +The system satisfies [**[TMBC-AUTH-BYZ.1]**][TMBC-Auth-Byz-link] and +[**[TMBC-FM-2THIRDS.1]**][TMBC-FM-2THIRDS-link]. Thus, there is a +blockchain that satisfies the soundness requirements (that is, the +validation rules in [[block]]). + +## Distributed Problem Statement + +### Two Kinds of Termination + +We do not assume that *primary* is correct. Under this assumption no +protocol can guarantee the combination of the sequential +properties. Thus, in the (unreliable) distributed setting, we consider +two kinds of termination (successful and failure) and we will specify +below under what (favorable) conditions *Core Verification* ensures to +terminate successfully, and satisfy the requirements of the sequential +problem statement: + +#### **[LCV-DIST-TERM.1]** + +*Core Verification* either *terminates +successfully* or it *terminates with failure*. + +### Design choices + +#### **[LCV-DIST-STORE.1]** + +*Core Verification* has a local data structure called *LightStore* that +contains light blocks (that contain a header). For each light block we +record whether it is verified. + +#### **[LCV-DIST-PRIMARY.1]** + +*Core Verification* has a local variable *primary* that contains the PeerID of a full node. + +#### **[LCV-DIST-INIT.1]** + +*LightStore* is initialized with a header *trustedHeader* that was correctly +generated by the Tendermint consensus. We say *trustedHeader* is verified. + +### Temporal Properties + +#### **[LCV-DIST-SAFE.1]** + +It is always the case that every verified header in *LightStore* was +generated by an instance of Tendermint consensus. + +#### **[LCV-DIST-LIVE.1]** + +From time to time, a new instance of *Core Verification* is called with a +height *targetHeight* greater than the height of any header in *LightStore*. +Each instance must eventually terminate. + +- If + - the *primary* is correct (and locally has the block of + *targetHeight*), and + - *LightStore* always contains a verified header whose age is less than the + trusting period, + then *Core Verification* adds a verified header *hd* with height + *targetHeight* to *LightStore* and it **terminates successfully** + +> These definitions imply that if the primary is faulty, a header may or +> may not be added to *LightStore*. In any case, +> [**[LCV-DIST-SAFE.1]**](#lcv-vc-inv) must hold. +> The invariant [**[LCV-DIST-SAFE.1]**](#lcv-dist-safe) and the liveness +> requirement [**[LCV-DIST-LIVE.1]**](#lcv-dist-life) +> allow that verified headers are added to *LightStore* whose +> height was not passed +> to the verifier (e.g., intermediate headers used in bisection; see below). +> Note that for liveness, initially having a *trustedHeader* within +> the *trustinPeriod* is not sufficient. However, as this +> specification will leave some freedom with respect to the strategy +> in which order to download intermediate headers, we do not give a +> more precise liveness specification here. After giving the +> specification of the protocol, we will discuss some liveness +> scenarios [below](#liveness-scenarios). + +### Solving the sequential specification + +This specification provides a partial solution to the sequential specification. +The *Verifier* solves the invariant of the sequential part + +[**[LCV-DIST-SAFE.1]**](#lcv-vc-inv) => [**[LCV-SEQ-SAFE.1]**](#lcv-seq-inv) + +In the case the primary is correct, and there is a recent header in *LightStore*, the verifier satisfies the liveness requirements. + +⋀ *primary is correct* +⋀ always ∃ verified header in LightStore. *header.Time* > *now* - *trustingPeriod* +⋀ [**[LCV-A-Comm.1]**](#lcv-a-comm) ⋀ ( + ( [**[TMBC-CorrFull.1]**][TMBC-CorrFull-link] ⋀ + [**[LCV-DIST-LIVE.1]**](#lcv-vc-live) ) + ⟹ [**[LCV-SEQ-LIVE.1]**](#lcv-seq-live) +) + +# Part IV - Light Client Verification Protocol + +We provide a specification for Light Client Verification. The local +code for verification is presented by a sequential function +`VerifyToTarget` to highlight the control flow of this functionality. +We note that if a different concurrency model is considered for +an implementation, the sequential flow of the function may be +implemented with mutexes, etc. However, the light client verification +is partitioned into three blocks that can be implemented and tested +independently: + +- `FetchLightBlock` is called to download a light block (header) of a + given height from a peer. +- `ValidAndVerified` is a local code that checks the header. +- `Schedule` decides which height to try to verify next. We keep this + underspecified as different implementations (currently in Goland and + Rust) may implement different optimizations here. We just provide + necessary conditions on how the height may evolve. + + + + +## Definitions + +### Data Types + +The core data structure of the protocol is the LightBlock. + +#### **[LCV-DATA-LIGHTBLOCK.1]** + +```go +type LightBlock struct { + Header Header + Commit Commit + Validators ValidatorSet +} +``` + +#### **[LCV-DATA-LIGHTSTORE.1]** + +LightBlocks are stored in a structure which stores all LightBlock from +initialization or received from peers. + +```go +type LightStore struct { + ... +} + +``` + +Each LightBlock is in one of the following states: + +```go +type VerifiedState int + +const ( + StateUnverified = iota + 1 + StateVerified + StateFailed + StateTrusted +) +``` + +> Only the detector module sets a lightBlock state to `StateTrusted` +> and only if it was `StateVerified` before. + +The LightStore exposes the following functions to query stored LightBlocks. + +#### **[LCV-FUNC-GET.1]** + +```go +func (ls LightStore) Get(height Height) (LightBlock, bool) +``` + +- Expected postcondition + - returns a LightBlock at a given height or false in the second argument if + the LightStore does not contain the specified LightBlock. + +#### **[LCV-FUNC-LATEST-VERIF.1]** + +```go +func (ls LightStore) LatestVerified() LightBlock +``` + +- Expected postcondition + - returns the highest light block whose state is `StateVerified` + or `StateTrusted` + +#### **[LCV-FUNC-UPDATE.2]** + +```go +func (ls LightStore) Update(lightBlock LightBlock, + verfiedState VerifiedState + verifiedBy Height) +``` + +- Expected postcondition + - The state of the LightBlock is set to *verifiedState*. + - verifiedBy of the Lightblock is set to *Height* + +> The following function is used only in the detector specification +> listed here for completeness. + +#### **[LCV-FUNC-LATEST-TRUSTED.1]** + +```go +func (ls LightStore) LatestTrusted() LightBlock +``` + +- Expected postcondition + - returns the highest light block that has been verified and + checked by the detector. + +#### **[LCV-FUNC-FILTER.1]** + +```go +func (ls LightStore) FilterVerified() LightSTore +``` + +- Expected postcondition + - returns only the LightBlocks with state verified. + +### Inputs + +- *lightStore*: stores light blocks that have been downloaded and that + passed verification. Initially it contains a light block with + *trustedHeader*. +- *primary*: peerID +- *targetHeight*: the height of the needed header + +### Configuration Parameters + +- *trustThreshold*: a float. Can be used if correctness should not be based on more voting power and 1/3. +- *trustingPeriod*: a time duration [**[TMBC-TIME_PARAMS.1]**][TMBC-TIME_PARAMS-link]. +- *clockDrift*: a time duration. Correction parameter dealing with only approximately synchronized clocks. + +### Variables + +- *nextHeight*: initially *targetHeight* + > *nextHeight* should be thought of the "height of the next header we need + > to download and verify" + +### Assumptions + +#### **[LCV-A-INIT.1]** + +- *trustedHeader* is from the blockchain + +- *targetHeight > LightStore.LatestVerified.Header.Height* + +### Invariants + +#### **[LCV-INV-TP.1]** + +It is always the case that *LightStore.LatestTrusted.Header.Time > now - trustingPeriod*. + +> If the invariant is violated, the light client does not have a +> header it can trust. A trusted header must be obtained externally, +> its trust can only be based on social consensus. + +### Used Remote Functions + +We use the functions `commit` and `validators` that are provided +by the [RPC client for Tendermint][RPC]. + +```go +func Commit(height int64) (SignedHeader, error) +``` + +- Implementation remark + - RPC to full node *n* + - JSON sent: + +```javascript +// POST /commit +{ + "jsonrpc": "2.0", + "id": "ccc84631-dfdb-4adc-b88c-5291ea3c2cfb", // UUID v4, unique per request + "method": "commit", + "params": { + "height": 1234 + } +} +``` + +- Expected precondition + - header of `height` exists on blockchain +- Expected postcondition + - if *n* is correct: Returns the signed header of height `height` + from the blockchain if communication is timely (no timeout) + - if *n* is faulty: Returns a signed header with arbitrary content +- Error condition + - if *n* is correct: precondition violated or timeout + - if *n* is faulty: arbitrary error + +---- + +```go +func Validators(height int64) (ValidatorSet, error) +``` + +- Implementation remark + - RPC to full node *n* + - JSON sent: + +```javascript +// POST /validators +{ + "jsonrpc": "2.0", + "id": "ccc84631-dfdb-4adc-b88c-5291ea3c2cfb", // UUID v4, unique per request + "method": "validators", + "params": { + "height": 1234 + } +} +``` + +- Expected precondition + - header of `height` exists on blockchain +- Expected postcondition + - if *n* is correct: Returns the validator set of height `height` + from the blockchain if communication is timely (no timeout) + - if *n* is faulty: Returns arbitrary validator set +- Error condition + - if *n* is correct: precondition violated or timeout + - if *n* is faulty: arbitrary error + +---- + +### Communicating Function + +#### **[LCV-FUNC-FETCH.1]** + + ```go +func FetchLightBlock(peer PeerID, height Height) LightBlock +``` + +- Implementation remark + - RPC to peer at *PeerID* + - calls `Commit` for *height* and `Validators` for *height* and *height+1* +- Expected precondition + - `height` is less than or equal to height of the peer **[LCV-IO-PRE-HEIGHT.1]** +- Expected postcondition: + - if *node* is correct: + - Returns the LightBlock *lb* of height `height` + that is consistent with the blockchain + - *lb.provider = peer* **[LCV-IO-POST-PROVIDER.1]** + - *lb.Header* is a header consistent with the blockchain + - *lb.Validators* is the validator set of the blockchain at height *nextHeight* + - *lb.NextValidators* is the validator set of the blockchain at height *nextHeight + 1* + - if *node* is faulty: Returns a LightBlock with arbitrary content + [**[TMBC-AUTH-BYZ.1]**][TMBC-Auth-Byz-link] +- Error condition + - if *n* is correct: precondition violated + - if *n* is faulty: arbitrary error + - if *lb.provider != peer* + - times out after 2 Delta (by assumption *n* is faulty) + +---- + +## Core Verification + +### Outline + +The `VerifyToTarget` is the main function and uses the following functions. + +- `FetchLightBlock` is called to download the next light block. It is + the only function that communicates with other nodes +- `ValidAndVerified` checks whether header is valid and checks if a + new lightBlock should be trusted + based on a previously verified lightBlock. +- `Schedule` decides which height to try to verify next + +In the following description of `VerifyToTarget` we do not deal with error +handling. If any of the above function returns an error, VerifyToTarget just +passes the error on. + +#### **[LCV-FUNC-MAIN.1]** + +```go +func VerifyToTarget(primary PeerID, lightStore LightStore, + targetHeight Height) (LightStore, Result) { + + nextHeight := targetHeight + + for lightStore.LatestVerified.height < targetHeight { + + // Get next LightBlock for verification + current, found := lightStore.Get(nextHeight) + if !found { + current = FetchLightBlock(primary, nextHeight) + lightStore.Update(current, StateUnverified) + } + + // Verify + verdict = ValidAndVerified(lightStore.LatestVerified, current) + + // Decide whether/how to continue + if verdict == SUCCESS { + lightStore.Update(current, StateVerified) + } + else if verdict == NOT_ENOUGH_TRUST { + // do nothing + // the light block current passed validation, but the validator + // set is too different to verify it. We keep the state of + // current at StateUnverified. For a later iteration, Schedule + // might decide to try verification of that light block again. + } + else { + // verdict is some error code + lightStore.Update(current, StateFailed) + // possibly remove all LightBlocks from primary + return (lightStore, ResultFailure) + } + nextHeight = Schedule(lightStore, nextHeight, targetHeight) + } + return (lightStore, ResultSuccess) +} +``` + +- Expected precondition + - *lightStore* contains a LightBlock within the *trustingPeriod* **[LCV-PRE-TP.1]** + - *targetHeight* is greater than the height of all the LightBlocks in *lightStore* +- Expected postcondition: + - returns *lightStore* that contains a LightBlock that corresponds to a block + of the blockchain of height *targetHeight* + (that is, the LightBlock has been added to *lightStore*) **[LCV-POST-LS.1]** +- Error conditions + - if the precondition is violated + - if `ValidAndVerified` or `FetchLightBlock` report an error + - if [**[LCV-INV-TP.1]**](#LCV-INV-TP.1) is violated + +### Details of the Functions + +#### **[LCV-FUNC-VALID.1]** + +```go +func ValidAndVerified(trusted LightBlock, untrusted LightBlock) Result +``` + +- Expected precondition: + - *untrusted* is valid, that is, satisfies the soundness [checks][block] + - *untrusted* is **well-formed**, that is, + - *untrusted.Header.Time < now + clockDrift* + - *untrusted.Validators = hash(untrusted.Header.Validators)* + - *untrusted.NextValidators = hash(untrusted.Header.NextValidators)* + - *trusted.Header.Time > now - trustingPeriod* + - *trusted.Commit* is a commit for the header + *trusted.Header*, i.e., it contains + the correct hash of the header, and +2/3 of signatures + - the `Height` and `Time` of `trusted` are smaller than the Height and + `Time` of `untrusted`, respectively + - the *untrusted.Header* is well-formed (passes the tests from + [[block]]), and in particular + - if the untrusted header `unstrusted.Header` is the immediate + successor of `trusted.Header`, then it holds that + - *trusted.Header.NextValidators = + untrusted.Header.Validators*, and + moreover, + - *untrusted.Header.Commit* + - contains signatures by more than two-thirds of the validators + - contains no signature from nodes that are not in *trusted.Header.NextValidators* +- Expected postcondition: + - Returns `SUCCESS`: + - if *untrusted* is the immediate successor of *trusted*, or otherwise, + - if the signatures of a set of validators that have more than + *max(1/3,trustThreshold)* of voting power in + *trusted.Nextvalidators* is contained in + *untrusted.Commit* (that is, header passes the tests + [**[TMBC-VAL-CONTAINS-CORR.1]**][TMBC-VAL-CONTAINS-CORR-link] + and [**[TMBC-VAL-COMMIT.1]**][TMBC-VAL-COMMIT-link]) + - Returns `NOT_ENOUGH_TRUST` if: + - *untrusted* is *not* the immediate successor of + *trusted* + and the *max(1/3,trustThreshold)* threshold is not reached + (that is, if + [**[TMBC-VAL-CONTAINS-CORR.1]**][TMBC-VAL-CONTAINS-CORR-link] + fails and header is does not violate the soundness + checks [[block]]). +- Error condition: + - if precondition violated + +---- + +#### **[LCV-FUNC-SCHEDULE.1]** + +```go +func Schedule(lightStore, nextHeight, targetHeight) Height +``` + +- Implementation remark: If picks the next height to be verified. + We keep the precise choice of the next header under-specified. It is + subject to performance optimizations that do not influence the correctness +- Expected postcondition: **[LCV-SCHEDULE-POST.1]** + Return *H* s.t. + 1. if *lightStore.LatestVerified.Height = nextHeight* and + *lightStore.LatestVerified < targetHeight* then + *nextHeight < H <= targetHeight* + 2. if *lightStore.LatestVerified.Height < nextHeight* and + *lightStore.LatestVerified.Height < targetHeight* then + *lightStore.LatestVerified.Height < H < nextHeight* + 3. if *lightStore.LatestVerified.Height = targetHeight* then + *H = targetHeight* + +> Case i. captures the case where the light block at height *nextHeight* +> has been verified, and we can choose a height closer to the *targetHeight*. +> As we get the *lightStore* as parameter, the choice of the next height can +> depend on the *lightStore*, e.g., we can pick a height for which we have +> already downloaded a light block. +> In Case ii. the header of *nextHeight* could not be verified, and we need to pick a smaller height. +> In Case iii. is a special case when we have verified the *targetHeight*. + +### Solving the distributed specification + +*trustedStore* is implemented by the light blocks in lightStore that +have the state *StateVerified*. + +#### Argument for [**[LCV-DIST-SAFE.1]**](#lcv-dist-safe) + +- `ValidAndVerified` implements the soundness checks and the checks + [**[TMBC-VAL-CONTAINS-CORR.1]**][TMBC-VAL-CONTAINS-CORR-link] and + [**[TMBC-VAL-COMMIT.1]**][TMBC-VAL-COMMIT-link] under + the assumption [**[TMBC-FM-2THIRDS.1]**][TMBC-FM-2THIRDS-link] +- Only if `ValidAndVerified` returns with `SUCCESS`, the state of a light block is + set to *StateVerified*. + +#### Argument for [**[LCV-DIST-LIVE.1]**](#lcv-dist-life) + +- If *primary* is correct, + - `FetchLightBlock` will always return a light block consistent + with the blockchain + - `ValidAndVerified` either verifies the header using the trusting + period or falls back to sequential + verification + - If [**[LCV-INV-TP.1]**](#LCV-INV-TP.1) holds, eventually every + header will be verified and core verification **terminates successfully**. + - successful termination depends on the age of *lightStore.LatestVerified* + (for instance, initially on the age of *trustedHeader*) and the + changes of the validator sets on the blockchain. + We will give some examples [below](#liveness-scenarios). +- If *primary* is faulty, + - it either provides headers that pass all the tests, and we + return with the header + - it provides one header that fails a test, core verification + **terminates with failure**. + - it times out and core verification + **terminates with failure**. + +## Liveness Scenarios + +The liveness argument above assumes [**[LCV-INV-TP.1]**](#LCV-INV-TP.1) + +which requires that there is a header that does not expire before the +target height is reached. Here we discuss scenarios to ensure this. + +Let *startHeader* be *LightStore.LatestVerified* when core +verification is called (*trustedHeader*) and *startTime* be the time +core verification is invoked. + +In order to ensure liveness, *LightStore* always needs to contain a +verified (or initially trusted) header whose time is within the +trusting period. To ensure this, core verification needs to add new +headers to *LightStore* and verify them, before all headers in +*LightStore* expire. + +#### Many changes in validator set + + Let's consider `Schedule` implements + bisection, that is, it halves the distance. + Assume the case where the validator set changes completely in each +block. Then the + method in this specification needs to +sequentially verify all headers. That is, for + +- *W = log_2 (targetHeight - startHeader.Height)*, + +*W* headers need to be downloaded and checked before the +header of height *startHeader.Height + 1* is added to *LightStore*. + +- Let *Comp* + be the local computation time needed to check headers and signatures + for one header. +- Then we need in the worst case *Comp + 2 Delta* to download and + check one header. +- Then the first time a verified header could be added to *LightStore* is + startTime + W * (Comp + 2 Delta) +- [TP.1] However, it can only be added if we still have a header in + *LightStore*, + which is not + expired, that is only the case if + - startHeader.Time > startTime + WCG * (Comp + 2 Delta) - + trustingPeriod, + - that is, if core verification is started at + startTime < startHeader.Time + trustingPeriod - WCG * (Comp + 2 Delta) + +- one may then do an inductive argument from this point on, depending + on the implementation of `Schedule`. We may have to account for the + headers that are already + downloaded, but they are checked against the new *LightStore.LatestVerified*. + +> We observe that +> the worst case time it needs to verify the header of height +> *targetHeight* depends mainly on how frequent the validator set on the +> blockchain changes. That core verification terminates successfully +> crucially depends on the check [TP.1], that is, that the headers in +> *LightStore* do not expire in the time needed to download more +> headers, which depends on the creation time of the headers in +> *LightStore*. That is, termination of core verification is highly +> depending on the data stored in the blockchain. +> The current light client core verification protocol exploits that, in +> practice, changes in the validator set are rare. For instance, +> consider the following scenario. + +#### No change in validator set + +If on the blockchain the validator set of the block at height +*targetHeight* is equal to *startHeader.NextValidators*: + +- there is one round trip in `FetchLightBlock` to download the light + block + of height + *targetHeight*, and *Comp* to check it. +- as the validator sets are equal, `Verify` returns `SUCCESS`, if + *startHeader.Time > now - trustingPeriod*. +- that is, if *startTime < startHeader.Header.Time + trustingPeriod - + 2 Delta - Comp*, then core verification terminates successfully + +# Part V - Supporting the IBC Relayer + +The above specification focuses on the most common case, which also +constitutes the most challenging task: using the Tendermint [security +model][TMBC-FM-2THIRDS-link] to verify light blocks without +downloading all intermediate blocks. To focus on this challenge, above +we have restricted ourselves to the case where *targetHeight* is +greater than the height of any trusted header. This simplified +presentation of the algorithm as initially +`lightStore.LatestVerified()` is less than *targetHeight*, and in the +process of verification `lightStore.LatestVerified()` increases until +*targetHeight* is reached. + +For [IBC][ibc-rs] it might be that some "older" header is +needed, that is, *targetHeight < lightStore.LatestVerified()*. In this section we present a preliminary design, and we mark some +remaining open questions. +If *targetHeight < lightStore.LatestVerified()* our design separates +the following cases: + +- A previous instance of `VerifyToTarget` has already downloaded the + light block of *targetHeight*. There are two cases + - the light block has been verified + - the light block has not been verified yet +- No light block of *targetHeight* had been downloaded before. There + are two cases: + - there exists a verified light block of height less than *targetHeight* + - otherwise. In this case we need to do "backwards verification" + using the hash of the previous block in the `LastBlockID` field + of a header. + +**Open Question:** what are the security assumptions for backward +verification. Should we check that the light block we verify from +(and/or the checked light block) is within the trusting period? + +The design just presents the above case +distinction as a function, and defines some auxiliary functions in the +same way the protocol was presented in +[Part IV](#part-iv---light-client-verification-protocol). + +```go +func (ls LightStore) LatestPrevious(height Height) (LightBlock, bool) +``` + +- Expected postcondition + - returns a light block *lb* that satisfies: + - *lb* is in lightStore + - *lb* is verified and not expired + - *lb.Header.Height < height* + - for all *b* in lightStore s.t. *b* is verified and not expired it + holds *lb.Header.Height >= b.Header.Height* + - *false* in the second argument if + the LightStore does not contain such an *lb*. + +```go +func (ls LightStore) MinVerified() (LightBlock, bool) +``` + +- Expected postcondition + - returns a light block *lb* that satisfies: + - *lb* is in lightStore + - *lb* is verified **Open Question:** replace by trusted? + - *lb.Header.Height* is minimal in the lightStore + - **Open Question:** according to this, it might be expired (outside the + trusting period). This approach appears safe. Are there reasons we + should not do that? + - *false* in the second argument if + the LightStore does not contain such an *lb*. + +If a height that is smaller than the smallest height in the lightstore +is required, we check the hashes backwards. This is done with the +following function: + +#### **[LCV-FUNC-BACKWARDS.1]** + +```go +func Backwards (primary PeerID, lightStore LightStore, targetHeight Height) + (LightStore, Result) { + + lb,res = lightStore.MinVerified() + if res = false { + return (lightStore, ResultFailure) + } + + latest := lb.Header + for i := lb.Header.height - 1; i >= targetHeight; i-- { + // here we download height-by-height. We might first download all + // headers down to targetHeight and then check them. + current := FetchLightBlock(primary,i) + if (hash(current) != latest.Header.LastBlockId) { + return (lightStore, ResultFailure) + } + else { + lightStore.Update(current, StateVerified) + // **Open Question:** Do we need a new state type for + // backwards verified light blocks? + } + latest = current + } + return (lightStore, ResultSuccess) +} +``` + +The following function just decided based on the required height which +method should be used. + +#### **[LCV-FUNC-IBCMAIN.1]** + +```go +func Main (primary PeerID, lightStore LightStore, targetHeight Height) + (LightStore, Result) { + + b1, r1 = lightStore.Get(targetHeight) + if r1 = true and b1.State = StateVerified { + // block already there + return (lightStore, ResultSuccess) + } + + if targetHeight > lightStore.LatestVerified.height { + // case of Part IV + return VerifyToTarget(primary, lightStore, targetHeight) + } + else { + b2, r2 = lightStore.LatestPrevious(targetHeight); + if r2 = true { + // make auxiliary lightStore auxLS to call VerifyToTarget. + // VerifyToTarget uses LatestVerified of the given lightStore + // For that we need: + // auxLS.LatestVerified = lightStore.LatestPrevious(targetHeight) + auxLS.Init; + auxLS.Update(b2,StateVerified); + if r1 = true { + // we need to verify a previously downloaded light block. + // we add it to the auxiliary store so that VerifyToTarget + // does not download it again + auxLS.Update(b1,b1.State); + } + auxLS, res2 = VerifyToTarget(primary, auxLS, targetHeight) + // move all lightblocks from auxLS to lightStore, + // maintain state + // we do that whether VerifyToTarget was successful or not + for i, s range auxLS { + lighStore.Update(s,s.State) + } + return (lightStore, res2) + } + else { + return Backwards(primary, lightStore, targetHeight) + } + } +} +``` + + + + + + + + + + + + + + + + + + + +# References + +[[block]] Specification of the block data structure. + +[[RPC]] RPC client for Tendermint + +[[fork-detector]] The specification of the light client fork detector. + +[[fullnode]] Specification of the full node API + +[[ibc-rs]] Rust implementation of IBC modules and relayer. + +[[lightclient]] The light client ADR [77d2651 on Dec 27, 2019]. + +[RPC]: https://docs.tendermint.com/master/rpc/ + +[block]: https://github.com/tendermint/spec/blob/d46cd7f573a2c6a2399fcab2cde981330aa63f37/spec/core/data_structures.md + +[TMBC-HEADER-link]: #tmbc-header1 +[TMBC-SEQ-link]: #tmbc-seq1 +[TMBC-CorrFull-link]: #tmbc-corr-full1 +[TMBC-Auth-Byz-link]: #tmbc-auth-byz1 +[TMBC-TIME_PARAMS-link]: #tmbc-time-params1 +[TMBC-FM-2THIRDS-link]: #tmbc-fm-2thirds1 +[TMBC-VAL-CONTAINS-CORR-link]: #tmbc-val-contains-corr1 +[TMBC-VAL-COMMIT-link]: #tmbc-val-commit1 +[TMBC-SOUND-DISTR-POSS-COMMIT-link]: #tmbc-sound-distr-poss-commit1 + +[lightclient]: https://github.com/interchainio/tendermint-rs/blob/e2cb9aca0b95430fca2eac154edddc9588038982/docs/architecture/adr-002-lite-client.md +[fork-detector]: https://github.com/informalsystems/tendermint-rs/blob/master/docs/spec/lightclient/detection.md +[fullnode]: https://github.com/tendermint/spec/blob/master/spec/blockchain/fullnode.md + +[ibc-rs]:https://github.com/informalsystems/ibc-rs + +[FN-LuckyCase-link]: https://github.com/tendermint/spec/blob/master/spec/blockchain/fullnode.md#fn-luckycase + +[blockchain-validator-set]: https://github.com/tendermint/spec/blob/master/spec/blockchain/blockchain.md#data-structures +[fullnode-data-structures]: https://github.com/tendermint/spec/blob/master/spec/blockchain/fullnode.md#data-structures + +[FN-ManifestFaulty-link]: https://github.com/tendermint/spec/blob/master/spec/blockchain/fullnode.md#fn-manifestfaulty + +[arXiv]: https://arxiv.org/abs/1807.04938 \ No newline at end of file diff --git a/spec/light-client/verification/verification_002_draft.md b/spec/light-client/verification/verification_002_draft.md new file mode 100644 index 000000000..9047eb417 --- /dev/null +++ b/spec/light-client/verification/verification_002_draft.md @@ -0,0 +1,1061 @@ +# Light Client Verification + +The light client implements a read operation of a +[header][TMBC-HEADER-link] from the [blockchain][TMBC-SEQ-link], by +communicating with full nodes. As some full nodes may be faulty, this +functionality must be implemented in a fault-tolerant way. + +In the Tendermint blockchain, the validator set may change with every +new block. The staking and unbonding mechanism induces a [security +model][TMBC-FM-2THIRDS-link]: starting at time *Time* of the +[header][TMBC-HEADER-link], +more than two-thirds of the next validators of a new block are correct +for the duration of *TrustedPeriod*. The fault-tolerant read +operation is designed for this security model. + +The challenge addressed here is that the light client might have a +block of height *h1* and needs to read the block of height *h2* +greater than *h1*. Checking all headers of heights from *h1* to *h2* +might be too costly (e.g., in terms of energy for mobile devices). +This specification tries to reduce the number of intermediate blocks +that need to be checked, by exploiting the guarantees provided by the +[security model][TMBC-FM-2THIRDS-link]. + +# Status + +## Previous Versions + +- [[001_published]](./verification_001_published.md) + is thoroughly reviewed, and the protocol has been +formalized in TLA+ and model checked. + +## Issues that are addressed in this revision + +As it is part of the larger light node, its data structures and +functions interact with the attack dectection functionality of the light +client. As a result of the work on + +- [attack detection](https://github.com/tendermint/spec/pull/164) for light nodes + +- attack detection for IBC and [relayer requirements](https://github.com/informalsystems/tendermint-rs/issues/497) + +- light client + [supervisor](https://github.com/tendermint/spec/pull/159) (also in + [Rust proposal](https://github.com/informalsystems/tendermint-rs/pull/509)) + +adaptations to the semantics and functions exposed by the LightStore +needed to be made. In contrast to [version +001](./verification_001_published.md) we specify the following: + +- `VerifyToTarget` and `Backwards` are called with a single lightblock + as root of trust in contrast to passing the complete lightstore. + +- During verification, we record for each lightblock which other + lightblock can be used to verify it in one step. This is needed to + generate verification traces that are needed for IBC. + +# Outline + +- [Part I](#part-i---tendermint-blockchain): Introduction of + relevant terms of the Tendermint +blockchain. + +- [Part II](#part-ii---sequential-definition-of-the-verification-problem): Introduction +of the problem addressed by the Lightclient Verification protocol. + - [Verification Informal Problem + statement](#Verification-Informal-Problem-statement): For the general + audience, that is, engineers who want to get an overview over what + the component is doing from a bird's eye view. + - [Sequential Problem statement](#Sequential-Problem-statement): + Provides a mathematical definition of the problem statement in + its sequential form, that is, ignoring the distributed aspect of + the implementation of the blockchain. + +- [Part III](#part-iii---light-client-as-distributed-system): Distributed + aspects of the light client, system assumptions and temporal + logic specifications. + + - [Incentives](#incentives): how faulty full nodes may benefit from + misbehaving and how correct full nodes benefit from cooperating. + + - [Computational Model](#Computational-Model): + timing and correctness assumptions. + + - [Distributed Problem Statement](#Distributed-Problem-Statement): + temporal properties that formalize safety and liveness + properties in the distributed setting. + +- [Part IV](#part-iv---light-client-verification-protocol): + Specification of the protocols. + + - [Definitions](#Definitions): Describes inputs, outputs, + variables used by the protocol, auxiliary functions + + - [Core Verification](#core-verification): gives an outline of the solution, + and details of the functions used (with preconditions, + postconditions, error conditions). + + - [Liveness Scenarios](#liveness-scenarios): when the light + client makes progress depends heavily on the changes in the + validator sets of the blockchain. We discuss some typical scenarios. + +- [Part V](#part-v---supporting-the-ibc-relayer): The above parts + focus on a common case where the last verified block has height *h1* + and the + requested height *h2* satisfies *h2 > h1*. For IBC, there are + scenarios where this might not be the case. In this part, we provide + some preliminaries for supporting this. As not all details of the + IBC requirements are clear by now, we do not provide a complete + specification at this point. We mark with "Open Question" points + that need to be addressed in order to finalize this specification. + It should be noted that the technically + most challenging case is the one specified in Part IV. + +In this document we quite extensively use tags in order to be able to +reference assumptions, invariants, etc. in future communication. In +these tags we frequently use the following short forms: + +- TMBC: Tendermint blockchain +- SEQ: for sequential specifications +- LCV: Lightclient Verification +- LIVE: liveness +- SAFE: safety +- FUNC: function +- INV: invariant +- A: assumption + +# Part I - Tendermint Blockchain + +## Header Fields necessary for the Light Client + +#### **[TMBC-HEADER.1]** + +A set of blockchain transactions is stored in a data structure called +*block*, which contains a field called *header*. (The data structure +*block* is defined [here][block]). As the header contains hashes to +the relevant fields of the block, for the purpose of this +specification, we will assume that the blockchain is a list of +headers, rather than a list of blocks. + +#### **[TMBC-HASH-UNIQUENESS.1]** + +We assume that every hash in the header identifies the data it hashes. +Therefore, in this specification, we do not distinguish between hashes and the +data they represent. + +#### **[TMBC-HEADER-FIELDS.2]** + +A header contains the following fields: + +- `Height`: non-negative integer +- `Time`: time (non-negative integer) +- `LastBlockID`: Hashvalue +- `LastCommit` DomainCommit +- `Validators`: DomainVal +- `NextValidators`: DomainVal +- `Data`: DomainTX +- `AppState`: DomainApp +- `LastResults`: DomainRes + +#### **[TMBC-SEQ.1]** + +The Tendermint blockchain is a list *chain* of headers. + +#### **[TMBC-VALIDATOR-PAIR.1]** + +Given a full node, a +*validator pair* is a pair *(peerID, voting_power)*, where + +- *peerID* is the PeerID (public key) of a full node, +- *voting_power* is an integer (representing the full node's + voting power in a certain consensus instance). + +> In the Golang implementation the data type for *validator +pair* is called `Validator` + +#### **[TMBC-VALIDATOR-SET.1]** + +A *validator set* is a set of validator pairs. For a validator set +*vs*, we write *TotalVotingPower(vs)* for the sum of the voting powers +of its validator pairs. + +#### **[TMBC-VOTE.1]** + +A *vote* contains a `prevote` or `precommit` message sent and signed by +a validator node during the execution of [consensus][arXiv]. Each +message contains the following fields + +- `Type`: prevote or precommit +- `Height`: positive integer +- `Round` a positive integer +- `BlockID` a Hashvalue of a block (not necessarily a block of the chain) + +#### **[TMBC-COMMIT.1]** + +A commit is a set of `precommit` message. + +## Tendermint Failure Model + +#### **[TMBC-AUTH-BYZ.1]** + +We assume the authenticated Byzantine fault model in which no node (faulty or +correct) may break digital signatures, but otherwise, no additional +assumption is made about the internal behavior of faulty +nodes. That is, faulty nodes are only limited in that they cannot forge +messages. + +#### **[TMBC-TIME-PARAMS.1]** + +A Tendermint blockchain has the following configuration parameters: + +- *unbondingPeriod*: a time duration. +- *trustingPeriod*: a time duration smaller than *unbondingPeriod*. + +#### **[TMBC-CORRECT.1]** + +We define a predicate *correctUntil(n, t)*, where *n* is a node and *t* is a +time point. +The predicate *correctUntil(n, t)* is true if and only if the node *n* +follows all the protocols (at least) until time *t*. + +#### **[TMBC-FM-2THIRDS.1]** + +If a block *h* is in the chain, +then there exists a subset *CorrV* +of *h.NextValidators*, such that: + +- *TotalVotingPower(CorrV) > 2/3 + TotalVotingPower(h.NextValidators)*; cf. [TMBC-VALIDATOR-SET.1] +- For every validator pair *(n,p)* in *CorrV*, it holds *correctUntil(n, + h.Time + trustingPeriod)*; cf. [TMBC-CORRECT.1] + +> The definition of correct +> [**[TMBC-CORRECT.1]**][TMBC-CORRECT-link] refers to realtime, while it +> is used here with *Time* and *trustingPeriod*, which are "hardware +> times". We do not make a distinction here. + +#### **[TMBC-CORR-FULL.1]** + +Every correct full node locally stores a prefix of the +current list of headers from [**[TMBC-SEQ.1]**][TMBC-SEQ-link]. + +## What the Light Client Checks + +> From [TMBC-FM-2THIRDS.1] we directly derive the following observation: + +#### **[TMBC-VAL-CONTAINS-CORR.1]** + +Given a (trusted) block *tb* of the blockchain, a given set of full nodes +*N* contains a correct node at a real-time *t*, if + +- *t - trustingPeriod < tb.Time < t* +- the voting power in tb.NextValidators of nodes in *N* is more + than 1/3 of *TotalVotingPower(tb.NextValidators)* + +> The following describes how a commit for a given block *b* must look +> like. + +#### **[TMBC-SOUND-DISTR-POSS-COMMIT.1]** + +For a block *b*, each element *pc* of *PossibleCommit(b)* satisfies: + +- *pc* contains only votes (cf. [TMBC-VOTE.1]) + by validators from *b.Validators* +- the sum of the voting powers in *pc* is greater than 2/3 + *TotalVotingPower(b.Validators)* +- and there is an *r* such that each vote *v* in *pc* satisfies + - v.Type = precommit + - v.Height = b.Height + - v.Round = r + - v.blockID = hash(b) + +> The following property comes from the validity of the [consensus][arXiv]: A +> correct validator node only sends `prevote` or `precommit`, if +> `BlockID` of the new (to-be-decided) block is equal to the hash of +> the last block. + +#### **[TMBC-VAL-COMMIT.1]** + +If for a block *b*, a commit *c* + +- contains at least one validator pair *(v,p)* such that *v* is a + **correct** validator node, and +- is contained in *PossibleCommit(b)* + +then the block *b* is on the blockchain. + +## Context of this document + +In this document we specify the light client verification component, +called *Core Verification*. The *Core Verification* communicates with +a full node. As full nodes may be faulty, it cannot trust the +received information, but the light client has to check whether the +header it receives coincides with the one generated by Tendermint +consensus. + +The two + properties [[TMBC-VAL-CONTAINS-CORR.1]][TMBC-VAL-CONTAINS-CORR-link] and +[[TMBC-VAL-COMMIT]][TMBC-VAL-COMMIT-link] formalize the checks done + by this specification: +Given a trusted block *tb* and an untrusted block *ub* with a commit *cub*, +one has to check that *cub* is in *PossibleCommit(ub)*, and that *cub* +contains a correct node using *tb*. + +# Part II - Sequential Definition of the Verification Problem + +## Verification Informal Problem statement + +Given a height *targetHeight* as an input, the *Verifier* eventually +stores a header *h* of height *targetHeight* locally. This header *h* +is generated by the Tendermint [blockchain][block]. In +particular, a header that was not generated by the blockchain should +never be stored. + +## Sequential Problem statement + +#### **[LCV-SEQ-LIVE.1]** + +The *Verifier* gets as input a height *targetHeight*, and eventually stores the +header of height *targetHeight* of the blockchain. + +#### **[LCV-SEQ-SAFE.1]** + +The *Verifier* never stores a header which is not in the blockchain. + +# Part III - Light Client as Distributed System + +## Incentives + +Faulty full nodes may benefit from lying to the light client, by making the +light client accept a block that deviates (e.g., contains additional +transactions) from the one generated by Tendermint consensus. +Users using the light client might be harmed by accepting a forged header. + +The [attack detector][attack-detector] of the light client may help the +correct full nodes to understand whether their header is a good one. +Hence, in combination with the light client detector, the correct full +nodes have the incentive to respond. We can thus base liveness +arguments on the assumption that correct full nodes reliably talk to +the light client. + +## Computational Model + +#### **[LCV-A-PEER.1]** + +The verifier communicates with a full node called *primary*. No assumption is made about the full node (it may be correct or faulty). + +#### **[LCV-A-COMM.1]** + +Communication between the light client and a correct full node is +reliable and bounded in time. Reliable communication means that +messages are not lost, not duplicated, and eventually delivered. There +is a (known) end-to-end delay *Delta*, such that if a message is sent +at time *t* then it is received and processes by time *t + Delta*. +This implies that we need a timeout of at least *2 Delta* for remote +procedure calls to ensure that the response of a correct peer arrives +before the timeout expires. + +#### **[LCV-A-TFM.1]** + +The Tendermint blockchain satisfies the Tendermint failure model [**[TMBC-FM-2THIRDS.1]**][TMBC-FM-2THIRDS-link]. + +#### **[LCV-A-VAL.1]** + +The system satisfies [**[TMBC-AUTH-BYZ.1]**][TMBC-Auth-Byz-link] and +[**[TMBC-FM-2THIRDS.1]**][TMBC-FM-2THIRDS-link]. Thus, there is a +blockchain that satisfies the soundness requirements (that is, the +validation rules in [[block]]). + +## Distributed Problem Statement + +### Two Kinds of Termination + +We do not assume that *primary* is correct. Under this assumption no +protocol can guarantee the combination of the sequential +properties. Thus, in the (unreliable) distributed setting, we consider +two kinds of termination (successful and failure) and we will specify +below under what (favorable) conditions *Core Verification* ensures to +terminate successfully, and satisfy the requirements of the sequential +problem statement: + +#### **[LCV-DIST-TERM.1]** + +*Core Verification* either *terminates +successfully* or it *terminates with failure*. + +### Design choices + +#### **[LCV-DIST-STORE.2]** + +*Core Verification* returns a data structure called *LightStore* that +contains light blocks (that contain a header). + +#### **[LCV-DIST-INIT.2]** + +*Core Verification* is called with + +- *primary*: the PeerID of a full node (with verification communicates) +- *root*: a light block (the root of trust) +- *targetHeight*: a height (the height of a header that should be obtained) + +### Temporal Properties + +#### **[LCV-DIST-SAFE.2]** + +It is always the case that every header in *LightStore* was +generated by an instance of Tendermint consensus. + +#### **[LCV-DIST-LIVE.2]** + +If a new instance of *Core Verification* is called with a +height *targetHeight* greater than root.Header.Height it must +must eventually terminate. + +- If + - the *primary* is correct (and locally has the block of + *targetHeight*), and + - the age of root is always less than the trusting period, + then *Core Verification* adds a verified header *hd* with height + *targetHeight* to *LightStore* and it **terminates successfully** + +> These definitions imply that if the primary is faulty, a header may or +> may not be added to *LightStore*. In any case, +> [**[LCV-DIST-SAFE.2]**](#lcv-dist-safe2) must hold. +> The invariant [**[LCV-DIST-SAFE.2]**](#lcv-dist-safe2) and the liveness +> requirement [**[LCV-DIST-LIVE.2]**](#lcv-dist-life) +> allow that verified headers are added to *LightStore* whose +> height was not passed +> to the verifier (e.g., intermediate headers used in bisection; see below). +> Note that for liveness, initially having a *root* within +> the *trustinPeriod* is not sufficient. However, as this +> specification will leave some freedom with respect to the strategy +> in which order to download intermediate headers, we do not give a +> more precise liveness specification here. After giving the +> specification of the protocol, we will discuss some liveness +> scenarios [below](#liveness-scenarios). + +### Solving the sequential specification + +This specification provides a partial solution to the sequential specification. +The *Verifier* solves the invariant of the sequential part + +[**[LCV-DIST-SAFE.2]**](#lcv-dist-safe2) => [**[LCV-SEQ-SAFE.1]**](#lcv-seq-safe1) + +In the case the primary is correct, and *root* is a recent header in *LightStore*, the verifier satisfies the liveness requirements. + +⋀ *primary is correct* +⋀ *root.header.Time* > *now* - *trustingPeriod* +⋀ [**[LCV-A-Comm.1]**](#lcv-a-comm) ⋀ ( + ( [**[TMBC-CorrFull.1]**][TMBC-CorrFull-link] ⋀ + [**[LCV-DIST-LIVE.2]**](#lcv-dist-live2) ) + ⟹ [**[LCV-SEQ-LIVE.1]**](#lcv-seq-live1) +) + +# Part IV - Light Client Verification Protocol + +We provide a specification for Light Client Verification. The local +code for verification is presented by a sequential function +`VerifyToTarget` to highlight the control flow of this functionality. +We note that if a different concurrency model is considered for +an implementation, the sequential flow of the function may be +implemented with mutexes, etc. However, the light client verification +is partitioned into three blocks that can be implemented and tested +independently: + +- `FetchLightBlock` is called to download a light block (header) of a + given height from a peer. +- `ValidAndVerified` is a local code that checks the header. +- `Schedule` decides which height to try to verify next. We keep this + underspecified as different implementations (currently in Goland and + Rust) may implement different optimizations here. We just provide + necessary conditions on how the height may evolve. + + + + +## Definitions + +### Data Types + +The core data structure of the protocol is the LightBlock. + +#### **[LCV-DATA-LIGHTBLOCK.1]** + +```go +type LightBlock struct { + Header Header + Commit Commit + Validators ValidatorSet +} +``` + +#### **[LCV-DATA-LIGHTSTORE.2]** + +LightBlocks are stored in a structure which stores all LightBlock from +initialization or received from peers. + +```go +type LightStore struct { + ... +} + +``` + +#### **[LCV-DATA-LS-ROOT.2]** + +For each lightblock in a lightstore we record in a field `verification-root` of +type Height. + +> `verification-root` records the height of a lightblock that can be used to verify +> the lightblock in one step + +#### **[LCV-INV-LS-ROOT.2]** + +At all times, if a lightblock *b* in a lightstore has *b.verification-root = h*, +then + +- the lightstore contains a lightblock with height *h*, or +- *b* has the minimal height of all lightblocks in lightstore, then + b.verification-root should be nil. + +The LightStore exposes the following functions to query stored LightBlocks. + +#### **[LCV-DATA-LS-STATE.1]** + +Each LightBlock is in one of the following states: + +```go +type VerifiedState int + +const ( + StateUnverified = iota + 1 + StateVerified + StateFailed + StateTrusted +) +``` + +#### **[LCV-FUNC-GET.1]** + +```go +func (ls LightStore) Get(height Height) (LightBlock, bool) +``` + +- Expected postcondition + - returns a LightBlock at a given height or false in the second argument if + the LightStore does not contain the specified LightBlock. + +#### **[LCV-FUNC-LATEST.1]** + +```go +func (ls LightStore) Latest() LightBlock +``` + +- Expected postcondition + - returns the highest light block + +#### **[LCV-FUNC-ADD.1]** + +```go +func (ls LightStore) Add(newBlock) +``` + +- Expected precondition + - the lightstore is empty +- Expected postcondition + - adds newBlock into light store + +#### **[LCV-FUNC-STORE.1]** + +```go +func (ls LightStore) store_chain(newLS LightStore) +``` + +- Expected postcondition + - adds `newLS` to the lightStore. + +#### **[LCV-FUNC-LATEST-VERIF.2]** + +```go +func (ls LightStore) LatestVerified() LightBlock +``` + +- Expected postcondition + - returns the highest light block whose state is `StateVerified` + +#### **[LCV-FUNC-FILTER.1]** + +```go +func (ls LightStore) FilterVerified() LightStore +``` + +- Expected postcondition + - returns all the lightblocks of the lightstore with state `StateVerified` + +#### **[LCV-FUNC-UPDATE.2]** + +```go +func (ls LightStore) Update(lightBlock LightBlock, verfiedState +VerifiedState, root-height Height) +``` + +- Expected postcondition + - the lightblock is part of the lightstore + - The state of the LightBlock is set to *verifiedState*. + - The verification-root of the LightBlock is set to *root-height* + +```go +func (ls LightStore) TraceTo(lightBlock LightBlock) (LightBlock, LightStore) +``` + +- Expected postcondition + - returns a **trusted** lightblock `root` from the lightstore with a height + less than `lightBlock` + - returns a lightstore that contains lightblocks that constitute a + [verification trace](TODOlinkToDetectorSpecOnceThere) from + `root` to `lightBlock` (including `lightBlock`) + +### Inputs + +- *root*: A light block that is trusted +- *primary*: peerID +- *targetHeight*: the height of the needed header + +### Configuration Parameters + +- *trustThreshold*: a float. Can be used if correctness should not be based on more voting power and 1/3. +- *trustingPeriod*: a time duration [**[TMBC-TIME_PARAMS.1]**][TMBC-TIME_PARAMS-link]. +- *clockDrift*: a time duration. Correction parameter dealing with only approximately synchronized clocks. + +### Variables + +- *nextHeight*: initially *targetHeight* + > *nextHeight* should be thought of the "height of the next header we need + > to download and verify" + +### Assumptions + +#### **[LCV-A-INIT.2]** + +- *root* is from the blockchain + +- *targetHeight > root.Header.Height* + +### Invariants + +#### **[LCV-INV-TP.1]** + +It is always the case that *LightStore.LatestTrusted.Header.Time > now - trustingPeriod*. + +> If the invariant is violated, the light client does not have a +> header it can trust. A trusted header must be obtained externally, +> its trust can only be based on social consensus. +> We use the convention that root is assumed to be verified. + +### Used Remote Functions + +We use the functions `commit` and `validators` that are provided +by the [RPC client for Tendermint][RPC]. + +```go +func Commit(height int64) (SignedHeader, error) +``` + +- Implementation remark + - RPC to full node *n* + - JSON sent: + +```javascript +// POST /commit +{ + "jsonrpc": "2.0", + "id": "ccc84631-dfdb-4adc-b88c-5291ea3c2cfb", // UUID v4, unique per request + "method": "commit", + "params": { + "height": 1234 + } +} +``` + +- Expected precondition + - header of `height` exists on blockchain +- Expected postcondition + - if *n* is correct: Returns the signed header of height `height` + from the blockchain if communication is timely (no timeout) + - if *n* is faulty: Returns a signed header with arbitrary content +- Error condition + - if *n* is correct: precondition violated or timeout + - if *n* is faulty: arbitrary error + +----; + +```go +func Validators(height int64) (ValidatorSet, error) +``` + +- Implementation remark + - RPC to full node *n* + - JSON sent: + +```javascript +// POST /validators +{ + "jsonrpc": "2.0", + "id": "ccc84631-dfdb-4adc-b88c-5291ea3c2cfb", // UUID v4, unique per request + "method": "validators", + "params": { + "height": 1234 + } +} +``` + +- Expected precondition + - header of `height` exists on blockchain +- Expected postcondition + - if *n* is correct: Returns the validator set of height `height` + from the blockchain if communication is timely (no timeout) + - if *n* is faulty: Returns arbitrary validator set +- Error condition + - if *n* is correct: precondition violated or timeout + - if *n* is faulty: arbitrary error + +----; + +### Communicating Function + +#### **[LCV-FUNC-FETCH.1]** + + ```go +func FetchLightBlock(peer PeerID, height Height) LightBlock +``` + +- Implementation remark + - RPC to peer at *PeerID* + - calls `Commit` for *height* and `Validators` for *height* and *height+1* +- Expected precondition + - `height` is less than or equal to height of the peer **[LCV-IO-PRE-HEIGHT.1]** +- Expected postcondition: + - if *node* is correct: + - Returns the LightBlock *lb* of height `height` + that is consistent with the blockchain + - *lb.provider = peer* **[LCV-IO-POST-PROVIDER.1]** + - *lb.Header* is a header consistent with the blockchain + - *lb.Validators* is the validator set of the blockchain at height *nextHeight* + - *lb.NextValidators* is the validator set of the blockchain at height *nextHeight + 1* + - if *node* is faulty: Returns a LightBlock with arbitrary content + [**[TMBC-AUTH-BYZ.1]**][TMBC-Auth-Byz-link] +- Error condition + - if *n* is correct: precondition violated + - if *n* is faulty: arbitrary error + - if *lb.provider != peer* + - times out after 2 Delta (by assumption *n* is faulty) + +----; + +## Core Verification + +### Outline + +The `VerifyToTarget` is the main function and uses the following functions. + +- `FetchLightBlock` is called to download the next light block. It is + the only function that communicates with other nodes +- `ValidAndVerified` checks whether header is valid and checks if a + new lightBlock should be trusted + based on a previously verified lightBlock. +- `Schedule` decides which height to try to verify next + +In the following description of `VerifyToTarget` we do not deal with error +handling. If any of the above function returns an error, VerifyToTarget just +passes the error on. + +#### **[LCV-FUNC-MAIN.2]** + +```go +func VerifyToTarget(primary PeerID, root LightBlock, + targetHeight Height) (LightStore, Result) { + + lightStore = new LightStore; + lightStore.Update(root, StateVerified, root.verifiedBy); + nextHeight := targetHeight; + + for lightStore.LatestVerified.height < targetHeight { + + // Get next LightBlock for verification + current, found := lightStore.Get(nextHeight) + if !found { + current = FetchLightBlock(primary, nextHeight) + lightStore.Update(current, StateUnverified, nil) + } + + // Verify + verdict = ValidAndVerified(lightStore.LatestVerified, current) + + // Decide whether/how to continue + if verdict == SUCCESS { + lightStore.Update(current, StateVerified, lightStore.LatestVerified.Height) + } + else if verdict == NOT_ENOUGH_TRUST { + // do nothing + // the light block current passed validation, but the validator + // set is too different to verify it. We keep the state of + // current at StateUnverified. For a later iteration, Schedule + // might decide to try verification of that light block again. + } + else { + // verdict is some error code + lightStore.Update(current, StateFailed, nil) + return (nil, ResultFailure) + } + nextHeight = Schedule(lightStore, nextHeight, targetHeight) + } + return (lightStore.FilterVerified, ResultSuccess) +} +``` + +- Expected precondition + - *root* is within the *trustingPeriod* **[LCV-PRE-TP.1]** + - *targetHeight* is greater than the height of *root* +- Expected postcondition: + - returns *lightStore* that contains a LightBlock that corresponds to a block + of the blockchain of height *targetHeight* + (that is, the LightBlock has been added to *lightStore*) **[LCV-POST-LS.1]** +- Error conditions + - if the precondition is violated + - if `ValidAndVerified` or `FetchLightBlock` report an error + - if [**[LCV-INV-TP.1]**](#LCV-INV-TP.1) is violated + +### Details of the Functions + +#### **[LCV-FUNC-VALID.2]** + +```go +func ValidAndVerified(trusted LightBlock, untrusted LightBlock) Result +``` + +- Expected precondition: + - *untrusted* is valid, that is, satisfies the soundness [checks][block] + - *untrusted* is **well-formed**, that is, + - *untrusted.Header.Time < now + clockDrift* + - *untrusted.Validators = hash(untrusted.Header.Validators)* + - *untrusted.NextValidators = hash(untrusted.Header.NextValidators)* + - *trusted.Header.Time > now - trustingPeriod* + - the `Height` and `Time` of `trusted` are smaller than the Height and + `Time` of `untrusted`, respectively + - the *untrusted.Header* is well-formed (passes the tests from + [[block]]), and in particular + - if the untrusted header `unstrusted.Header` is the immediate + successor of `trusted.Header`, then it holds that + - *trusted.Header.NextValidators = + untrusted.Header.Validators*, and + moreover, + - *untrusted.Header.Commit* + - contains signatures by more than two-thirds of the validators + - contains no signature from nodes that are not in *trusted.Header.NextValidators* +- Expected postcondition: + - Returns `SUCCESS`: + - if *untrusted* is the immediate successor of *trusted*, or otherwise, + - if the signatures of a set of validators that have more than + *max(1/3,trustThreshold)* of voting power in + *trusted.Nextvalidators* is contained in + *untrusted.Commit* (that is, header passes the tests + [**[TMBC-VAL-CONTAINS-CORR.1]**][TMBC-VAL-CONTAINS-CORR-link] + and [**[TMBC-VAL-COMMIT.1]**][TMBC-VAL-COMMIT-link]) + - Returns `NOT_ENOUGH_TRUST` if: + - *untrusted* is *not* the immediate successor of + *trusted* + and the *max(1/3,trustThreshold)* threshold is not reached + (that is, if + [**[TMBC-VAL-CONTAINS-CORR.1]**][TMBC-VAL-CONTAINS-CORR-link] + fails and header is does not violate the soundness + checks [[block]]). +- Error condition: + - if precondition violated + +----; + +#### **[LCV-FUNC-SCHEDULE.1]** + +```go +func Schedule(lightStore, nextHeight, targetHeight) Height +``` + +- Implementation remark: If picks the next height to be verified. + We keep the precise choice of the next header under-specified. It is + subject to performance optimizations that do not influence the correctness +- Expected postcondition: **[LCV-SCHEDULE-POST.1]** + Return *H* s.t. + 1. if *lightStore.LatestVerified.Height = nextHeight* and + *lightStore.LatestVerified < targetHeight* then + *nextHeight < H <= targetHeight* + 2. if *lightStore.LatestVerified.Height < nextHeight* and + *lightStore.LatestVerified.Height < targetHeight* then + *lightStore.LatestVerified.Height < H < nextHeight* + 3. if *lightStore.LatestVerified.Height = targetHeight* then + *H = targetHeight* + +> Case i. captures the case where the light block at height *nextHeight* +> has been verified, and we can choose a height closer to the *targetHeight*. +> As we get the *lightStore* as parameter, the choice of the next height can +> depend on the *lightStore*, e.g., we can pick a height for which we have +> already downloaded a light block. +> In Case ii. the header of *nextHeight* could not be verified, and we need to pick a smaller height. +> In Case iii. is a special case when we have verified the *targetHeight*. + +### Solving the distributed specification + +Analogous to [[001_published]](./verification_001_published.md#solving-the-distributed-specification) + +## Liveness Scenarios + +Analogous to [[001_published]](./verification_001_published.md#liveness-scenarios) + +# Part V - Supporting the IBC Relayer + +The above specification focuses on the most common case, which also +constitutes the most challenging task: using the Tendermint [security +model][TMBC-FM-2THIRDS-link] to verify light blocks without +downloading all intermediate blocks. To focus on this challenge, above +we have restricted ourselves to the case where *targetHeight* is +greater than the height of any trusted header. This simplified +presentation of the algorithm as initially +`lightStore.LatestVerified()` is less than *targetHeight*, and in the +process of verification `lightStore.LatestVerified()` increases until +*targetHeight* is reached. + +For [IBC][ibc-rs] there are two additional challenges: + +1. it might be that some "older" header is needed, that is, +*targetHeight < lightStore.LatestVerified()*. The +[supervisor](../supervisor/supervisor.md) checks whether it is in this +case by calling `LatestPrevious` and `MinVerified` and if so it calls +`Backwards`. All these functions are specified below. + +2. In order to submit proof of a light client attack, a relayer may + need to submit a verification trace. This it is important to + compute such a trace efficiently. That it can be done is based on + the invariant [[LCV-INV-LS-ROOT.2]](#LCV-INV-LS-ROOT2) that needs + to be maintained by the light client. In particular + `VerifyToTarget` and `Backwards` need to take care of setting + `verification-root`. + +#### **[LCV-FUNC-LATEST-PREV.2]** + +```go +func (ls LightStore) LatestPrevious(height Height) (LightBlock, bool) +``` + +- Expected postcondition + - returns a light block *lb* that satisfies: + - *lb* is in lightStore + - *lb* is in StateTrusted + - *lb* is not expired + - *lb.Header.Height < height* + - for all *b* in lightStore s.t. *b* is trusted and not expired it + holds *lb.Header.Height >= b.Header.Height* + - *false* in the second argument if + the LightStore does not contain such an *lb*. + +----; + +#### **[LCV-FUNC-LOWEST.2]** + +```go +func (ls LightStore) Lowest() (LightBlock) +``` + +- Expected postcondition + - returns the lowest trusted light block within trusting period + +----; + +#### **[LCV-FUNC-MIN.2]** + +```go +func (ls LightStore) MinVerified() (LightBlock, bool) +``` + +- Expected postcondition + - returns a light block *lb* that satisfies: + - *lb* is in lightStore + - *lb.Header.Height* is minimal in the lightStore + - *false* in the second argument if + the LightStore does not contain such an *lb*. + +If a height that is smaller than the smallest height in the lightstore +is required, we check the hashes backwards. This is done with the +following function: + +#### **[LCV-FUNC-BACKWARDS.2]** + +```go +func Backwards (primary PeerID, root LightBlock, targetHeight Height) + (LightStore, Result) { + + lb := root; + lightStore := new LightStore; + lightStore.Update(lb, StateTrusted, lb.verifiedBy) + + latest := lb.Header + for i := lb.Header.height - 1; i >= targetHeight; i-- { + // here we download height-by-height. We might first download all + // headers down to targetHeight and then check them. + current := FetchLightBlock(primary,i) + if (hash(current) != latest.Header.LastBlockId) { + return (nil, ResultFailure) + } + else { + // latest and current are linked together by LastBlockId + // therefore it is not relevant which we verified first + // for consistency, we store latest was veried using + // current so that the verifiedBy is always pointing down + // the chain + lightStore.Update(current, StateTrusted, nil) + lightStore.Update(latest, StateTrusted, current.Header.Height) + } + latest = current + } + return (lightStore, ResultSuccess) +} +``` + +# References + +[[block]] Specification of the block data structure. + +[[RPC]] RPC client for Tendermint + +[[attack-detector]] The specification of the light client attack detector. + +[[fullnode]] Specification of the full node API + +[[ibc-rs]] Rust implementation of IBC modules and relayer. + +[[lightclient]] The light client ADR [77d2651 on Dec 27, 2019]. + +[RPC]: https://docs.tendermint.com/master/rpc/ + +[block]: https://github.com/tendermint/spec/blob/d46cd7f573a2c6a2399fcab2cde981330aa63f37/spec/core/data_structures.md + +[TMBC-HEADER-link]: #tmbc-header1 +[TMBC-SEQ-link]: #tmbc-seq1 +[TMBC-CorrFull-link]: #tmbc-corr-full1 +[TMBC-Auth-Byz-link]: #tmbc-auth-byz1 +[TMBC-TIME_PARAMS-link]: #tmbc-time-params1 +[TMBC-FM-2THIRDS-link]: #tmbc-fm-2thirds1 +[TMBC-VAL-CONTAINS-CORR-link]: #tmbc-val-contains-corr1 +[TMBC-VAL-COMMIT-link]: #tmbc-val-commit1 +[TMBC-SOUND-DISTR-POSS-COMMIT-link]: #tmbc-sound-distr-poss-commit1 + +[lightclient]: https://github.com/interchainio/tendermint-rs/blob/e2cb9aca0b95430fca2eac154edddc9588038982/docs/architecture/adr-002-lite-client.md +[attack-detector]: https://github.com/tendermint/spec/blob/master/rust-spec/lightclient/detection/detection_001_reviewed.md +[fullnode]: https://github.com/tendermint/spec/blob/master/spec/blockchain/fullnode.md + +[ibc-rs]:https://github.com/informalsystems/ibc-rs + +[blockchain-validator-set]: https://github.com/tendermint/spec/blob/master/spec/blockchain/blockchain.md#data-structures +[fullnode-data-structures]: https://github.com/tendermint/spec/blob/master/spec/blockchain/fullnode.md#data-structures + +[FN-ManifestFaulty-link]: https://github.com/tendermint/spec/blob/master/spec/blockchain/fullnode.md#fn-manifestfaulty + +[arXiv]: https://arxiv.org/abs/1807.04938 diff --git a/spec/light-client/verification/verification_003_draft.md b/spec/light-client/verification/verification_003_draft.md new file mode 100644 index 000000000..cd38e7e96 --- /dev/null +++ b/spec/light-client/verification/verification_003_draft.md @@ -0,0 +1,76 @@ +# Light Client Verificaiton + +#### **[LCV-FUNC-VERIFYCOMMITLIGHT.1]** + +VerifyCommitLight verifies that 2/3+ of the signatures for a validator set were for +a given blockID. The function will finish early and thus may not check all signatures. + +```go +func VerifyCommitLight(chainID string, vals *ValidatorSet, blockID BlockID, +height int64, commit *Commit) error { + // run a basic validation of the arguments + if err := verifyBasicValsAndCommit(vals, commit, height, blockID); err != nil { + return err + } + + // calculate voting power needed + votingPowerNeeded := vals.TotalVotingPower() * 2 / 3 + + var ( + val *Validator + valIdx int32 + seenVals = make(map[int32]int, len(commit.Signatures)) + talliedVotingPower int64 = 0 + voteSignBytes []byte + ) + for idx, commitSig := range commit.Signatures { + // ignore all commit signatures that are not for the block + if !commitSig.ForBlock() { + continue + } + + // If the vals and commit have a 1-to-1 correspondance we can retrieve + // them by index else we need to retrieve them by address + if lookUpByIndex { + val = vals.Validators[idx] + } else { + valIdx, val = vals.GetByAddress(commitSig.ValidatorAddress) + + // if the signature doesn't belong to anyone in the validator set + // then we just skip over it + if val == nil { + continue + } + + // because we are getting validators by address we need to make sure + // that the same validator doesn't commit twice + if firstIndex, ok := seenVals[valIdx]; ok { + secondIndex := idx + return fmt.Errorf("double vote from %v (%d and %d)", val, firstIndex, secondIndex) + } + seenVals[valIdx] = idx + } + + voteSignBytes = commit.VoteSignBytes(chainID, int32(idx)) + + if !val.PubKey.VerifySignature(voteSignBytes, commitSig.Signature) { + return fmt.Errorf("wrong signature (#%d): %X", idx, commitSig.Signature) + } + + // Add the voting power of the validator + // to the tally + talliedVotingPower += val.VotingPower + + // check if we have enough signatures and can thus exit early + if talliedVotingPower > votingPowerNeeded { + return nil + } + } + + if got, needed := talliedVotingPower, votingPowerNeeded; got <= needed { + return ErrNotEnoughVotingPowerSigned{Got: got, Needed: needed} + } + + return nil +} +``` \ No newline at end of file diff --git a/spec/p2p/config.md b/spec/p2p/config.md new file mode 100644 index 000000000..b63c04f28 --- /dev/null +++ b/spec/p2p/config.md @@ -0,0 +1,49 @@ +# P2P Config + +Here we describe configuration options around the Peer Exchange. +These can be set using flags or via the `$TMHOME/config/config.toml` file. + +## Seed Mode + +`--p2p.seed_mode` + +The node operates in seed mode. In seed mode, a node continuously crawls the network for peers, +and upon incoming connection shares some peers and disconnects. + +## Seeds + +`--p2p.seeds “id100000000000000000000000000000000@1.2.3.4:26656,id200000000000000000000000000000000@2.3.4.5:4444”` + +Dials these seeds when we need more peers. They should return a list of peers and then disconnect. +If we already have enough peers in the address book, we may never need to dial them. + +## Persistent Peers + +`--p2p.persistent_peers “id100000000000000000000000000000000@1.2.3.4:26656,id200000000000000000000000000000000@2.3.4.5:26656”` + +Dial these peers and auto-redial them if the connection fails. +These are intended to be trusted persistent peers that can help +anchor us in the p2p network. The auto-redial uses exponential +backoff and will give up after a day of trying to connect. + +But If `persistent_peers_max_dial_period` is set greater than zero, +pause between each dial to each persistent peer will not exceed `persistent_peers_max_dial_period` +during exponential backoff and we keep trying again without giving up + +**Note:** If `seeds` and `persistent_peers` intersect, +the user will be warned that seeds may auto-close connections +and that the node may not be able to keep the connection persistent. + +## Private Peers + +`--p2p.private_peer_ids “id100000000000000000000000000000000,id200000000000000000000000000000000”` + +These are IDs of the peers that we do not add to the address book or gossip to +other peers. They stay private to us. + +## Unconditional Peers + +`--p2p.unconditional_peer_ids “id100000000000000000000000000000000,id200000000000000000000000000000000”` + +These are IDs of the peers which are allowed to be connected by both inbound or outbound regardless of +`max_num_inbound_peers` or `max_num_outbound_peers` of user's node reached or not. diff --git a/spec/p2p/connection.md b/spec/p2p/connection.md new file mode 100644 index 000000000..33178f479 --- /dev/null +++ b/spec/p2p/connection.md @@ -0,0 +1,111 @@ +# P2P Multiplex Connection + +## MConnection + +`MConnection` is a multiplex connection that supports multiple independent streams +with distinct quality of service guarantees atop a single TCP connection. +Each stream is known as a `Channel` and each `Channel` has a globally unique _byte id_. +Each `Channel` also has a relative priority that determines the quality of service +of the `Channel` compared to other `Channel`s. +The _byte id_ and the relative priorities of each `Channel` are configured upon +initialization of the connection. + +The `MConnection` supports three packet types: + +- Ping +- Pong +- Msg + +### Ping and Pong + +The ping and pong messages consist of writing a single byte to the connection; 0x1 and 0x2, respectively. + +When we haven't received any messages on an `MConnection` in time `pingTimeout`, we send a ping message. +When a ping is received on the `MConnection`, a pong is sent in response only if there are no other messages +to send and the peer has not sent us too many pings (TODO). + +If a pong or message is not received in sufficient time after a ping, the peer is disconnected from. + +### Msg + +Messages in channels are chopped into smaller `msgPacket`s for multiplexing. + +```go +type msgPacket struct { + ChannelID byte + EOF byte // 1 means message ends here. + Bytes []byte +} +``` + +The `msgPacket` is serialized using [Proto3](https://developers.google.com/protocol-buffers/docs/proto3). +The received `Bytes` of a sequential set of packets are appended together +until a packet with `EOF=1` is received, then the complete serialized message +is returned for processing by the `onReceive` function of the corresponding channel. + +### Multiplexing + +Messages are sent from a single `sendRoutine`, which loops over a select statement and results in the sending +of a ping, a pong, or a batch of data messages. The batch of data messages may include messages from multiple channels. +Message bytes are queued for sending in their respective channel, with each channel holding one unsent message at a time. +Messages are chosen for a batch one at a time from the channel with the lowest ratio of recently sent bytes to channel priority. + +## Sending Messages + +There are two methods for sending messages: + +```go +func (m MConnection) Send(chID byte, msg interface{}) bool {} +func (m MConnection) TrySend(chID byte, msg interface{}) bool {} +``` + +`Send(chID, msg)` is a blocking call that waits until `msg` is successfully queued +for the channel with the given id byte `chID`. The message `msg` is serialized +using the `tendermint/go-amino` submodule's `WriteBinary()` reflection routine. + +`TrySend(chID, msg)` is a nonblocking call that queues the message msg in the channel +with the given id byte chID if the queue is not full; otherwise it returns false immediately. + +`Send()` and `TrySend()` are also exposed for each `Peer`. + +## Peer + +Each peer has one `MConnection` instance, and includes other information such as whether the connection +was outbound, whether the connection should be recreated if it closes, various identity information about the node, +and other higher level thread-safe data used by the reactors. + +## Switch/Reactor + +The `Switch` handles peer connections and exposes an API to receive incoming messages +on `Reactors`. Each `Reactor` is responsible for handling incoming messages of one +or more `Channels`. So while sending outgoing messages is typically performed on the peer, +incoming messages are received on the reactor. + +```go +// Declare a MyReactor reactor that handles messages on MyChannelID. +type MyReactor struct{} + +func (reactor MyReactor) GetChannels() []*ChannelDescriptor { + return []*ChannelDescriptor{ChannelDescriptor{ID:MyChannelID, Priority: 1}} +} + +func (reactor MyReactor) Receive(chID byte, peer *Peer, msgBytes []byte) { + r, n, err := bytes.NewBuffer(msgBytes), new(int64), new(error) + msgString := ReadString(r, n, err) + fmt.Println(msgString) +} + +// Other Reactor methods omitted for brevity +... + +switch := NewSwitch([]Reactor{MyReactor{}}) + +... + +// Send a random message to all outbound connections +for _, peer := range switch.Peers().List() { + if peer.IsOutbound() { + peer.Send(MyChannelID, "Here's a random message") + } +} +``` diff --git a/spec/p2p/messages/README.md b/spec/p2p/messages/README.md new file mode 100644 index 000000000..1b5f5c60d --- /dev/null +++ b/spec/p2p/messages/README.md @@ -0,0 +1,19 @@ +--- +order: 1 +parent: + title: Messages + order: 1 +--- + +# Messages + +An implementation of the spec consists of many components. While many parts of these components are implementation specific, the p2p messages are not. In this section we will be covering all the p2p messages of components. + +There are two parts to the P2P messages, the message and the channel. The channel is message specific and messages are specific to components of Tendermint. When a node connect to a peer it will tell the other node which channels are available. This notifies the peer what services the connecting node offers. You can read more on channels in [connection.md](../connection.md#mconnection) + +- [Block Sync](./block-sync.md) +- [Mempool](./mempool.md) +- [Evidence](./evidence.md) +- [State Sync](./state-sync.md) +- [Pex](./pex.md) +- [Consensus](./consensus.md) diff --git a/spec/p2p/messages/block-sync.md b/spec/p2p/messages/block-sync.md new file mode 100644 index 000000000..48aa6155f --- /dev/null +++ b/spec/p2p/messages/block-sync.md @@ -0,0 +1,68 @@ +--- +order: 2 +--- + +# Block Sync + +## Channel + +Block sync has one channel. + +| Name | Number | +|-------------------|--------| +| BlockchainChannel | 64 | + +## Message Types + +There are multiple message types for Block Sync + +### BlockRequest + +BlockRequest asks a peer for a block at the height specified. + +| Name | Type | Description | Field Number | +|--------|-------|---------------------------|--------------| +| Height | int64 | Height of requested block | 1 | + +### NoBlockResponse + +NoBlockResponse notifies the peer requesting a block that the node does not contain it. + +| Name | Type | Description | Field Number | +|--------|-------|---------------------------|--------------| +| Height | int64 | Height of requested block | 1 | + +### BlockResponse + +BlockResponse contains the block requested. + +| Name | Type | Description | Field Number | +|-------|----------------------------------------------|-----------------|--------------| +| Block | [Block](../../core/data_structures.md#block) | Requested Block | 1 | + +### StatusRequest + +StatusRequest is an empty message that notifies the peer to respond with the highest and lowest blocks it has stored. + +> Empty message. + +### StatusResponse + +StatusResponse responds to a peer with the highest and lowest block stored. + +| Name | Type | Description | Field Number | +|--------|-------|-------------------------------------------------------------------|--------------| +| Height | int64 | Current Height of a node | 1 | +| base | int64 | First known block, if pruning is enabled it will be higher than 1 | 1 | + +### Message + +Message is a [`oneof` protobuf type](https://developers.google.com/protocol-buffers/docs/proto#oneof). The `oneof` consists of five messages. + +| Name | Type | Description | Field Number | +|-------------------|----------------------------------|--------------------------------------------------------------|--------------| +| block_request | [BlockRequest](#blockrequest) | Request a block from a peer | 1 | +| no_block_response | [NoBlockResponse](#noblockresponse) | Response saying it doe snot have the requested block | 2 | +| block_response | [BlockResponse](#blockresponse) | Response with requested block | 3 | +| status_request | [StatusRequest](#statusrequest) | Request the highest and lowest block numbers from a peer | 4 | +| status_response | [StatusResponse](#statusresponse) | Response with the highest and lowest block numbers the store | 5 | diff --git a/spec/p2p/messages/consensus.md b/spec/p2p/messages/consensus.md new file mode 100644 index 000000000..19c103750 --- /dev/null +++ b/spec/p2p/messages/consensus.md @@ -0,0 +1,149 @@ +--- +order: 7 +--- + +# Consensus + +## Channel + +Consensus has four separate channels. The channel identifiers are listed below. + +| Name | Number | +|--------------------|--------| +| StateChannel | 32 | +| DataChannel | 33 | +| VoteChannel | 34 | +| VoteSetBitsChannel | 35 | + +## Message Types + +### Proposal + +Proposal is sent when a new block is proposed. It is a suggestion of what the +next block in the blockchain should be. + +| Name | Type | Description | Field Number | +|----------|----------------------------------------------------|----------------------------------------|--------------| +| proposal | [Proposal](../../core/data_structures.md#proposal) | Proposed Block to come to consensus on | 1 | + +### Vote + +Vote is sent to vote for some block (or to inform others that a process does not vote in the +current round). Vote is defined in the +[Blockchain](https://github.com/tendermint/spec/blob/master/spec/core/data_structures.md#blockidd) +section and contains validator's +information (validator address and index), height and round for which the vote is sent, vote type, +blockID if process vote for some block (`nil` otherwise) and a timestamp when the vote is sent. The +message is signed by the validator private key. + +| Name | Type | Description | Field Number | +|------|--------------------------------------------|---------------------------|--------------| +| vote | [Vote](../../core/data_structures.md#vote) | Vote for a proposed Block | 1 | + +### BlockPart + +BlockPart is sent when gossiping a piece of the proposed block. It contains height, round +and the block part. + +| Name | Type | Description | Field Number | +|--------|--------------------------------------------|----------------------------------------|--------------| +| height | int64 | Height of corresponding block. | 1 | +| round | int32 | Round of voting to finalize the block. | 2 | +| part | [Part](../../core/data_structures.md#part) | A part of the block. | 3 | + +### NewRoundStep + +NewRoundStep is sent for every step transition during the core consensus algorithm execution. +It is used in the gossip part of the Tendermint protocol to inform peers about a current +height/round/step a process is in. + +| Name | Type | Description | Field Number | +|--------------------------|--------|----------------------------------------|--------------| +| height | int64 | Height of corresponding block | 1 | +| round | int32 | Round of voting to finalize the block. | 2 | +| step | uint32 | | 3 | +| seconds_since_start_time | int64 | | 4 | +| last_commit_round | int32 | | 5 | + +### NewValidBlock + +NewValidBlock is sent when a validator observes a valid block B in some round r, +i.e., there is a Proposal for block B and 2/3+ prevotes for the block B in the round r. +It contains height and round in which valid block is observed, block parts header that describes +the valid block and is used to obtain all +block parts, and a bit array of the block parts a process currently has, so its peers can know what +parts it is missing so they can send them. +In case the block is also committed, then IsCommit flag is set to true. + +| Name | Type | Description | Field Number | +|-----------------------|--------------------------------------------------------------|----------------------------------------|--------------| +| height | int64 | Height of corresponding block | 1 | +| round | int32 | Round of voting to finalize the block. | 2 | +| block_part_set_header | [PartSetHeader](../../core/data_structures.md#partsetheader) | | 3 | +| block_parts | int32 | | 4 | +| is_commit | bool | | 5 | + +### ProposalPOL + +ProposalPOL is sent when a previous block is re-proposed. +It is used to inform peers in what round the process learned for this block (ProposalPOLRound), +and what prevotes for the re-proposed block the process has. + +| Name | Type | Description | Field Number | +|--------------------|----------|-------------------------------|--------------| +| height | int64 | Height of corresponding block | 1 | +| proposal_pol_round | int32 | | 2 | +| proposal_pol | bitarray | | 3 | + +### ReceivedVote + +ReceivedVote is sent to indicate that a particular vote has been received. It contains height, +round, vote type and the index of the validator that is the originator of the corresponding vote. + +| Name | Type | Description | Field Number | +|--------|------------------------------------------------------------------|----------------------------------------|--------------| +| height | int64 | Height of corresponding block | 1 | +| round | int32 | Round of voting to finalize the block. | 2 | +| type | [SignedMessageType](../../core/data_structures.md#signedmsgtype) | | 3 | +| index | int32 | | 4 | + +### VoteSetMaj23 + +VoteSetMaj23 is sent to indicate that a process has seen +2/3 votes for some BlockID. +It contains height, round, vote type and the BlockID. + +| Name | Type | Description | Field Number | +|--------|------------------------------------------------------------------|----------------------------------------|--------------| +| height | int64 | Height of corresponding block | 1 | +| round | int32 | Round of voting to finalize the block. | 2 | +| type | [SignedMessageType](../../core/data_structures.md#signedmsgtype) | | 3 | + +### VoteSetBits + +VoteSetBits is sent to communicate the bit-array of votes a process has seen for a given +BlockID. It contains height, round, vote type, BlockID and a bit array of +the votes a process has. + +| Name | Type | Description | Field Number | +|----------|------------------------------------------------------------------|----------------------------------------|--------------| +| height | int64 | Height of corresponding block | 1 | +| round | int32 | Round of voting to finalize the block. | 2 | +| type | [SignedMessageType](../../core/data_structures.md#signedmsgtype) | | 3 | +| block_id | [BlockID](../../core/data_structures.md#blockid) | | 4 | +| votes | BitArray | Round of voting to finalize the block. | 5 | + +### Message + +Message is a [`oneof` protobuf type](https://developers.google.com/protocol-buffers/docs/proto#oneof). + +| Name | Type | Description | Field Number | +|-----------------|---------------------------------|----------------------------------------|--------------| +| new_round_step | [NewRoundStep](#newroundstep) | Height of corresponding block | 1 | +| new_valid_block | [NewValidBlock](#newvalidblock) | Round of voting to finalize the block. | 2 | +| proposal | [Proposal](#proposal) | | 3 | +| proposal_pol | [ProposalPOL](#proposalpol) | | 4 | +| block_part | [BlockPart](#blockpart) | | 5 | +| vote | [Vote](#vote) | | 6 | +| received_vote | [ReceivedVote](#ReceivedVote) | | 7 | +| vote_set_maj23 | [VoteSetMaj23](#votesetmaj23) | | 8 | +| vote_set_bits | [VoteSetBits](#votesetbits) | | 9 | diff --git a/spec/p2p/messages/evidence.md b/spec/p2p/messages/evidence.md new file mode 100644 index 000000000..34fc40a91 --- /dev/null +++ b/spec/p2p/messages/evidence.md @@ -0,0 +1,23 @@ +--- +order: 3 +--- + +# Evidence + +## Channel + +Evidence has one channel. The channel identifier is listed below. + +| Name | Number | +|-----------------|--------| +| EvidenceChannel | 56 | + +## Message Types + +### EvidenceList + +EvidenceList consists of a list of verified evidence. This evidence will already have been propagated throughout the network. EvidenceList is used in two places, as a p2p message and within the block [block](../../core/data_structures.md#block) as well. + +| Name | Type | Description | Field Number | +|----------|-------------------------------------------------------------|------------------------|--------------| +| evidence | repeated [Evidence](../../core/data_structures.md#evidence) | List of valid evidence | 1 | diff --git a/spec/p2p/messages/mempool.md b/spec/p2p/messages/mempool.md new file mode 100644 index 000000000..8f3925cad --- /dev/null +++ b/spec/p2p/messages/mempool.md @@ -0,0 +1,33 @@ +--- +order: 4 +--- +# Mempool + +## Channel + +Mempool has one channel. The channel identifier is listed below. + +| Name | Number | +|----------------|--------| +| MempoolChannel | 48 | + +## Message Types + +There is currently only one message that Mempool broadcasts and receives over +the p2p gossip network (via the reactor): `TxsMessage` + +### Txs + +A list of transactions. These transactions have been checked against the application for validity. This does not mean that the transactions are valid, it is up to the application to check this. + +| Name | Type | Description | Field Number | +|------|----------------|----------------------|--------------| +| txs | repeated bytes | List of transactions | 1 | + +### Message + +Message is a [`oneof` protobuf type](https://developers.google.com/protocol-buffers/docs/proto#oneof). The one of consists of one message [`Txs`](#txs). + +| Name | Type | Description | Field Number | +|------|-------------|-----------------------|--------------| +| txs | [Txs](#txs) | List of transactions | 1 | diff --git a/spec/p2p/messages/pex.md b/spec/p2p/messages/pex.md new file mode 100644 index 000000000..e12a076e5 --- /dev/null +++ b/spec/p2p/messages/pex.md @@ -0,0 +1,76 @@ +--- +order: 6 +--- + +# Peer Exchange + +## Channels + +Pex has one channel. The channel identifier is listed below. + +| Name | Number | +|------------|--------| +| PexChannel | 0 | + +## Message Types + +The current PEX service has two versions. The first uses IP/port pair but since the p2p stack is moving towards a transport agnostic approach, +node endpoints require a `Protocol` and `Path` hence the V2 version uses a [url](https://golang.org/pkg/net/url/#URL) instead. + +### PexRequest + +PexRequest is an empty message requesting a list of peers. + +> EmptyRequest + +### PexResponse + +PexResponse is an list of net addresses provided to a peer to dial. + +| Name | Type | Description | Field Number | +|-------|------------------------------------|------------------------------------------|--------------| +| addresses | repeated [PexAddress](#PexAddress) | List of peer addresses available to dial | 1 | + +### PexAddress + +PexAddress provides needed information for a node to dial a peer. + +| Name | Type | Description | Field Number | +|------|--------|------------------|--------------| +| id | string | NodeID of a peer | 1 | +| ip | string | The IP of a node | 2 | +| port | port | Port of a peer | 3 | + + +### PexRequestV2 + +PexRequest is an empty message requesting a list of peers. + +> EmptyRequest + +### PexResponseV2 + +PexResponse is an list of net addresses provided to a peer to dial. + +| Name | Type | Description | Field Number | +|-------|------------------------------------|------------------------------------------|--------------| +| addresses | repeated [PexAddressV2](#PexAddressV2) | List of peer addresses available to dial | 1 | + +### PexAddressV2 + +PexAddress provides needed information for a node to dial a peer. + +| Name | Type | Description | Field Number | +|------|--------|------------------|--------------| +| url | string | See [golang url](https://golang.org/pkg/net/url/#URL) | 1 | + +### Message + +Message is a [`oneof` protobuf type](https://developers.google.com/protocol-buffers/docs/proto#oneof). The one of consists of two messages. + +| Name | Type | Description | Field Number | +|--------------|---------------------------|------------------------------------------------------|--------------| +| pex_request | [PexRequest](#PexRequest) | Empty request asking for a list of addresses to dial | 1 | +| pex_response | [PexResponse](#PexResponse) | List of addresses to dial | 2 | +| pex_request_v2 | [PexRequestV2](#PexRequestV2) | Empty request asking for a list of addresses to dial | 3 | +| pex_response_v2 | [PexRespinseV2](#PexResponseV2) | List of addresses to dial | 4 | diff --git a/spec/p2p/messages/state-sync.md b/spec/p2p/messages/state-sync.md new file mode 100644 index 000000000..71e3ae71b --- /dev/null +++ b/spec/p2p/messages/state-sync.md @@ -0,0 +1,133 @@ +--- +order: 5 +--- + +# State Sync + +## Channels + +State sync has four distinct channels. The channel identifiers are listed below. + +| Name | Number | +|-------------------|--------| +| SnapshotChannel | 96 | +| ChunkChannel | 97 | +| LightBlockChannel | 98 | +| ParamsChannel | 99 | + +## Message Types + +### SnapshotRequest + +When a new node begin state syncing, it will ask all peers it encounters if it has any +available snapshots: + +| Name | Type | Description | Field Number | +|----------|--------|-------------|--------------| + +### SnapShotResponse + +The receiver will query the local ABCI application via `ListSnapshots`, and send a message +containing snapshot metadata (limited to 4 MB) for each of the 10 most recent snapshots: and stored at the application layer. When a peer is starting it will request snapshots. + +| Name | Type | Description | Field Number | +|----------|--------|-----------------------------------------------------------|--------------| +| height | uint64 | Height at which the snapshot was taken | 1 | +| format | uint32 | Format of the snapshot. | 2 | +| chunks | uint32 | How many chunks make up the snapshot | 3 | +| hash | bytes | Arbitrary snapshot hash | 4 | +| metadata | bytes | Arbitrary application data. **May be non-deterministic.** | 5 | + +### ChunkRequest + +The node running state sync will offer these snapshots to the local ABCI application via +`OfferSnapshot` ABCI calls, and keep track of which peers contain which snapshots. Once a snapshot +is accepted, the state syncer will request snapshot chunks from appropriate peers: + +| Name | Type | Description | Field Number | +|--------|--------|-------------------------------------------------------------|--------------| +| height | uint64 | Height at which the chunk was created | 1 | +| format | uint32 | Format chosen for the chunk. **May be non-deterministic.** | 2 | +| index | uint32 | Index of the chunk within the snapshot. | 3 | + +### ChunkResponse + +The receiver will load the requested chunk from its local application via `LoadSnapshotChunk`, +and respond with it (limited to 16 MB): + +| Name | Type | Description | Field Number | +|---------|--------|-------------------------------------------------------------|--------------| +| height | uint64 | Height at which the chunk was created | 1 | +| format | uint32 | Format chosen for the chunk. **May be non-deterministic.** | 2 | +| index | uint32 | Index of the chunk within the snapshot. | 3 | +| hash | bytes | Arbitrary snapshot hash | 4 | +| missing | bool | Arbitrary application data. **May be non-deterministic.** | 5 | + +Here, `Missing` is used to signify that the chunk was not found on the peer, since an empty +chunk is a valid (although unlikely) response. + +The returned chunk is given to the ABCI application via `ApplySnapshotChunk` until the snapshot +is restored. If a chunk response is not returned within some time, it will be re-requested, +possibly from a different peer. + +The ABCI application is able to request peer bans and chunk refetching as part of the ABCI protocol. + +### LightBlockRequest + +To verify state and to provide state relevant information for consensus, the node will ask peers for +light blocks at specified heights. + +| Name | Type | Description | Field Number | +|----------|--------|----------------------------|--------------| +| height | uint64 | Height of the light block | 1 | + +### LightBlockResponse + +The receiver will retrieve and construct the light block from both the block and state stores. The +receiver will verify the data by comparing the hashes and store the header, commit and validator set +if necessary. The light block at the height of the snapshot will be used to verify the `AppHash`. + +| Name | Type | Description | Field Number | +|---------------|---------------------------------------------------------|--------------------------------------|--------------| +| light_block | [LightBlock](../../core/data_structures.md#lightblock) | Light block at the height requested | 1 | + +State sync will use [light client verification](../../light-client/verification.README.md) to verify +the light blocks. + + +If no state sync is in progress (i.e. during normal operation), any unsolicited response messages +are discarded. + +### ParamsRequest + +In order to build tendermint state, the state provider will request the params at the height of the snapshot and use the header to verify it. + +| Name | Type | Description | Field Number | +|----------|--------|----------------------------|--------------| +| height | uint64 | Height of the consensus params | 1 | + + +### ParamsResponse + +A reciever to the request will use the state store to fetch the consensus params at that height and return it to the sender. + +| Name | Type | Description | Field Number | +|----------|--------|---------------------------------|--------------| +| height | uint64 | Height of the consensus params | 1 | +| consensus_params | [ConsensusParams](../../core/data_structures.md#ConsensusParams) | Consensus params at the height requested | 2 | + + +### Message + +Message is a [`oneof` protobuf type](https://developers.google.com/protocol-buffers/docs/proto#oneof). The `oneof` consists of eight messages. + +| Name | Type | Description | Field Number | +|----------------------|--------------------------------------------|----------------------------------------------|--------------| +| snapshots_request | [SnapshotRequest](#snapshotrequest) | Request a recent snapshot from a peer | 1 | +| snapshots_response | [SnapshotResponse](#snapshotresponse) | Respond with the most recent snapshot stored | 2 | +| chunk_request | [ChunkRequest](#chunkrequest) | Request chunks of the snapshot. | 3 | +| chunk_response | [ChunkRequest](#chunkresponse) | Response of chunks used to recreate state. | 4 | +| light_block_request | [LightBlockRequest](#lightblockrequest) | Request a light block. | 5 | +| light_block_response | [LightBlockResponse](#lightblockresponse) | Respond with a light block | 6 | +| params_request | [ParamsRequest](#paramsrequest) | Request the consensus params at a height. | 7 | +| params_response | [ParamsResponse](#paramsresponse) | Respond with the consensus params | 8 | diff --git a/spec/p2p/node.md b/spec/p2p/node.md new file mode 100644 index 000000000..0bbcf0b5b --- /dev/null +++ b/spec/p2p/node.md @@ -0,0 +1,67 @@ +# Peer Discovery + +A Tendermint P2P network has different kinds of nodes with different requirements for connectivity to one another. +This document describes what kind of nodes Tendermint should enable and how they should work. + +## Seeds + +Seeds are the first point of contact for a new node. +They return a list of known active peers and then disconnect. + +Seeds should operate full nodes with the PEX reactor in a "crawler" mode +that continuously explores to validate the availability of peers. + +Seeds should only respond with some top percentile of the best peers it knows about. +See [the peer-exchange docs](https://github.com/tendermint/spec/blob/master/spec/reactors/pex/pex.md) for + details on peer quality. + +## New Full Node + +A new node needs a few things to connect to the network: + +- a list of seeds, which can be provided to Tendermint via config file or flags, + or hardcoded into the software by in-process apps +- a `ChainID`, also called `Network` at the p2p layer +- a recent block height, H, and hash, HASH for the blockchain. + +The values `H` and `HASH` must be received and corroborated by means external to Tendermint, and specific to the user - ie. via the user's trusted social consensus. +This requirement to validate `H` and `HASH` out-of-band and via social consensus +is the essential difference in security models between Proof-of-Work and Proof-of-Stake blockchains. + +With the above, the node then queries some seeds for peers for its chain, +dials those peers, and runs the Tendermint protocols with those it successfully connects to. + +When the peer catches up to height H, it ensures the block hash matches HASH. +If not, Tendermint will exit, and the user must try again - either they are connected +to bad peers or their social consensus is invalid. + +## Restarted Full Node + +A node checks its address book on startup and attempts to connect to peers from there. +If it can't connect to any peers after some time, it falls back to the seeds to find more. + +Restarted full nodes can run the `blockchain` or `consensus` reactor protocols to sync up +to the latest state of the blockchain from wherever they were last. +In a Proof-of-Stake context, if they are sufficiently far behind (greater than the length +of the unbonding period), they will need to validate a recent `H` and `HASH` out-of-band again +so they know they have synced the correct chain. + +## Validator Node + +A validator node is a node that interfaces with a validator signing key. +These nodes require the highest security, and should not accept incoming connections. +They should maintain outgoing connections to a controlled set of "Sentry Nodes" that serve +as their proxy shield to the rest of the network. + +Validators that know and trust each other can accept incoming connections from one another and maintain direct private connectivity via VPN. + +## Sentry Node + +Sentry nodes are guardians of a validator node and provide it access to the rest of the network. +They should be well connected to other full nodes on the network. +Sentry nodes may be dynamic, but should maintain persistent connections to some evolving random subset of each other. +They should always expect to have direct incoming connections from the validator node and its backup(s). +They do not report the validator node's address in the PEX and +they may be more strict about the quality of peers they keep. + +Sentry nodes belonging to validators that trust each other may wish to maintain persistent connections via VPN with one another, but only report each other sparingly in the PEX. diff --git a/spec/p2p/peer.md b/spec/p2p/peer.md new file mode 100644 index 000000000..52d00b584 --- /dev/null +++ b/spec/p2p/peer.md @@ -0,0 +1,130 @@ +# Peers + +This document explains how Tendermint Peers are identified and how they connect to one another. + +For details on peer discovery, see the [peer exchange (PEX) reactor doc](https://github.com/tendermint/spec/blob/master/spec/reactors/pex/pex.md). + +## Peer Identity + +Tendermint peers are expected to maintain long-term persistent identities in the form of a public key. +Each peer has an ID defined as `peer.ID == peer.PubKey.Address()`, where `Address` uses the scheme defined in `crypto` package. + +A single peer ID can have multiple IP addresses associated with it, but a node +will only ever connect to one at a time. + +When attempting to connect to a peer, we use the PeerURL: `@:`. +We will attempt to connect to the peer at IP:PORT, and verify, +via authenticated encryption, that it is in possession of the private key +corresponding to ``. This prevents man-in-the-middle attacks on the peer layer. + +## Connections + +All p2p connections use TCP. +Upon establishing a successful TCP connection with a peer, +two handshakes are performed: one for authenticated encryption, and one for Tendermint versioning. +Both handshakes have configurable timeouts (they should complete quickly). + +### Authenticated Encryption Handshake + +Tendermint implements the Station-to-Station protocol +using X25519 keys for Diffie-Helman key-exchange and chacha20poly1305 for encryption. + +Previous versions of this protocol (0.32 and below) suffered from malleability attacks whereas an active man +in the middle attacker could compromise confidentiality as described in [Prime, Order Please! +Revisiting Small Subgroup and Invalid Curve Attacks on +Protocols using Diffie-Hellman](https://eprint.iacr.org/2019/526.pdf). + +We have added dependency on the Merlin a keccak based transcript hashing protocol to ensure non-malleability. + +It goes as follows: + +- generate an ephemeral X25519 keypair +- send the ephemeral public key to the peer +- wait to receive the peer's ephemeral public key +- create a new Merlin Transcript with the string "TENDERMINT_SECRET_CONNECTION_TRANSCRIPT_HASH" +- Sort the ephemeral keys and add the high labeled "EPHEMERAL_UPPER_PUBLIC_KEY" and the low keys labeled "EPHEMERAL_LOWER_PUBLIC_KEY" to the Merlin transcript. +- compute the Diffie-Hellman shared secret using the peers ephemeral public key and our ephemeral private key +- add the DH secret to the transcript labeled DH_SECRET. +- generate two keys to use for encryption (sending and receiving) and a challenge for authentication as follows: + - create a hkdf-sha256 instance with the key being the diffie hellman shared secret, and info parameter as + `TENDERMINT_SECRET_CONNECTION_KEY_AND_CHALLENGE_GEN` + - get 64 bytes of output from hkdf-sha256 + - if we had the smaller ephemeral pubkey, use the first 32 bytes for the key for receiving, the second 32 bytes for sending; else the opposite. +- use a separate nonce for receiving and sending. Both nonces start at 0, and should support the full 96 bit nonce range +- all communications from now on are encrypted in 1400 byte frames (plus encoding overhead), + using the respective secret and nonce. Each nonce is incremented by one after each use. +- we now have an encrypted channel, but still need to authenticate +- extract a 32 bytes challenge from merlin transcript with the label "SECRET_CONNECTION_MAC" +- sign the common challenge obtained from the hkdf with our persistent private key +- send the amino encoded persistent pubkey and signature to the peer +- wait to receive the persistent public key and signature from the peer +- verify the signature on the challenge using the peer's persistent public key + +If this is an outgoing connection (we dialed the peer) and we used a peer ID, +then finally verify that the peer's persistent public key corresponds to the peer ID we dialed, +ie. `peer.PubKey.Address() == `. + +The connection has now been authenticated. All traffic is encrypted. + +Note: only the dialer can authenticate the identity of the peer, +but this is what we care about since when we join the network we wish to +ensure we have reached the intended peer (and are not being MITMd). + +### Peer Filter + +Before continuing, we check if the new peer has the same ID as ourselves or +an existing peer. If so, we disconnect. + +We also check the peer's address and public key against +an optional whitelist which can be managed through the ABCI app - +if the whitelist is enabled and the peer does not qualify, the connection is +terminated. + +### Tendermint Version Handshake + +The Tendermint Version Handshake allows the peers to exchange their NodeInfo: + +```golang +type NodeInfo struct { + Version p2p.Version + ID p2p.ID + ListenAddr string + + Network string + SoftwareVersion string + Channels []int8 + + Moniker string + Other NodeInfoOther +} + +type Version struct { + P2P uint64 + Block uint64 + App uint64 +} + +type NodeInfoOther struct { + TxIndex string + RPCAddress string +} +``` + +The connection is disconnected if: + +- `peer.NodeInfo.ID` is not equal `peerConn.ID` +- `peer.NodeInfo.Version.Block` does not match ours +- `peer.NodeInfo.Network` is not the same as ours +- `peer.Channels` does not intersect with our known Channels. +- `peer.NodeInfo.ListenAddr` is malformed or is a DNS host that cannot be + resolved + +At this point, if we have not disconnected, the peer is valid. +It is added to the switch and hence all reactors via the `AddPeer` method. +Note that each reactor may handle multiple channels. + +## Connection Activity + +Once a peer is added, incoming messages for a given reactor are handled through +that reactor's `Receive` method, and output messages are sent directly by the Reactors +on each peer. A typical reactor maintains per-peer go-routine(s) that handle this. diff --git a/spec/p2p/readme.md b/spec/p2p/readme.md new file mode 100644 index 000000000..96867aad0 --- /dev/null +++ b/spec/p2p/readme.md @@ -0,0 +1,6 @@ +--- +order: 1 +parent: + title: P2P + order: 6 +--- diff --git a/spec/rpc/README.md b/spec/rpc/README.md new file mode 100644 index 000000000..7cdf417dc --- /dev/null +++ b/spec/rpc/README.md @@ -0,0 +1,1264 @@ +--- +order: 1 +parent: + title: RPC + order: 6 +--- + +# RPC spec + +This file defines the JSON-RPC spec of Tendermint. This is meant to be implemented by all clients. + +## Support + + | | [Tendermint-Go](https://github.com/tendermint/tendermint/) | [endermint-Rs](https://github.com/informalsystems/tendermint-rs) | + |--------------|:----------------------------------------------------------:|:----------------------------------------------------------------:| + | JSON-RPC 2.0 | ✅ | ✅ | + | HTTP | ✅ | ✅ | + | HTTPS | ✅ | ❌ | + | WS | ✅ | ✅ | + + | Routes | [Tendermint-Go](https://github.com/tendermint/tendermint/) | [Tendermint-Rs](https://github.com/informalsystems/tendermint-rs) | + |-----------------------------------------|:----------------------------------------------------------:|:-----------------------------------------------------------------:| + | [Health](#health) | ✅ | ✅ | + | [Status](#status) | ✅ | ✅ | + | [NetInfo](#netinfo) | ✅ | ✅ | + | [Blockchain](#blockchain) | ✅ | ✅ | + | [Block](#block) | ✅ | ✅ | + | [BlockByHash](#blockbyhash) | ✅ | ❌ | + | [BlockResults](#blockresults) | ✅ | ✅ | + | [Commit](#commit) | ✅ | ✅ | + | [Validators](#validators) | ✅ | ✅ | + | [Genesis](#genesis) | ✅ | ✅ | + | [GenesisChunked](#genesischunked) | ✅ | ❌ | + | [ConsensusParams](#consensusparams) | ✅ | ❌ | + | [UnconfirmedTxs](#unconfirmedtxs) | ✅ | ❌ | + | [NumUnconfirmedTxs](#numunconfirmedtxs) | ✅ | ❌ | + | [Tx](#tx) | ✅ | ❌ | + | [BroadCastTxSync](#broadcasttxsync) | ✅ | ✅ | + | [BroadCastTxAsync](#broadcasttxasync) | ✅ | ✅ | + | [ABCIInfo](#abciinfo) | ✅ | ✅ | + | [ABCIQuery](#abciquery) | ✅ | ✅ | + | [BroadcastTxAsync](#broadcasttxasync) | ✅ | ✅ | + | [BroadcastEvidence](#broadcastevidence) | ✅ | ✅ | + +## Timestamps + +Timestamps in the RPC layer of Tendermint follows RFC3339Nano. The RFC3339Nano format removes trailing zeros from the seconds field. + +This means if a block has a timestamp like: `1985-04-12T23:20:50.5200000Z`, the value returned in the RPC will be `1985-04-12T23:20:50.52Z`. + + + +## Info Routes + +### Health + +Node heartbeat + +#### Parameters + +None + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/health +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"health\"}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": -1, + "result": {} +} +``` + +### Status + +Get Tendermint status including node info, pubkey, latest block hash, app hash, block height and time. + +#### Parameters + +None + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/status +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"status\"}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": -1, + "result": { + "node_info": { + "protocol_version": { + "p2p": "8", + "block": "11", + "app": "0" + }, + "id": "b93270b358a72a2db30089f3856475bb1f918d6d", + "listen_addr": "tcp://0.0.0.0:26656", + "network": "cosmoshub-4", + "version": "v0.34.8", + "channels": "40202122233038606100", + "moniker": "aib-hub-node", + "other": { + "tx_index": "on", + "rpc_address": "tcp://0.0.0.0:26657" + } + }, + "sync_info": { + "latest_block_hash": "50F03C0EAACA8BCA7F9C14189ACE9C05A9A1BBB5268DB63DC6A3C848D1ECFD27", + "latest_app_hash": "2316CFF7644219F4F15BEE456435F280E2B38955EEA6D4617CCB6D7ABF781C22", + "latest_block_height": "5622165", + "latest_block_time": "2021-03-25T14:00:43.356134226Z", + "earliest_block_hash": "1455A0C15AC49BB506992EC85A3CD4D32367E53A087689815E01A524231C3ADF", + "earliest_app_hash": "E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855", + "earliest_block_height": "5200791", + "earliest_block_time": "2019-12-11T16:11:34Z", + "catching_up": false + }, + "validator_info": { + "address": "38FB765D0092470989360ECA1C89CD06C2C1583C", + "pub_key": { + "type": "tendermint/PubKeyEd25519", + "value": "Z+8kntVegi1sQiWLYwFSVLNWqdAUGEy7lskL78gxLZI=" + }, + "voting_power": "0" + } + } +} +``` + +### NetInfo + +Network information + +#### Parameters + +None + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/net_info +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"net_info\"}" +``` + +#### Response + +```json +{ + "id": 0, + "jsonrpc": "2.0", + "result": { + "listening": true, + "listeners": [ + "Listener(@)" + ], + "n_peers": "1", + "peers": [ + { + "node_id": "5576458aef205977e18fd50b274e9b5d9014525a", + "url": "tcp://5576458aef205977e18fd50b274e9b5d9014525a@95.179.155.35:26656" + } + ] + } +} +``` + +### Blockchain + +Get block headers. Returned in descending order. May be limited in quantity. + +#### Parameters + +- `minHeight (integer)`: The lowest block to be returned in the response +- `maxHeight (integer)`: The highest block to be returned in the response + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/blockchain + +curl http://127.0.0.1:26657/blockchain?minHeight=1&maxHeight=2 +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"blockchain\",\"params\":{\"minHeight\":\"1\", \"maxHeight\":\"2\"}}" +``` + +#### Response + +```json +{ + "id": 0, + "jsonrpc": "2.0", + "result": { + "last_height": "1276718", + "block_metas": [ + { + "block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "block_size": 1000000, + "header": { + "version": { + "block": "10", + "app": "0" + }, + "chain_id": "cosmoshub-2", + "height": "12", + "time": "2019-04-22T17:01:51.701356223Z", + "last_block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "last_commit_hash": "21B9BC845AD2CB2C4193CDD17BFC506F1EBE5A7402E84AD96E64171287A34812", + "data_hash": "970886F99E77ED0D60DA8FCE0447C2676E59F2F77302B0C4AA10E1D02F18EF73", + "validators_hash": "D658BFD100CA8025CFD3BECFE86194322731D387286FBD26E059115FD5F2BCA0", + "next_validators_hash": "D658BFD100CA8025CFD3BECFE86194322731D387286FBD26E059115FD5F2BCA0", + "consensus_hash": "0F2908883A105C793B74495EB7D6DF2EEA479ED7FC9349206A65CB0F9987A0B8", + "app_hash": "223BF64D4A01074DC523A80E76B9BBC786C791FB0A1893AC5B14866356FCFD6C", + "last_results_hash": "", + "evidence_hash": "", + "proposer_address": "D540AB022088612AC74B287D076DBFBC4A377A2E" + }, + "num_txs": "54" + } + ] + } +} +``` + +### Block + +Get block at a specified height. + +#### Parameters + +- `height (integer)`: height of the requested block. If no height is specified the latest block will be used. + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/block + +curl http://127.0.0.1:26657/block?height=1 +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"block\",\"params\":{\"height\":\"1\"}}" +``` + +#### Response + +```json +{ + "id": 0, + "jsonrpc": "2.0", + "result": { + "block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "block": { + "header": { + "version": { + "block": "10", + "app": "0" + }, + "chain_id": "cosmoshub-2", + "height": "12", + "time": "2019-04-22T17:01:51.701356223Z", + "last_block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "last_commit_hash": "21B9BC845AD2CB2C4193CDD17BFC506F1EBE5A7402E84AD96E64171287A34812", + "data_hash": "970886F99E77ED0D60DA8FCE0447C2676E59F2F77302B0C4AA10E1D02F18EF73", + "validators_hash": "D658BFD100CA8025CFD3BECFE86194322731D387286FBD26E059115FD5F2BCA0", + "next_validators_hash": "D658BFD100CA8025CFD3BECFE86194322731D387286FBD26E059115FD5F2BCA0", + "consensus_hash": "0F2908883A105C793B74495EB7D6DF2EEA479ED7FC9349206A65CB0F9987A0B8", + "app_hash": "223BF64D4A01074DC523A80E76B9BBC786C791FB0A1893AC5B14866356FCFD6C", + "last_results_hash": "", + "evidence_hash": "", + "proposer_address": "D540AB022088612AC74B287D076DBFBC4A377A2E" + }, + "data": [ + "yQHwYl3uCkKoo2GaChRnd+THLQ2RM87nEZrE19910Z28ABIUWW/t8AtIMwcyU0sT32RcMDI9GF0aEAoFdWF0b20SBzEwMDAwMDASEwoNCgV1YXRvbRIEMzEwMRCd8gEaagom61rphyEDoJPxlcjRoNDtZ9xMdvs+lRzFaHe2dl2P5R2yVCWrsHISQKkqX5H1zXAIJuC57yw0Yb03Fwy75VRip0ZBtLiYsUqkOsPUoQZAhDNP+6LY+RUwz/nVzedkF0S29NZ32QXdGv0=" + ], + "evidence": [ + { + "type": "string", + "height": 0, + "time": 0, + "total_voting_power": 0, + "validator": { + "pub_key": { + "type": "tendermint/PubKeyEd25519", + "value": "A6DoBUypNtUAyEHWtQ9bFjfNg8Bo9CrnkUGl6k6OHN4=" + }, + "voting_power": 0, + "address": "string" + } + } + ], + "last_commit": { + "height": 0, + "round": 0, + "block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "signatures": [ + { + "type": 2, + "height": "1262085", + "round": 0, + "block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "timestamp": "2019-08-01T11:39:38.867269833Z", + "validator_address": "000001E443FD237E4B616E2FA69DF4EE3D49A94F", + "validator_index": 0, + "signature": "DBchvucTzAUEJnGYpNvMdqLhBAHG4Px8BsOBB3J3mAFCLGeuG7uJqy+nVngKzZdPhPi8RhmE/xcw/M9DOJjEDg==" + } + ] + } + } + } +} +``` + +### BlockByHash + +#### Parameters + +- `hash (string)`: Hash of the block to query for. + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/block_by_hash?hash=0xD70952032620CC4E2737EB8AC379806359D8E0B17B0488F627997A0B043ABDED +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"block_by_hash\",\"params\":{\"hash\":\"0xD70952032620CC4E2737EB8AC379806359D8E0B17B0488F627997A0B043ABDED\"}}" +``` + +#### Response + +```json +{ + "id": 0, + "jsonrpc": "2.0", + "result": { + "block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "block": { + "header": { + "version": { + "block": "10", + "app": "0" + }, + "chain_id": "cosmoshub-2", + "height": "12", + "time": "2019-04-22T17:01:51.701356223Z", + "last_block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "last_commit_hash": "21B9BC845AD2CB2C4193CDD17BFC506F1EBE5A7402E84AD96E64171287A34812", + "data_hash": "970886F99E77ED0D60DA8FCE0447C2676E59F2F77302B0C4AA10E1D02F18EF73", + "validators_hash": "D658BFD100CA8025CFD3BECFE86194322731D387286FBD26E059115FD5F2BCA0", + "next_validators_hash": "D658BFD100CA8025CFD3BECFE86194322731D387286FBD26E059115FD5F2BCA0", + "consensus_hash": "0F2908883A105C793B74495EB7D6DF2EEA479ED7FC9349206A65CB0F9987A0B8", + "app_hash": "223BF64D4A01074DC523A80E76B9BBC786C791FB0A1893AC5B14866356FCFD6C", + "last_results_hash": "", + "evidence_hash": "", + "proposer_address": "D540AB022088612AC74B287D076DBFBC4A377A2E" + }, + "data": [ + "yQHwYl3uCkKoo2GaChRnd+THLQ2RM87nEZrE19910Z28ABIUWW/t8AtIMwcyU0sT32RcMDI9GF0aEAoFdWF0b20SBzEwMDAwMDASEwoNCgV1YXRvbRIEMzEwMRCd8gEaagom61rphyEDoJPxlcjRoNDtZ9xMdvs+lRzFaHe2dl2P5R2yVCWrsHISQKkqX5H1zXAIJuC57yw0Yb03Fwy75VRip0ZBtLiYsUqkOsPUoQZAhDNP+6LY+RUwz/nVzedkF0S29NZ32QXdGv0=" + ], + "evidence": [ + { + "type": "string", + "height": 0, + "time": 0, + "total_voting_power": 0, + "validator": { + "pub_key": { + "type": "tendermint/PubKeyEd25519", + "value": "A6DoBUypNtUAyEHWtQ9bFjfNg8Bo9CrnkUGl6k6OHN4=" + }, + "voting_power": 0, + "address": "string" + } + } + ], + "last_commit": { + "height": 0, + "round": 0, + "block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "signatures": [ + { + "type": 2, + "height": "1262085", + "round": 0, + "block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "timestamp": "2019-08-01T11:39:38.867269833Z", + "validator_address": "000001E443FD237E4B616E2FA69DF4EE3D49A94F", + "validator_index": 0, + "signature": "DBchvucTzAUEJnGYpNvMdqLhBAHG4Px8BsOBB3J3mAFCLGeuG7uJqy+nVngKzZdPhPi8RhmE/xcw/M9DOJjEDg==" + } + ] + } + } + } +} +``` + +### BlockResults + +### Parameters + +- `height (integer)`: Height of the block which contains the results. If no height is specified, the latest block height will be used + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/block_results + + +curl http://127.0.0.1:26657/block_results?height=1 +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"block_results\",\"params\":{\"height\":\"1\"}}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "height": "12", + "total_gas_used": "100", + "txs_results": [ + { + "code": "0", + "data": "", + "log": "not enough gas", + "info": "", + "gas_wanted": "100", + "gas_used": "100", + "events": [ + { + "type": "app", + "attributes": [ + { + "key": "YWN0aW9u", + "value": "c2VuZA==", + "index": false + } + ] + } + ], + "codespace": "ibc" + } + ], + "begin_block_events": [ + { + "type": "app", + "attributes": [ + { + "key": "YWN0aW9u", + "value": "c2VuZA==", + "index": false + } + ] + } + ], + "end_block": [ + { + "type": "app", + "attributes": [ + { + "key": "YWN0aW9u", + "value": "c2VuZA==", + "index": false + } + ] + } + ], + "validator_updates": [ + { + "pub_key": { + "type": "tendermint/PubKeyEd25519", + "value": "9tK9IT+FPdf2qm+5c2qaxi10sWP+3erWTKgftn2PaQM=" + }, + "power": "300" + } + ], + "consensus_params_updates": { + "block": { + "max_bytes": "22020096", + "max_gas": "1000", + "time_iota_ms": "1000" + }, + "evidence": { + "max_age": "100000" + }, + "validator": { + "pub_key_types": [ + "ed25519" + ] + } + } + } +} +``` + +### Commit + +#### Parameters + +- `height (integer)`: Height of the block the requested commit pertains to. If no height is set the latest commit will be returned. + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/commit + + +curl http://127.0.0.1:26657/commit?height=1 +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"commit\",\"params\":{\"height\":\"1\"}}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "signed_header": { + "header": { + "version": { + "block": "10", + "app": "0" + }, + "chain_id": "cosmoshub-2", + "height": "12", + "time": "2019-04-22T17:01:51.701356223Z", + "last_block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "last_commit_hash": "21B9BC845AD2CB2C4193CDD17BFC506F1EBE5A7402E84AD96E64171287A34812", + "data_hash": "970886F99E77ED0D60DA8FCE0447C2676E59F2F77302B0C4AA10E1D02F18EF73", + "validators_hash": "D658BFD100CA8025CFD3BECFE86194322731D387286FBD26E059115FD5F2BCA0", + "next_validators_hash": "D658BFD100CA8025CFD3BECFE86194322731D387286FBD26E059115FD5F2BCA0", + "consensus_hash": "0F2908883A105C793B74495EB7D6DF2EEA479ED7FC9349206A65CB0F9987A0B8", + "app_hash": "223BF64D4A01074DC523A80E76B9BBC786C791FB0A1893AC5B14866356FCFD6C", + "last_results_hash": "", + "evidence_hash": "", + "proposer_address": "D540AB022088612AC74B287D076DBFBC4A377A2E" + }, + "commit": { + "height": "1311801", + "round": 0, + "block_id": { + "hash": "112BC173FD838FB68EB43476816CD7B4C6661B6884A9E357B417EE957E1CF8F7", + "parts": { + "total": 1, + "hash": "38D4B26B5B725C4F13571EFE022C030390E4C33C8CF6F88EDD142EA769642DBD" + } + }, + "signatures": [ + { + "block_id_flag": 2, + "validator_address": "000001E443FD237E4B616E2FA69DF4EE3D49A94F", + "timestamp": "2019-04-22T17:01:58.376629719Z", + "signature": "14jaTQXYRt8kbLKEhdHq7AXycrFImiLuZx50uOjs2+Zv+2i7RTG/jnObD07Jo2ubZ8xd7bNBJMqkgtkd0oQHAw==" + } + ] + } + }, + "canonical": true + } +} +``` + +### Validators + +#### Parameters + +- `height (integer)`: Block height at which the validators were present on. If no height is set the latest commit will be returned. +- `page (integer)`: +- `per_page (integer)`: + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/validators +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"validators\",\"params\":{\"height\":\"1\", \"page\":\"1\", \"per_page\":\"20\"}}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "block_height": "55", + "validators": [ + { + "address": "000001E443FD237E4B616E2FA69DF4EE3D49A94F", + "pub_key": { + "type": "tendermint/PubKeyEd25519", + "value": "9tK9IT+FPdf2qm+5c2qaxi10sWP+3erWTKgftn2PaQM=" + }, + "voting_power": "239727", + "proposer_priority": "-11896414" + } + ], + "count": "1", + "total": "25" + } +} +``` + +### Genesis + +Get Genesis of the chain. If the response is large, this operation +will return an error: use `genesis_chunked` instead. + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/genesis +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"genesis\"}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "genesis": { + "genesis_time": "2019-04-22T17:00:00Z", + "chain_id": "cosmoshub-2", + "initial_height": "2", + "consensus_params": { + "block": { + "max_bytes": "22020096", + "max_gas": "1000", + "time_iota_ms": "1000" + }, + "evidence": { + "max_age": "100000" + }, + "validator": { + "pub_key_types": [ + "ed25519" + ] + } + }, + "validators": [ + { + "address": "B00A6323737F321EB0B8D59C6FD497A14B60938A", + "pub_key": { + "type": "tendermint/PubKeyEd25519", + "value": "cOQZvh/h9ZioSeUMZB/1Vy1Xo5x2sjrVjlE/qHnYifM=" + }, + "power": "9328525", + "name": "Certus One" + } + ], + "app_hash": "", + "app_state": {} + } + } +} +``` + +### GenesisChunked + +Get the genesis document in a chunks to support easily transfering larger documents. + +#### Parameters + +- `chunk` (integer): the index number of the chunk that you wish to + fetch. These IDs are 0 indexed. + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/genesis_chunked?chunk=0 +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"genesis_chunked\",\"params\":{\"chunk\":0}}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "chunk": 0, + "total": 10, + "data": "dGVuZGVybWludAo=" + } +} +``` + +### ConsensusParams + +Get the consensus parameters. + +#### Parameters + +- `height (integer)`: Block height at which the consensus params would like to be fetched for. + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/consensus_params +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"consensus_params\"}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "block_height": "1", + "consensus_params": { + "block": { + "max_bytes": "22020096", + "max_gas": "1000", + "time_iota_ms": "1000" + }, + "evidence": { + "max_age": "100000" + }, + "validator": { + "pub_key_types": [ + "ed25519" + ] + } + } + } +} +``` + +### UnconfirmedTxs + +Get a list of unconfirmed transactions. + +#### Parameters + +- `limit (integer)` The amount of txs to respond with. + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/unconfirmed_txs +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"unconfirmed_txs\, \"params\":{\"limit\":\"20\"}}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "n_txs": "82", + "total": "82", + "total_bytes": "19974", + "txs": [ + "gAPwYl3uCjCMTXENChSMnIkb5ZpYHBKIZqecFEV2tuZr7xIUA75/FmYq9WymsOBJ0XSJ8yV8zmQKMIxNcQ0KFIyciRvlmlgcEohmp5wURXa25mvvEhQbrvwbvlNiT+Yjr86G+YQNx7kRVgowjE1xDQoUjJyJG+WaWBwSiGannBRFdrbma+8SFK2m+1oxgILuQLO55n8mWfnbIzyPCjCMTXENChSMnIkb5ZpYHBKIZqecFEV2tuZr7xIUQNGfkmhTNMis4j+dyMDIWXdIPiYKMIxNcQ0KFIyciRvlmlgcEohmp5wURXa25mvvEhS8sL0D0wwgGCItQwVowak5YB38KRIUCg4KBXVhdG9tEgUxMDA1NBDoxRgaagom61rphyECn8x7emhhKdRCB2io7aS/6Cpuq5NbVqbODmqOT3jWw6kSQKUresk+d+Gw0BhjiggTsu8+1voW+VlDCQ1GRYnMaFOHXhyFv7BCLhFWxLxHSAYT8a5XqoMayosZf9mANKdXArA=" + ] + } +} +``` + +### NumUnconfirmedTxs + +Get data about unconfirmed transactions. + +#### Parameters + +None + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/num_unconfirmed_txs +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"num_unconfirmed_txs\"}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "n_txs": "31", + "total": "82", + "total_bytes": "19974" + } +} +``` + +### Tx + +#### Parameters + +- `hash (string)`: The hash of the transaction +- `prove (bool)`: If the response should include proof the transaction was included in a block. + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/num_unconfirmed_txs +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"num_unconfirmed_txs\"}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "hash": "D70952032620CC4E2737EB8AC379806359D8E0B17B0488F627997A0B043ABDED", + "height": "1000", + "index": 0, + "tx_result": { + "log": "[{\"msg_index\":\"0\",\"success\":true,\"log\":\"\"}]", + "gas_wanted": "200000", + "gas_used": "28596", + "tags": [ + { + "key": "YWN0aW9u", + "value": "c2VuZA==", + "index": false + } + ] + }, + "tx": "5wHwYl3uCkaoo2GaChQmSIu8hxpJxLcCuIi8fiHN4TMwrRIU/Af1cEG7Rcs/6LjTl7YjRSymJfYaFAoFdWF0b20SCzE0OTk5OTk1MDAwEhMKDQoFdWF0b20SBDUwMDAQwJoMGmoKJuta6YchAwswBShaB1wkZBctLIhYqBC3JrAI28XGzxP+rVEticGEEkAc+khTkKL9CDE47aDvjEHvUNt+izJfT4KVF2v2JkC+bmlH9K08q3PqHeMI9Z5up+XMusnTqlP985KF+SI5J3ZOIhhNYWRlIGJ5IENpcmNsZSB3aXRoIGxvdmU=" + } +} +``` + +## Transaction Routes + +### BroadCastTxSync + +Returns with the response from CheckTx. Does not wait for DeliverTx result. + +#### Parameters + +- `tx (string)`: The transaction encoded + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/broadcast_tx_sync?tx=encoded_tx +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"broadcast_tx_sync\",\"params\":{\"tx\":\"a/encoded_tx/c\"}}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "code": "0", + "data": "", + "log": "", + "codespace": "ibc", + "hash": "0D33F2F03A5234F38706E43004489E061AC40A2E" + }, + "error": "" +} +``` + +### BroadCastTxAsync + +Returns right away, with no response. Does not wait for CheckTx nor DeliverTx results. + +#### Parameters + +- `tx (string)`: The transaction encoded + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/broadcast_tx_async?tx=encoded_tx +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"broadcast_tx_async\",\"params\":{\"tx\":\"a/encoded_tx/c\"}}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "code": "0", + "data": "", + "log": "", + "codespace": "ibc", + "hash": "0D33F2F03A5234F38706E43004489E061AC40A2E" + }, + "error": "" +} +``` + +### CheckTx + +Checks the transaction without executing it. + +#### Parameters + +- `tx (string)`: String of the encoded transaction + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/check_tx?tx=encoded_tx +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"check_tx\",\"params\":{\"tx\":\"a/encoded_tx/c\"}}" +``` + +#### Response + +```json +{ + "id": 0, + "jsonrpc": "2.0", + "error": "", + "result": { + "code": "0", + "data": "", + "log": "", + "info": "", + "gas_wanted": "1", + "gas_used": "0", + "events": [ + { + "type": "app", + "attributes": [ + { + "key": "YWN0aW9u", + "value": "c2VuZA==", + "index": false + } + ] + } + ], + "codespace": "bank" + } +} +``` + +## ABCI Routes + +### ABCIInfo + +Get some info about the application. + +#### Parameters + +None + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/abci_info +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"abci_info\"}" +``` + +#### Response + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "response": { + "data": "{\"size\":0}", + "version": "0.16.1", + "app_version": "1314126" + } + } +} +``` + +### ABCIQuery + +Query the application for some information. + +#### Parameters + +- `path (string)`: Path to the data. This is defined by the application. +- `data (string)`: The data requested +- `height (integer)`: Height at which the data is being requested for. +- `prove (bool)`: Include proofs of the transactions inclusion in the block + +#### Request + +##### HTTP + +```sh +curl http://127.0.0.1:26657/abci_query?path="a/b/c"=IHAVENOIDEA&height=1&prove=true +``` + +##### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"abci_query\",\"params\":{\"path\":\"a/b/c\", \"height\":\"1\", \"bool\":\"true\"}}" +``` + +#### Response + +```json +{ + "error": "", + "result": { + "response": { + "log": "exists", + "height": "0", + "proof": "010114FED0DAD959F36091AD761C922ABA3CBF1D8349990101020103011406AA2262E2F448242DF2C2607C3CDC705313EE3B0001149D16177BC71E445476174622EA559715C293740C", + "value": "61626364", + "key": "61626364", + "index": "-1", + "code": "0" + } + }, + "id": 0, + "jsonrpc": "2.0" +} +``` + +## Evidence Routes + +### BroadcastEvidence + +Broadcast evidence of the misbehavior. + +#### Parameters + +- `evidence (string)`: + +#### Request + +##### HTTP + +```sh +curl http://localhost:26657/broadcast_evidence?evidence=JSON_EVIDENCE_encoded +``` + +#### JSONRPC + +```sh +curl -X POST https://localhost:26657 -d "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"broadcast_evidence\",\"params\":{\"evidence\":\"JSON_EVIDENCE_encoded\"}}" +``` + +#### Response + +```json +{ + "error": "", + "result": "", + "id": 0, + "jsonrpc": "2.0" +} +``` diff --git a/state/execution_test.go b/state/execution_test.go index 1bb09399e..1e7feaed9 100644 --- a/state/execution_test.go +++ b/state/execution_test.go @@ -40,7 +40,7 @@ var ( func TestApplyBlock(t *testing.T) { app := &testApp{} cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.Nil(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -78,7 +78,7 @@ func TestApplyBlock(t *testing.T) { func TestBeginBlockValidators(t *testing.T) { app := &testApp{} cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.Nil(t, err) defer proxyApp.Stop() //nolint:errcheck // no need to check error again @@ -141,7 +141,7 @@ func TestBeginBlockValidators(t *testing.T) { func TestBeginBlockByzantineValidators(t *testing.T) { app := &testApp{} cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.Nil(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -254,7 +254,7 @@ func TestProcessProposal(t *testing.T) { app.On("ProcessProposal", mock.Anything).Return(abci.ResponseProcessProposal{Status: abci.ResponseProcessProposal_ACCEPT}) cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.NoError(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -458,7 +458,7 @@ func TestUpdateValidators(t *testing.T) { func TestEndBlockValidatorUpdates(t *testing.T) { app := &testApp{} cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.Nil(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -543,7 +543,7 @@ func TestEndBlockValidatorUpdates(t *testing.T) { func TestEndBlockValidatorUpdatesResultingInEmptySet(t *testing.T) { app := &testApp{} cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.Nil(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -580,7 +580,7 @@ func TestEmptyPrepareProposal(t *testing.T) { app := abcimocks.NewBaseMock() cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.NoError(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -643,7 +643,7 @@ func TestPrepareProposalErrorOnNonExistingRemoved(t *testing.T) { app.On("PrepareProposal", mock.Anything).Return(rpp) cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.NoError(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -691,7 +691,7 @@ func TestPrepareProposalRemoveTxs(t *testing.T) { TxRecords: trs, }) cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.NoError(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -742,7 +742,7 @@ func TestPrepareProposalAddedTxsIncluded(t *testing.T) { TxRecords: trs, }) cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.NoError(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -791,7 +791,7 @@ func TestPrepareProposalReorderTxs(t *testing.T) { }) cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.NoError(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -844,7 +844,7 @@ func TestPrepareProposalErrorOnTooManyTxs(t *testing.T) { }) cc := proxy.NewLocalClientCreator(app) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.NoError(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests @@ -890,7 +890,7 @@ func TestPrepareProposalErrorOnPrepareProposalError(t *testing.T) { cm.On("Stop").Return(nil) cc := &pmocks.ClientCreator{} cc.On("NewABCIClient").Return(cm, nil) - proxyApp := proxy.NewAppConns(cc) + proxyApp := proxy.NewAppConns(cc, proxy.NopMetrics()) err := proxyApp.Start() require.NoError(t, err) defer proxyApp.Stop() //nolint:errcheck // ignore for tests diff --git a/state/helpers_test.go b/state/helpers_test.go index 48a862f6b..d293cdeb4 100644 --- a/state/helpers_test.go +++ b/state/helpers_test.go @@ -29,7 +29,7 @@ type paramsChangeTestCase struct { func newTestApp() proxy.AppConns { app := &testApp{} cc := proxy.NewLocalClientCreator(app) - return proxy.NewAppConns(cc) + return proxy.NewAppConns(cc, proxy.NopMetrics()) } func makeAndCommitGoodBlock( diff --git a/state/indexer/block.go b/state/indexer/block.go index b12c1f671..9c4bb0e54 100644 --- a/state/indexer/block.go +++ b/state/indexer/block.go @@ -7,7 +7,7 @@ import ( "github.com/tendermint/tendermint/types" ) -//go:generate mockery --case underscore --name BlockIndexer +//go:generate ../../scripts/mockery_generate.sh BlockIndexer // BlockIndexer defines an interface contract for indexing block events. type BlockIndexer interface { diff --git a/state/indexer/mocks/block_indexer.go b/state/indexer/mocks/block_indexer.go index a7ef256a4..2c0f0ecb0 100644 --- a/state/indexer/mocks/block_indexer.go +++ b/state/indexer/mocks/block_indexer.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/state/indexer/sink/psql/psql_test.go b/state/indexer/sink/psql/psql_test.go index b40ba395e..d3281b99f 100644 --- a/state/indexer/sink/psql/psql_test.go +++ b/state/indexer/sink/psql/psql_test.go @@ -5,7 +5,6 @@ import ( "database/sql" "flag" "fmt" - "io/ioutil" "log" "os" "os/signal" @@ -227,7 +226,7 @@ func newTestBlockHeader() types.EventDataNewBlockHeader { // readSchema loads the indexing database schema file func readSchema() ([]*schema.Migration, error) { const filename = "schema.sql" - contents, err := ioutil.ReadFile(filename) + contents, err := os.ReadFile(filename) if err != nil { return nil, fmt.Errorf("failed to read sql file from '%s': %w", filename, err) } diff --git a/state/metrics.gen.go b/state/metrics.gen.go new file mode 100644 index 000000000..288f1d7cf --- /dev/null +++ b/state/metrics.gen.go @@ -0,0 +1,32 @@ +// Code generated by metricsgen. DO NOT EDIT. + +package state + +import ( + "github.com/go-kit/kit/metrics/discard" + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { + labels := []string{} + for i := 0; i < len(labelsAndValues); i += 2 { + labels = append(labels, labelsAndValues[i]) + } + return &Metrics{ + BlockProcessingTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_processing_time", + Help: "Time between BeginBlock and EndBlock in ms.", + + Buckets: stdprometheus.LinearBuckets(1, 10, 10), + }, labels).With(labelsAndValues...), + } +} + +func NopMetrics() *Metrics { + return &Metrics{ + BlockProcessingTime: discard.NewHistogram(), + } +} diff --git a/state/metrics.go b/state/metrics.go index bcd713f5f..72712144f 100644 --- a/state/metrics.go +++ b/state/metrics.go @@ -2,9 +2,6 @@ package state import ( "github.com/go-kit/kit/metrics" - "github.com/go-kit/kit/metrics/discard" - "github.com/go-kit/kit/metrics/prometheus" - stdprometheus "github.com/prometheus/client_golang/prometheus" ) const ( @@ -13,34 +10,10 @@ const ( MetricsSubsystem = "state" ) +//go:generate go run ../scripts/metricsgen -struct=Metrics + // Metrics contains metrics exposed by this package. type Metrics struct { - // Time between BeginBlock and EndBlock. - BlockProcessingTime metrics.Histogram -} - -// PrometheusMetrics returns Metrics build using Prometheus client library. -// Optionally, labels can be provided along with their values ("foo", -// "fooValue"). -func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { - labels := []string{} - for i := 0; i < len(labelsAndValues); i += 2 { - labels = append(labels, labelsAndValues[i]) - } - return &Metrics{ - BlockProcessingTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "block_processing_time", - Help: "Time between BeginBlock and EndBlock in ms.", - Buckets: stdprometheus.LinearBuckets(1, 10, 10), - }, labels).With(labelsAndValues...), - } -} - -// NopMetrics returns no-op Metrics. -func NopMetrics() *Metrics { - return &Metrics{ - BlockProcessingTime: discard.NewHistogram(), - } + // Time between BeginBlock and EndBlock in ms. + BlockProcessingTime metrics.Histogram `metrics_buckettype:"lin" metrics_bucketsizes:"1, 10, 10"` } diff --git a/state/mocks/block_store.go b/state/mocks/block_store.go index 70ee53839..4493a6e3f 100644 --- a/state/mocks/block_store.go +++ b/state/mocks/block_store.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/state/mocks/evidence_pool.go b/state/mocks/evidence_pool.go index 6f41e120f..7279d36f7 100644 --- a/state/mocks/evidence_pool.go +++ b/state/mocks/evidence_pool.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/state/mocks/store.go b/state/mocks/store.go index 4308d4ead..13474c023 100644 --- a/state/mocks/store.go +++ b/state/mocks/store.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/state/services.go b/state/services.go index 2cc376e85..2b6c16fed 100644 --- a/state/services.go +++ b/state/services.go @@ -12,7 +12,7 @@ import ( //------------------------------------------------------ // blockstore -//go:generate mockery --case underscore --name BlockStore +//go:generate ../scripts/mockery_generate.sh BlockStore // BlockStore defines the interface used by the ConsensusState. type BlockStore interface { @@ -38,7 +38,7 @@ type BlockStore interface { //----------------------------------------------------------------------------- // evidence pool -//go:generate mockery --case underscore --name EvidencePool +//go:generate ../scripts/mockery_generate.sh EvidencePool // EvidencePool defines the EvidencePool interface used by State. type EvidencePool interface { diff --git a/state/state.go b/state/state.go index 8082da7b8..f67ded422 100644 --- a/state/state.go +++ b/state/state.go @@ -4,7 +4,7 @@ import ( "bytes" "errors" "fmt" - "io/ioutil" + "os" "time" "github.com/gogo/protobuf/proto" @@ -303,7 +303,7 @@ func MakeGenesisStateFromFile(genDocFile string) (State, error) { // MakeGenesisDocFromFile reads and unmarshals genesis doc from the given file. func MakeGenesisDocFromFile(genDocFile string) (*types.GenesisDoc, error) { - genDocJSON, err := ioutil.ReadFile(genDocFile) + genDocJSON, err := os.ReadFile(genDocFile) if err != nil { return nil, fmt.Errorf("couldn't read GenesisDoc file: %v", err) } diff --git a/state/store.go b/state/store.go index b46fe56b2..428460db8 100644 --- a/state/store.go +++ b/state/store.go @@ -39,7 +39,7 @@ func calcABCIResponsesKey(height int64) []byte { //---------------------- -//go:generate mockery --case underscore --name Store +//go:generate ../scripts/mockery_generate.sh Store // Store defines the state store interface // diff --git a/state/txindex/indexer.go b/state/txindex/indexer.go index 03474f43d..5fa50ac2f 100644 --- a/state/txindex/indexer.go +++ b/state/txindex/indexer.go @@ -10,7 +10,7 @@ import ( // XXX/TODO: These types should be moved to the indexer package. -//go:generate mockery --case underscore --name TxIndexer +//go:generate ../../scripts/mockery_generate.sh TxIndexer // TxIndexer interface defines methods to index and search transactions. type TxIndexer interface { diff --git a/state/txindex/kv/kv_bench_test.go b/state/txindex/kv/kv_bench_test.go index fdfe550f3..85491aad5 100644 --- a/state/txindex/kv/kv_bench_test.go +++ b/state/txindex/kv/kv_bench_test.go @@ -4,7 +4,7 @@ import ( "context" "crypto/rand" "fmt" - "io/ioutil" + "os" "testing" dbm "github.com/tendermint/tm-db" @@ -15,7 +15,7 @@ import ( ) func BenchmarkTxSearch(b *testing.B) { - dbDir, err := ioutil.TempDir("", "benchmark_tx_search_test") + dbDir, err := os.MkdirTemp("", "benchmark_tx_search_test") if err != nil { b.Errorf("failed to create temporary directory: %s", err) } diff --git a/state/txindex/kv/kv_test.go b/state/txindex/kv/kv_test.go index 9b15c1971..cfaacf67a 100644 --- a/state/txindex/kv/kv_test.go +++ b/state/txindex/kv/kv_test.go @@ -3,7 +3,6 @@ package kv import ( "context" "fmt" - "io/ioutil" "os" "testing" @@ -329,7 +328,7 @@ func txResultWithEvents(events []abci.Event) *abci.TxResult { } func benchmarkTxIndex(txsCount int64, b *testing.B) { - dir, err := ioutil.TempDir("", "tx_index_db") + dir, err := os.MkdirTemp("", "tx_index_db") require.NoError(b, err) defer os.RemoveAll(dir) diff --git a/state/txindex/mocks/tx_indexer.go b/state/txindex/mocks/tx_indexer.go index 64dd50ec7..93d0eb9c2 100644 --- a/state/txindex/mocks/tx_indexer.go +++ b/state/txindex/mocks/tx_indexer.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/statesync/chunks.go b/statesync/chunks.go index 028c863b9..d3f8d33c6 100644 --- a/statesync/chunks.go +++ b/statesync/chunks.go @@ -3,7 +3,6 @@ package statesync import ( "errors" "fmt" - "io/ioutil" "os" "path/filepath" "strconv" @@ -42,7 +41,7 @@ type chunkQueue struct { // newChunkQueue creates a new chunk queue for a snapshot, using a temp dir for storage. // Callers must call Close() when done. func newChunkQueue(snapshot *snapshot, tempDir string) (*chunkQueue, error) { - dir, err := ioutil.TempDir(tempDir, "tm-statesync") + dir, err := os.MkdirTemp(tempDir, "tm-statesync") if err != nil { return nil, fmt.Errorf("unable to create temp dir for state sync chunks: %w", err) } @@ -84,7 +83,7 @@ func (q *chunkQueue) Add(chunk *chunk) (bool, error) { } path := filepath.Join(q.dir, strconv.FormatUint(uint64(chunk.Index), 10)) - err := ioutil.WriteFile(path, chunk.Chunk, 0600) + err := os.WriteFile(path, chunk.Chunk, 0600) if err != nil { return false, fmt.Errorf("failed to save chunk %v to file %v: %w", chunk.Index, path, err) } @@ -209,7 +208,7 @@ func (q *chunkQueue) load(index uint32) (*chunk, error) { if !ok { return nil, nil } - body, err := ioutil.ReadFile(path) + body, err := os.ReadFile(path) if err != nil { return nil, fmt.Errorf("failed to load chunk %v: %w", index, err) } diff --git a/statesync/chunks_test.go b/statesync/chunks_test.go index 2b9a5d751..3bc42e6aa 100644 --- a/statesync/chunks_test.go +++ b/statesync/chunks_test.go @@ -1,7 +1,6 @@ package statesync import ( - "io/ioutil" "os" "testing" @@ -36,20 +35,20 @@ func TestNewChunkQueue_TempDir(t *testing.T) { Hash: []byte{7}, Metadata: nil, } - dir, err := ioutil.TempDir("", "newchunkqueue") + dir, err := os.MkdirTemp("", "newchunkqueue") require.NoError(t, err) defer os.RemoveAll(dir) queue, err := newChunkQueue(snapshot, dir) require.NoError(t, err) - files, err := ioutil.ReadDir(dir) + files, err := os.ReadDir(dir) require.NoError(t, err) assert.Len(t, files, 1) err = queue.Close() require.NoError(t, err) - files, err = ioutil.ReadDir(dir) + files, err = os.ReadDir(dir) require.NoError(t, err) assert.Len(t, files, 0) } diff --git a/statesync/mocks/state_provider.go b/statesync/mocks/state_provider.go index 7ff78561f..f52b9e33d 100644 --- a/statesync/mocks/state_provider.go +++ b/statesync/mocks/state_provider.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery. DO NOT EDIT. package mocks diff --git a/statesync/stateprovider.go b/statesync/stateprovider.go index ad6036468..2ed7b93a2 100644 --- a/statesync/stateprovider.go +++ b/statesync/stateprovider.go @@ -22,7 +22,7 @@ import ( "github.com/tendermint/tendermint/version" ) -//go:generate mockery --case underscore --name StateProvider +//go:generate ../scripts/mockery_generate.sh StateProvider // StateProvider is a provider of trusted state data for bootstrapping a node. This refers // to the state.State object, not the state machine. diff --git a/test/README.md b/test/README.md index 230b7339c..18839f0cd 100644 --- a/test/README.md +++ b/test/README.md @@ -10,8 +10,6 @@ and run the following tests in docker containers: - includes test coverage - app tests - kvstore app over socket - - counter app over socket - - counter app over grpc - persistence tests - crash tendermint at each of many predefined points, restart, and ensure it syncs properly with the app diff --git a/test/app/test.sh b/test/app/test.sh index dc60bfc1f..9a83045ce 100755 --- a/test/app/test.sh +++ b/test/app/test.sh @@ -2,9 +2,6 @@ set -ex #- kvstore over socket, curl -#- counter over socket, curl -#- counter over grpc, curl -#- counter over grpc, grpc # TODO: install everything @@ -45,57 +42,6 @@ function kvstore_over_socket_reorder(){ kill -9 $pid_kvstore $pid_tendermint } - -function counter_over_socket() { - rm -rf $TMHOME - tendermint init - echo "Starting counter_over_socket" - abci-cli counter --serial > /dev/null & - pid_counter=$! - tendermint node > tendermint.log & - pid_tendermint=$! - sleep 5 - - echo "running test" - bash test/app/counter_test.sh "Counter over Socket" - - kill -9 $pid_counter $pid_tendermint -} - -function counter_over_grpc() { - rm -rf $TMHOME - tendermint init - echo "Starting counter_over_grpc" - abci-cli counter --serial --abci grpc > /dev/null & - pid_counter=$! - tendermint node --abci grpc > tendermint.log & - pid_tendermint=$! - sleep 5 - - echo "running test" - bash test/app/counter_test.sh "Counter over GRPC" - - kill -9 $pid_counter $pid_tendermint -} - -function counter_over_grpc_grpc() { - rm -rf $TMHOME - tendermint init - echo "Starting counter_over_grpc_grpc (ie. with grpc broadcast_tx)" - abci-cli counter --serial --abci grpc > /dev/null & - pid_counter=$! - sleep 1 - GRPC_PORT=36656 - tendermint node --abci grpc --rpc.grpc_laddr tcp://localhost:$GRPC_PORT > tendermint.log & - pid_tendermint=$! - sleep 5 - - echo "running test" - GRPC_BROADCAST_TX=true bash test/app/counter_test.sh "Counter over GRPC via GRPC BroadcastTx" - - kill -9 $pid_counter $pid_tendermint -} - case "$1" in "kvstore_over_socket") kvstore_over_socket @@ -103,25 +49,10 @@ case "$1" in "kvstore_over_socket_reorder") kvstore_over_socket_reorder ;; - "counter_over_socket") - counter_over_socket - ;; -"counter_over_grpc") - counter_over_grpc - ;; - "counter_over_grpc_grpc") - counter_over_grpc_grpc - ;; *) echo "Running all" kvstore_over_socket echo "" kvstore_over_socket_reorder echo "" - counter_over_socket - echo "" - counter_over_grpc - echo "" - counter_over_grpc_grpc esac - diff --git a/test/e2e/Makefile b/test/e2e/Makefile index c751aab89..8bafe7ca9 100644 --- a/test/e2e/Makefile +++ b/test/e2e/Makefile @@ -8,11 +8,6 @@ docker: # ABCI testing). node: go build -o build/node -tags badgerdb,boltdb,cleveldb,rocksdb ./node - -# To be used primarily by the e2e docker instance. If you want to produce this binary -# elsewhere, then run go build in the maverick directory. -maverick: - go build -o build/maverick -tags badgerdb,boltdb,cleveldb,rocksdb ../maverick generator: go build -o build/generator ./generator @@ -20,4 +15,4 @@ generator: runner: go build -o build/runner ./runner -.PHONY: all node docker generator maverick runner +.PHONY: all node docker generator runner diff --git a/test/e2e/README.md b/test/e2e/README.md index 09b6d554e..4f89d2885 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -54,7 +54,7 @@ Auxiliary commands: * `logs`: outputs all node logs. -* `tail`: tails (follows) node logs until cancelled. +* `tail`: tails (follows) node logs until canceled. ## Tests diff --git a/test/e2e/app/snapshots.go b/test/e2e/app/snapshots.go index 4ef20375f..38ecd9237 100644 --- a/test/e2e/app/snapshots.go +++ b/test/e2e/app/snapshots.go @@ -5,7 +5,6 @@ import ( "encoding/json" "errors" "fmt" - "io/ioutil" "math" "os" "path/filepath" @@ -45,7 +44,7 @@ func (s *SnapshotStore) loadMetadata() error { file := filepath.Join(s.dir, "metadata.json") metadata := []abci.Snapshot{} - bz, err := ioutil.ReadFile(file) + bz, err := os.ReadFile(file) switch { case errors.Is(err, os.ErrNotExist): case err != nil: @@ -72,7 +71,7 @@ func (s *SnapshotStore) saveMetadata() error { // save the file to a new file and move it to make saving atomic. newFile := filepath.Join(s.dir, "metadata.json.new") file := filepath.Join(s.dir, "metadata.json") - err = ioutil.WriteFile(newFile, bz, 0644) // nolint: gosec + err = os.WriteFile(newFile, bz, 0644) // nolint: gosec if err != nil { return err } @@ -93,7 +92,7 @@ func (s *SnapshotStore) Create(state *State) (abci.Snapshot, error) { Hash: hashItems(state.Values), Chunks: byteChunks(bz), } - err = ioutil.WriteFile(filepath.Join(s.dir, fmt.Sprintf("%v.json", state.Height)), bz, 0644) + err = os.WriteFile(filepath.Join(s.dir, fmt.Sprintf("%v.json", state.Height)), bz, 0644) if err != nil { return abci.Snapshot{}, err } @@ -122,7 +121,7 @@ func (s *SnapshotStore) LoadChunk(height uint64, format uint32, chunk uint32) ([ defer s.RUnlock() for _, snapshot := range s.metadata { if snapshot.Height == height && snapshot.Format == format { - bz, err := ioutil.ReadFile(filepath.Join(s.dir, fmt.Sprintf("%v.json", height))) + bz, err := os.ReadFile(filepath.Join(s.dir, fmt.Sprintf("%v.json", height))) if err != nil { return nil, err } diff --git a/test/e2e/app/state.go b/test/e2e/app/state.go index 1ede6fb4c..e82a22539 100644 --- a/test/e2e/app/state.go +++ b/test/e2e/app/state.go @@ -6,7 +6,6 @@ import ( "encoding/json" "errors" "fmt" - "io/ioutil" "os" "path/filepath" "sort" @@ -52,11 +51,11 @@ func NewState(dir string, persistInterval uint64) (*State, error) { // load loads state from disk. It does not take out a lock, since it is called // during construction. func (s *State) load() error { - bz, err := ioutil.ReadFile(s.currentFile) + bz, err := os.ReadFile(s.currentFile) if err != nil { // if the current state doesn't exist then we try recover from the previous state if errors.Is(err, os.ErrNotExist) { - bz, err = ioutil.ReadFile(s.previousFile) + bz, err = os.ReadFile(s.previousFile) if err != nil { return fmt.Errorf("failed to read both current and previous state (%q): %w", s.previousFile, err) @@ -82,7 +81,7 @@ func (s *State) save() error { // We write the state to a separate file and move it to the destination, to // make it atomic. newFile := fmt.Sprintf("%v.new", s.currentFile) - err = ioutil.WriteFile(newFile, bz, 0644) + err = os.WriteFile(newFile, bz, 0644) if err != nil { return fmt.Errorf("failed to write state to %q: %w", s.currentFile, err) } @@ -160,7 +159,7 @@ func (s *State) Commit() (uint64, []byte, error) { } func (s *State) Rollback() error { - bz, err := ioutil.ReadFile(s.previousFile) + bz, err := os.ReadFile(s.previousFile) if err != nil { return fmt.Errorf("failed to read state from %q: %w", s.previousFile, err) } diff --git a/test/e2e/docker/Dockerfile b/test/e2e/docker/Dockerfile index efe2183e9..0eaad525c 100644 --- a/test/e2e/docker/Dockerfile +++ b/test/e2e/docker/Dockerfile @@ -1,7 +1,7 @@ # We need to build in a Linux environment to support C libraries, e.g. RocksDB. # We use Debian instead of Alpine, so that we can use binary database packages # instead of spending time compiling them. -FROM golang:1.15 +FROM golang:1.18 RUN apt-get -qq update -y && apt-get -qq upgrade -y >/dev/null RUN apt-get -qq install -y libleveldb-dev librocksdb-dev >/dev/null @@ -18,7 +18,6 @@ RUN go mod download COPY . . RUN make build && cp build/tendermint /usr/bin/tendermint COPY test/e2e/docker/entrypoint* /usr/bin/ -RUN cd test/e2e && make maverick && cp build/maverick /usr/bin/maverick RUN cd test/e2e && make node && cp build/node /usr/bin/app # Set up runtime directory. We don't use a separate runtime image since we need diff --git a/test/e2e/docker/entrypoint-maverick b/test/e2e/docker/entrypoint-maverick deleted file mode 100755 index 9469e2447..000000000 --- a/test/e2e/docker/entrypoint-maverick +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -# Forcibly remove any stray UNIX sockets left behind from previous runs -rm -rf /var/run/privval.sock /var/run/app.sock - -/usr/bin/app /tendermint/config/app.toml & - -sleep 1 - -/usr/bin/maverick "$@" diff --git a/test/e2e/generator/generate.go b/test/e2e/generator/generate.go index f0c6fef7d..e203e4d6b 100644 --- a/test/e2e/generator/generate.go +++ b/test/e2e/generator/generate.go @@ -4,7 +4,6 @@ import ( "fmt" "math/rand" "sort" - "strconv" "strings" e2e "github.com/tendermint/tendermint/test/e2e/pkg" @@ -27,10 +26,9 @@ var ( nodeDatabases = uniformChoice{"goleveldb", "cleveldb", "rocksdb", "boltdb", "badgerdb"} ipv6 = uniformChoice{false, true} // FIXME: grpc disabled due to https://github.com/tendermint/tendermint/issues/5439 - nodeABCIProtocols = uniformChoice{"unix", "tcp", "builtin"} // "grpc" - nodePrivvalProtocols = uniformChoice{"file", "unix", "tcp"} - // FIXME: v2 disabled due to flake - nodeFastSyncs = uniformChoice{"v0"} // "v2" + nodeABCIProtocols = uniformChoice{"unix", "tcp", "builtin"} // "grpc" + nodePrivvalProtocols = uniformChoice{"file", "unix", "tcp"} + nodeFastSyncs = uniformChoice{false, true} nodeStateSyncs = uniformChoice{false, true} nodeMempools = uniformChoice{"v0", "v1"} nodePersistIntervals = uniformChoice{0, 1, 5} @@ -42,13 +40,6 @@ var ( "kill": 0.1, "restart": 0.1, } - nodeMisbehaviors = weightedChoice{ - // FIXME: evidence disabled due to node panicing when not - // having sufficient block history to process evidence. - // https://github.com/tendermint/tendermint/issues/5617 - // misbehaviorOption{"double-prevote"}: 1, - misbehaviorOption{}: 9, - } ) // Generate generates random testnets using the given RNG. @@ -210,7 +201,7 @@ func generateNode( StartAt: startAt, Database: nodeDatabases.Choose(r).(string), PrivvalProtocol: nodePrivvalProtocols.Choose(r).(string), - FastSync: nodeFastSyncs.Choose(r).(string), + FastSync: nodeFastSyncs.Choose(r).(bool), Mempool: nodeMempools.Choose(r).(string), StateSync: nodeStateSyncs.Choose(r).(bool) && startAt > 0, PersistInterval: ptrUint64(uint64(nodePersistIntervals.Choose(r).(int))), @@ -226,17 +217,6 @@ func generateNode( node.SnapshotInterval = 3 } - if node.Mode == string(e2e.ModeValidator) { - misbehaveAt := startAt + 5 + int64(r.Intn(10)) - if startAt == 0 { - misbehaveAt += initialHeight - 1 - } - node.Misbehaviors = nodeMisbehaviors.Choose(r).(misbehaviorOption).atHeight(misbehaveAt) - if len(node.Misbehaviors) != 0 { - node.PrivvalProtocol = "file" - } - } - // If a node which does not persist state also does not retain blocks, randomly // choose to either persist state or retain all blocks. if node.PersistInterval != nil && *node.PersistInterval == 0 && node.RetainBlocks > 0 { @@ -274,16 +254,3 @@ func generateLightNode(r *rand.Rand, startAt int64, providers []string) *e2e.Man func ptrUint64(i uint64) *uint64 { return &i } - -type misbehaviorOption struct { - misbehavior string -} - -func (m misbehaviorOption) atHeight(height int64) map[string]string { - misbehaviorMap := make(map[string]string) - if m.misbehavior == "" { - return misbehaviorMap - } - misbehaviorMap[strconv.Itoa(int(height))] = m.misbehavior - return misbehaviorMap -} diff --git a/test/e2e/generator/random.go b/test/e2e/generator/random.go index ec59a01b2..f21502118 100644 --- a/test/e2e/generator/random.go +++ b/test/e2e/generator/random.go @@ -56,28 +56,6 @@ func (uc uniformChoice) Choose(r *rand.Rand) interface{} { return uc[r.Intn(len(uc))] } -// weightedChoice chooses a single random key from a map of keys and weights. -type weightedChoice map[interface{}]uint - -func (wc weightedChoice) Choose(r *rand.Rand) interface{} { - total := 0 - choices := make([]interface{}, 0, len(wc)) - for choice, weight := range wc { - total += int(weight) - choices = append(choices, choice) - } - - rem := r.Intn(total) - for _, choice := range choices { - rem -= int(wc[choice]) - if rem <= 0 { - return choice - } - } - - return nil -} - // probSetChoice picks a set of strings based on each string's probability (0-1). type probSetChoice map[string]float64 diff --git a/test/e2e/networks/ci.toml b/test/e2e/networks/ci.toml index 7ed1049df..1fff574ca 100644 --- a/test/e2e/networks/ci.toml +++ b/test/e2e/networks/ci.toml @@ -37,7 +37,6 @@ seeds = ["seed01"] seeds = ["seed01"] snapshot_interval = 5 perturb = ["disconnect"] -misbehaviors = { 1018 = "double-prevote" } [node.validator02] seeds = ["seed02"] @@ -67,7 +66,7 @@ perturb = ["pause"] start_at = 1005 # Becomes part of the validator set at 1010 seeds = ["seed02"] database = "cleveldb" -fast_sync = "v0" +fast_sync = true mempool_version = "v1" # FIXME: should be grpc, disabled due to https://github.com/tendermint/tendermint/issues/5439 #abci_protocol = "grpc" @@ -77,8 +76,7 @@ perturb = ["kill", "pause", "disconnect", "restart"] [node.full01] start_at = 1010 mode = "full" -# FIXME: should be v2, disabled due to flake -fast_sync = "v0" +fast_sync = true persistent_peers = ["validator01", "validator02", "validator03", "validator04", "validator05"] retain_blocks = 1 perturb = ["restart"] @@ -86,8 +84,7 @@ perturb = ["restart"] [node.full02] start_at = 1015 mode = "full" -# FIXME: should be v2, disabled due to flake -fast_sync = "v0" +fast_sync = true state_sync = true seeds = ["seed01"] perturb = ["restart"] diff --git a/test/e2e/node/main.go b/test/e2e/node/main.go index b109d5397..9a701dcc4 100644 --- a/test/e2e/node/main.go +++ b/test/e2e/node/main.go @@ -7,7 +7,6 @@ import ( "net/http" "os" "path/filepath" - "strconv" "strings" "time" @@ -30,8 +29,6 @@ import ( rpcserver "github.com/tendermint/tendermint/rpc/jsonrpc/server" "github.com/tendermint/tendermint/test/e2e/app" e2e "github.com/tendermint/tendermint/test/e2e/pkg" - mcs "github.com/tendermint/tendermint/test/maverick/consensus" - maverick "github.com/tendermint/tendermint/test/maverick/node" ) var logger = log.NewTMLogger(log.NewSyncWriter(os.Stdout)) @@ -75,14 +72,10 @@ func run(configFile string) error { case "socket", "grpc": err = startApp(cfg) case "builtin": - if len(cfg.Misbehaviors) == 0 { - if cfg.Mode == string(e2e.ModeLight) { - err = startLightClient(cfg) - } else { - err = startNode(cfg) - } + if cfg.Mode == string(e2e.ModeLight) { + err = startLightClient(cfg) } else { - err = startMaverick(cfg) + err = startNode(cfg) } default: err = fmt.Errorf("invalid protocol %q", cfg.Protocol) @@ -202,43 +195,6 @@ func startLightClient(cfg *Config) error { return nil } -// FIXME: Temporarily disconnected maverick until it is redesigned -// startMaverick starts a Maverick node that runs the application directly. It assumes the Tendermint -// configuration is in $TMHOME/config/tendermint.toml. -func startMaverick(cfg *Config) error { - app, err := app.NewApplication(cfg.App()) - if err != nil { - return err - } - - tmcfg, logger, nodeKey, err := setupNode() - if err != nil { - return fmt.Errorf("failed to setup config: %w", err) - } - - misbehaviors := make(map[int64]mcs.Misbehavior, len(cfg.Misbehaviors)) - for heightString, misbehaviorString := range cfg.Misbehaviors { - height, _ := strconv.ParseInt(heightString, 10, 64) - misbehaviors[height] = mcs.MisbehaviorList[misbehaviorString] - } - - n, err := maverick.NewNode(tmcfg, - maverick.LoadOrGenFilePV(tmcfg.PrivValidatorKeyFile(), tmcfg.PrivValidatorStateFile()), - nodeKey, - proxy.NewLocalClientCreator(app), - maverick.DefaultGenesisDocProviderFunc(tmcfg), - maverick.DefaultDBProvider, - maverick.DefaultMetricsProvider(tmcfg.Instrumentation), - logger, - misbehaviors, - ) - if err != nil { - return err - } - - return n.Start() -} - // startSigner starts a signer server connecting to the given endpoint. func startSigner(cfg *Config) error { filePV := privval.LoadFilePV(cfg.PrivValKey, cfg.PrivValState) diff --git a/test/e2e/pkg/manifest.go b/test/e2e/pkg/manifest.go index 65c0cf84b..44c8e9118 100644 --- a/test/e2e/pkg/manifest.go +++ b/test/e2e/pkg/manifest.go @@ -88,9 +88,8 @@ type ManifestNode struct { // runner will wait for the network to reach at least this block height. StartAt int64 `toml:"start_at"` - // FastSync specifies the fast sync mode: "" (disable), "v0", "v1", or "v2". - // Defaults to disabled. - FastSync string `toml:"fast_sync"` + // FastSync specifies whether to enable the fast sync protocol. + FastSync bool `toml:"fast_sync"` // Mempool specifies which version of mempool to use. Either "v0" or "v1" // This defaults to v0. @@ -124,16 +123,6 @@ type ManifestNode struct { // pause: temporarily pauses (freezes) the node // restart: restarts the node, shutting it down with SIGTERM Perturb []string `toml:"perturb"` - - // Misbehaviors sets how a validator behaves during consensus at a - // certain height. Multiple misbehaviors at different heights can be used - // - // An example of misbehaviors - // { 10 = "double-prevote", 20 = "double-prevote"} - // - // For more information, look at the readme in the maverick folder. - // A list of all behaviors can be found in ../maverick/consensus/behavior.go - Misbehaviors map[string]string `toml:"misbehaviors"` } // Save saves the testnet manifest to a file. diff --git a/test/e2e/pkg/testnet.go b/test/e2e/pkg/testnet.go index 4d46de922..1123c26a9 100644 --- a/test/e2e/pkg/testnet.go +++ b/test/e2e/pkg/testnet.go @@ -16,7 +16,6 @@ import ( "github.com/tendermint/tendermint/crypto/ed25519" "github.com/tendermint/tendermint/crypto/secp256k1" rpchttp "github.com/tendermint/tendermint/rpc/client/http" - mcs "github.com/tendermint/tendermint/test/maverick/consensus" ) const ( @@ -73,7 +72,7 @@ type Node struct { IP net.IP ProxyPort uint32 StartAt int64 - FastSync string + FastSync bool StateSync bool Mempool string Database string @@ -85,7 +84,6 @@ type Node struct { Seeds []*Node PersistentPeers []*Node Perturbations []Perturbation - Misbehaviors map[int64]string } // LoadTestnet loads a testnet from a manifest file, using the filename to @@ -164,7 +162,6 @@ func LoadTestnet(file string) (*Testnet, error) { SnapshotInterval: nodeManifest.SnapshotInterval, RetainBlocks: nodeManifest.RetainBlocks, Perturbations: []Perturbation{}, - Misbehaviors: make(map[int64]string), } if node.StartAt == testnet.InitialHeight { node.StartAt = 0 // normalize to 0 for initial nodes, since code expects this @@ -187,13 +184,6 @@ func LoadTestnet(file string) (*Testnet, error) { for _, p := range nodeManifest.Perturb { node.Perturbations = append(node.Perturbations, Perturbation(p)) } - for heightString, misbehavior := range nodeManifest.Misbehaviors { - height, err := strconv.ParseInt(heightString, 10, 64) - if err != nil { - return nil, fmt.Errorf("unable to parse height %s to int64: %w", heightString, err) - } - node.Misbehaviors[height] = misbehavior - } testnet.Nodes = append(testnet.Nodes, node) } @@ -307,12 +297,6 @@ func (n Node) Validate(testnet Testnet) error { } } } - switch n.FastSync { - case "", "v0", "v1", "v2": - default: - return fmt.Errorf("invalid fast sync setting %q", n.FastSync) - - } switch n.Mempool { case "", "v0", "v1": default: @@ -362,30 +346,6 @@ func (n Node) Validate(testnet Testnet) error { } } - if (n.PrivvalProtocol != "file" || n.Mode != "validator") && len(n.Misbehaviors) != 0 { - return errors.New("must be using \"file\" privval protocol to implement misbehaviors") - } - - for height, misbehavior := range n.Misbehaviors { - if height < n.StartAt { - return fmt.Errorf("misbehavior height %d is below node start height %d", - height, n.StartAt) - } - if height < testnet.InitialHeight { - return fmt.Errorf("misbehavior height %d is below network initial height %d", - height, testnet.InitialHeight) - } - exists := false - for possibleBehaviors := range mcs.MisbehaviorList { - if possibleBehaviors == misbehavior { - exists = true - } - } - if !exists { - return fmt.Errorf("misbehavior %s does not exist", misbehavior) - } - } - return nil } @@ -437,19 +397,6 @@ func (t Testnet) HasPerturbations() bool { return false } -// LastMisbehaviorHeight returns the height of the last misbehavior. -func (t Testnet) LastMisbehaviorHeight() int64 { - lastHeight := int64(0) - for _, node := range t.Nodes { - for height := range node.Misbehaviors { - if height > lastHeight { - lastHeight = height - } - } - } - return lastHeight -} - // Address returns a P2P endpoint address for the node. func (n Node) AddressP2P(withID bool) string { ip := n.IP.String() diff --git a/test/e2e/runner/main.go b/test/e2e/runner/main.go index a9ee31985..80048ee98 100644 --- a/test/e2e/runner/main.go +++ b/test/e2e/runner/main.go @@ -71,14 +71,6 @@ func NewCLI() *CLI { return err } - if lastMisbehavior := cli.testnet.LastMisbehaviorHeight(); lastMisbehavior > 0 { - // wait for misbehaviors before starting perturbations. We do a separate - // wait for another 5 blocks, since the last misbehavior height may be - // in the past depending on network startup ordering. - if err := WaitUntil(cli.testnet, lastMisbehavior); err != nil { - return err - } - } if err := Wait(cli.testnet, 5); err != nil { // allow some txs to go through return err } diff --git a/test/e2e/runner/setup.go b/test/e2e/runner/setup.go index 919e4542f..2dd84eaf2 100644 --- a/test/e2e/runner/setup.go +++ b/test/e2e/runner/setup.go @@ -7,7 +7,6 @@ import ( "encoding/json" "errors" "fmt" - "io/ioutil" "os" "path/filepath" "regexp" @@ -53,7 +52,7 @@ func Setup(testnet *e2e.Testnet) error { if err != nil { return err } - err = ioutil.WriteFile(filepath.Join(testnet.Dir, "docker-compose.yml"), compose, 0644) + err = os.WriteFile(filepath.Join(testnet.Dir, "docker-compose.yml"), compose, 0644) if err != nil { return err } @@ -92,7 +91,7 @@ func Setup(testnet *e2e.Testnet) error { if err != nil { return err } - err = ioutil.WriteFile(filepath.Join(nodeDir, "config", "app.toml"), appCfg, 0644) + err = os.WriteFile(filepath.Join(nodeDir, "config", "app.toml"), appCfg, 0644) if err != nil { return err } @@ -168,9 +167,6 @@ services: image: tendermint/e2e-node {{- if eq .ABCIProtocol "builtin" }} entrypoint: /usr/bin/entrypoint-builtin -{{- else if .Misbehaviors }} - entrypoint: /usr/bin/entrypoint-maverick - command: ["node", "--misbehaviors", "{{ misbehaviorsToString .Misbehaviors }}"] {{- end }} init: true ports: @@ -288,11 +284,7 @@ func MakeConfig(node *e2e.Node) (*config.Config, error) { cfg.Mempool.Version = node.Mempool } - if node.FastSync == "" { - cfg.FastSyncMode = false - } else { - cfg.FastSync.Version = node.FastSync - } + cfg.FastSyncMode = node.FastSync if node.StateSync { cfg.StateSync.Enable = true @@ -369,12 +361,6 @@ func MakeAppConfig(node *e2e.Node) ([]byte, error) { } } - misbehaviors := make(map[string]string) - for height, misbehavior := range node.Misbehaviors { - misbehaviors[strconv.Itoa(int(height))] = misbehavior - } - cfg["misbehaviors"] = misbehaviors - if len(node.Testnet.ValidatorUpdates) > 0 { validatorUpdates := map[string]map[string]int64{} for height, validators := range node.Testnet.ValidatorUpdates { @@ -401,11 +387,11 @@ func UpdateConfigStateSync(node *e2e.Node, height int64, hash []byte) error { // FIXME Apparently there's no function to simply load a config file without // involving the entire Viper apparatus, so we'll just resort to regexps. - bz, err := ioutil.ReadFile(cfgPath) + bz, err := os.ReadFile(cfgPath) if err != nil { return err } bz = regexp.MustCompile(`(?m)^trust_height =.*`).ReplaceAll(bz, []byte(fmt.Sprintf(`trust_height = %v`, height))) bz = regexp.MustCompile(`(?m)^trust_hash =.*`).ReplaceAll(bz, []byte(fmt.Sprintf(`trust_hash = "%X"`, hash))) - return ioutil.WriteFile(cfgPath, bz, 0644) + return os.WriteFile(cfgPath, bz, 0644) } diff --git a/test/e2e/tests/evidence_test.go b/test/e2e/tests/evidence_test.go deleted file mode 100644 index ea24b51e5..000000000 --- a/test/e2e/tests/evidence_test.go +++ /dev/null @@ -1,57 +0,0 @@ -package e2e_test - -import ( - "bytes" - "testing" - - "github.com/stretchr/testify/require" - - e2e "github.com/tendermint/tendermint/test/e2e/pkg" - "github.com/tendermint/tendermint/types" -) - -// assert that all nodes that have blocks at the height of a misbehavior has evidence -// for that misbehavior -func TestEvidence_Misbehavior(t *testing.T) { - blocks := fetchBlockChain(t) - testNode(t, func(t *testing.T, node e2e.Node) { - seenEvidence := make(map[int64]struct{}) - for _, block := range blocks { - // Find any evidence blaming this node in this block - var nodeEvidence types.Evidence - for _, evidence := range block.Evidence.Evidence { - switch evidence := evidence.(type) { - case *types.DuplicateVoteEvidence: - if bytes.Equal(evidence.VoteA.ValidatorAddress, node.PrivvalKey.PubKey().Address()) { - nodeEvidence = evidence - } - default: - t.Fatalf("unexpected evidence type %T", evidence) - } - } - if nodeEvidence == nil { - continue // no evidence for the node at this height - } - - // Check that evidence was as expected - misbehavior, ok := node.Misbehaviors[nodeEvidence.Height()] - require.True(t, ok, "found unexpected evidence %v in height %v", - nodeEvidence, block.Height) - - switch misbehavior { - case "double-prevote": - require.IsType(t, &types.DuplicateVoteEvidence{}, nodeEvidence, "unexpected evidence type") - default: - t.Fatalf("unknown misbehavior %v", misbehavior) - } - - seenEvidence[nodeEvidence.Height()] = struct{}{} - } - // see if there is any evidence that we were expecting but didn't see - for height, misbehavior := range node.Misbehaviors { - _, ok := seenEvidence[height] - require.True(t, ok, "expected evidence for %v misbehavior at height %v by node but was never found", - misbehavior, height) - } - }) -} diff --git a/test/fuzz/mempool/v0/fuzz_test.go b/test/fuzz/mempool/v0/fuzz_test.go index 52ac92f24..d371ee3ff 100644 --- a/test/fuzz/mempool/v0/fuzz_test.go +++ b/test/fuzz/mempool/v0/fuzz_test.go @@ -1,7 +1,7 @@ package v0_test import ( - "io/ioutil" + "io" "os" "path/filepath" "testing" @@ -26,7 +26,7 @@ func TestMempoolTestdataCases(t *testing.T) { }() f, err := os.Open(filepath.Join(testdataCasesDir, entry.Name())) require.NoError(t, err) - input, err := ioutil.ReadAll(f) + input, err := io.ReadAll(f) require.NoError(t, err) mempoolv0.Fuzz(input) }) diff --git a/test/fuzz/mempool/v1/fuzz_test.go b/test/fuzz/mempool/v1/fuzz_test.go index 52ec0fb5a..000f6df3c 100644 --- a/test/fuzz/mempool/v1/fuzz_test.go +++ b/test/fuzz/mempool/v1/fuzz_test.go @@ -1,7 +1,7 @@ package v1_test import ( - "io/ioutil" + "io" "os" "path/filepath" "testing" @@ -26,7 +26,7 @@ func TestMempoolTestdataCases(t *testing.T) { }() f, err := os.Open(filepath.Join(testdataCasesDir, entry.Name())) require.NoError(t, err) - input, err := ioutil.ReadAll(f) + input, err := io.ReadAll(f) require.NoError(t, err) mempoolv1.Fuzz(input) }) diff --git a/test/fuzz/p2p/addrbook/init-corpus/main.go b/test/fuzz/p2p/addrbook/init-corpus/main.go index d5cc3a9a9..2185eab4b 100644 --- a/test/fuzz/p2p/addrbook/init-corpus/main.go +++ b/test/fuzz/p2p/addrbook/init-corpus/main.go @@ -5,7 +5,6 @@ import ( "encoding/json" "flag" "fmt" - "io/ioutil" "log" "net" "os" @@ -49,7 +48,7 @@ func initCorpus(baseDir string) { log.Fatalf("can't marshal %v: %v", addr, err) } - if err := ioutil.WriteFile(filename, bz, 0644); err != nil { + if err := os.WriteFile(filename, bz, 0644); err != nil { log.Fatalf("can't write %v to %q: %v", addr, filename, err) } diff --git a/test/fuzz/p2p/pex/init-corpus/main.go b/test/fuzz/p2p/pex/init-corpus/main.go index 2fe09c0dc..d96740306 100644 --- a/test/fuzz/p2p/pex/init-corpus/main.go +++ b/test/fuzz/p2p/pex/init-corpus/main.go @@ -4,7 +4,6 @@ package main import ( "flag" "fmt" - "io/ioutil" "log" "math/rand" "os" @@ -73,7 +72,7 @@ func initCorpus(rootDir string) { filename := filepath.Join(rootDir, "corpus", fmt.Sprintf("%d", n)) - if err := ioutil.WriteFile(filename, bz, 0644); err != nil { + if err := os.WriteFile(filename, bz, 0644); err != nil { log.Fatalf("can't write %X to %q: %v", bz, filename, err) } diff --git a/test/fuzz/p2p/secret_connection/init-corpus/main.go b/test/fuzz/p2p/secret_connection/init-corpus/main.go index 635f2d99f..3a2537ff7 100644 --- a/test/fuzz/p2p/secret_connection/init-corpus/main.go +++ b/test/fuzz/p2p/secret_connection/init-corpus/main.go @@ -4,7 +4,6 @@ package main import ( "flag" "fmt" - "io/ioutil" "log" "os" "path/filepath" @@ -39,7 +38,7 @@ func initCorpus(baseDir string) { for i, datum := range data { filename := filepath.Join(corpusDir, fmt.Sprintf("%d", i)) - if err := ioutil.WriteFile(filename, []byte(datum), 0644); err != nil { + if err := os.WriteFile(filename, []byte(datum), 0644); err != nil { log.Fatalf("can't write %v to %q: %v", datum, filename, err) } diff --git a/test/fuzz/rpc/jsonrpc/server/handler.go b/test/fuzz/rpc/jsonrpc/server/handler.go index 98c75d511..8189bcccc 100644 --- a/test/fuzz/rpc/jsonrpc/server/handler.go +++ b/test/fuzz/rpc/jsonrpc/server/handler.go @@ -3,7 +3,7 @@ package handler import ( "bytes" "encoding/json" - "io/ioutil" + "io" "net/http" "net/http/httptest" @@ -29,7 +29,7 @@ func Fuzz(data []byte) int { rec := httptest.NewRecorder() mux.ServeHTTP(rec, req) res := rec.Result() - blob, err := ioutil.ReadAll(res.Body) + blob, err := io.ReadAll(res.Body) if err != nil { panic(err) } diff --git a/test/maverick/README.md b/test/maverick/README.md deleted file mode 100644 index 308275536..000000000 --- a/test/maverick/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# Maverick - -![](https://assets.rollingstone.com/assets/2015/article/tom-cruise-to-fight-drones-in-top-gun-sequel-20150629/201166/large_rect/1435581755/1401x788-Top-Gun-3.jpg) - -A byzantine node used to test Tendermint consensus against a plethora of different faulty misbehaviors. Designed to easily create new faulty misbehaviors to examine how a Tendermint network reacts to the misbehavior. Can also be used for fuzzy testing with different network arrangements. - -## Misbehaviors - -A misbehavior allows control at the following stages as highlighted by the struct below - -```go -type Misbehavior struct { - String string - - EnterPropose func(cs *State, height int64, round int32) - - EnterPrevote func(cs *State, height int64, round int32) - - EnterPrecommit func(cs *State, height int64, round int32) - - ReceivePrevote func(cs *State, prevote *types.Vote) - - ReceivePrecommit func(cs *State, precommit *types.Vote) - - ReceiveProposal func(cs *State, proposal *types.Proposal) error -} -``` - -At each of these events, the node can exhibit a different misbehavior. To create a new misbehavior define a function that builds off the existing default misbehavior and then overrides one or more of these functions. Then append it to the misbehaviors list so the node recognizes it like so: - -```go -var MisbehaviorList = map[string]Misbehavior{ - "double-prevote": DoublePrevoteMisbehavior(), -} -``` - -## Setup - -The maverick node takes most of the functionality from the existing Tendermint CLI. To install this, in the directory of this readme, run: - -```bash -go build -``` - -Use `maverick init` to initialize a single node and `maverick node` to run it. This will run it normally unless you use the misbehaviors flag as follows: - -```bash -maverick node --proxy_app persistent_kvstore --misbehaviors double-vote,10 -``` - -This would cause the node to vote twice in every round at height 10. To add more misbehaviors at different heights, append the next misbehavior and height after the first (with comma separation). diff --git a/test/maverick/consensus/misbehavior.go b/test/maverick/consensus/misbehavior.go deleted file mode 100644 index e8fe42f1d..000000000 --- a/test/maverick/consensus/misbehavior.go +++ /dev/null @@ -1,400 +0,0 @@ -package consensus - -import ( - "fmt" - - tmcon "github.com/tendermint/tendermint/consensus" - cstypes "github.com/tendermint/tendermint/consensus/types" - "github.com/tendermint/tendermint/libs/log" - tmproto "github.com/tendermint/tendermint/proto/tendermint/types" - "github.com/tendermint/tendermint/types" -) - -// MisbehaviorList encompasses a list of all possible behaviors -var MisbehaviorList = map[string]Misbehavior{ - "double-prevote": DoublePrevoteMisbehavior(), -} - -type Misbehavior struct { - Name string - - EnterPropose func(cs *State, height int64, round int32) - - EnterPrevote func(cs *State, height int64, round int32) - - EnterPrecommit func(cs *State, height int64, round int32) - - ReceivePrevote func(cs *State, prevote *types.Vote) - - ReceivePrecommit func(cs *State, precommit *types.Vote) - - ReceiveProposal func(cs *State, proposal *types.Proposal) error -} - -// BEHAVIORS - -func DefaultMisbehavior() Misbehavior { - return Misbehavior{ - Name: "default", - EnterPropose: defaultEnterPropose, - EnterPrevote: defaultEnterPrevote, - EnterPrecommit: defaultEnterPrecommit, - ReceivePrevote: defaultReceivePrevote, - ReceivePrecommit: defaultReceivePrecommit, - ReceiveProposal: defaultReceiveProposal, - } -} - -// DoublePrevoteMisbehavior will make a node prevote both nil and a block in the same -// height and round. -func DoublePrevoteMisbehavior() Misbehavior { - b := DefaultMisbehavior() - b.Name = "double-prevote" - b.EnterPrevote = func(cs *State, height int64, round int32) { - - // If a block is locked, prevote that. - if cs.LockedBlock != nil { - cs.Logger.Debug("enterPrevote: already locked on a block, prevoting locked block") - cs.signAddVote(tmproto.PrevoteType, cs.LockedBlock.Hash(), cs.LockedBlockParts.Header()) - return - } - - // If ProposalBlock is nil, prevote nil. - if cs.ProposalBlock == nil { - cs.Logger.Debug("enterPrevote: ProposalBlock is nil") - cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) - return - } - - // Validate proposal block - err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock) - if err != nil { - // ProposalBlock is invalid, prevote nil. - cs.Logger.Error("enterPrevote: ProposalBlock is invalid", "err", err) - cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) - return - } - - if cs.sw == nil { - cs.Logger.Error("nil switch") - return - } - - prevote, err := cs.signVote(tmproto.PrevoteType, cs.ProposalBlock.Hash(), cs.ProposalBlockParts.Header()) - if err != nil { - cs.Logger.Error("enterPrevote: Unable to sign block", "err", err) - } - - nilPrevote, err := cs.signVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) - if err != nil { - cs.Logger.Error("enterPrevote: Unable to sign block", "err", err) - } - - // add our own vote - cs.sendInternalMessage(msgInfo{&tmcon.VoteMessage{Vote: prevote}, ""}) - - cs.Logger.Info("Sending conflicting votes") - peers := cs.sw.Peers().List() - // there has to be at least two other peers connected else this behavior works normally - for idx, peer := range peers { - if idx%2 == 0 { // sign the proposal block - peer.Send(VoteChannel, tmcon.MustEncode(&tmcon.VoteMessage{Vote: prevote})) - } else { // sign a nil block - peer.Send(VoteChannel, tmcon.MustEncode(&tmcon.VoteMessage{Vote: nilPrevote})) - } - } - } - return b -} - -// DEFAULTS - -func defaultEnterPropose(cs *State, height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - // If we don't get the proposal and all block parts quick enough, enterPrevote - cs.scheduleTimeout(cs.config.Propose(round), height, round, cstypes.RoundStepPropose) - - // Nothing more to do if we're not a validator - if cs.privValidator == nil { - logger.Debug("This node is not a validator") - return - } - logger.Debug("This node is a validator") - - pubKey, err := cs.privValidator.GetPubKey() - if err != nil { - // If this node is a validator & proposer in the currentx round, it will - // miss the opportunity to create a block. - logger.Error("Error on retrival of pubkey", "err", err) - return - } - address := pubKey.Address() - - // if not a validator, we're done - if !cs.Validators.HasAddress(address) { - logger.Debug("This node is not a validator", "addr", address, "vals", cs.Validators) - return - } - - if cs.isProposer(address) { - logger.Debug("enterPropose: our turn to propose", - "proposer", address, - ) - cs.decideProposal(height, round) - } else { - logger.Debug("enterPropose: not our turn to propose", - "proposer", cs.Validators.GetProposer().Address, - ) - } -} - -func defaultEnterPrevote(cs *State, height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - // If a block is locked, prevote that. - if cs.LockedBlock != nil { - logger.Debug("enterPrevote: already locked on a block, prevoting locked block") - cs.signAddVote(tmproto.PrevoteType, cs.LockedBlock.Hash(), cs.LockedBlockParts.Header()) - return - } - - // If ProposalBlock is nil, prevote nil. - if cs.ProposalBlock == nil { - logger.Debug("enterPrevote: ProposalBlock is nil") - cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) - return - } - - // Validate proposal block - err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock) - if err != nil { - // ProposalBlock is invalid, prevote nil. - logger.Error("enterPrevote: ProposalBlock is invalid", "err", err) - cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) - return - } - - // Prevote cs.ProposalBlock - // NOTE: the proposal signature is validated when it is received, - // and the proposal block parts are validated as they are received (against the merkle hash in the proposal) - logger.Debug("enterPrevote: ProposalBlock is valid") - cs.signAddVote(tmproto.PrevoteType, cs.ProposalBlock.Hash(), cs.ProposalBlockParts.Header()) -} - -func defaultEnterPrecommit(cs *State, height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - // check for a polka - blockID, ok := cs.Votes.Prevotes(round).TwoThirdsMajority() - - // If we don't have a polka, we must precommit nil. - if !ok { - if cs.LockedBlock != nil { - logger.Debug("enterPrecommit: no +2/3 prevotes during enterPrecommit while we're locked; precommitting nil") - } else { - logger.Debug("enterPrecommit: no +2/3 prevotes during enterPrecommit; precommitting nil.") - } - cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) - return - } - - // At this point +2/3 prevoted for a particular block or nil. - _ = cs.eventBus.PublishEventPolka(cs.RoundStateEvent()) - - // the latest POLRound should be this round. - polRound, _ := cs.Votes.POLInfo() - if polRound < round { - panic(fmt.Sprintf("This POLRound should be %v but got %v", round, polRound)) - } - - // +2/3 prevoted nil. Unlock and precommit nil. - if len(blockID.Hash) == 0 { - if cs.LockedBlock == nil { - logger.Debug("enterPrecommit: +2/3 prevoted for nil") - } else { - logger.Debug("enterPrecommit: +2/3 prevoted for nil; unlocking") - cs.LockedRound = -1 - cs.LockedBlock = nil - cs.LockedBlockParts = nil - _ = cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()) - } - cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) - return - } - - // At this point, +2/3 prevoted for a particular block. - - // If we're already locked on that block, precommit it, and update the LockedRound - if cs.LockedBlock.HashesTo(blockID.Hash) { - logger.Debug("enterPrecommit: +2/3 prevoted locked block; relocking") - cs.LockedRound = round - _ = cs.eventBus.PublishEventRelock(cs.RoundStateEvent()) - cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) - return - } - - // If +2/3 prevoted for proposal block, stage and precommit it - if cs.ProposalBlock.HashesTo(blockID.Hash) { - logger.Debug("enterPrecommit: +2/3 prevoted proposal block; locking", "hash", blockID.Hash) - // Validate the block. - if err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock); err != nil { - panic(fmt.Sprintf("enterPrecommit: +2/3 prevoted for an invalid block: %v", err)) - } - cs.LockedRound = round - cs.LockedBlock = cs.ProposalBlock - cs.LockedBlockParts = cs.ProposalBlockParts - _ = cs.eventBus.PublishEventLock(cs.RoundStateEvent()) - cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) - return - } - - // There was a polka in this round for a block we don't have. - // Fetch that block, unlock, and precommit nil. - // The +2/3 prevotes for this round is the POL for our unlock. - logger.Debug("enterPrecommit: +2/3 prevotes for a block we don't have; voting nil", "blockID", blockID) - cs.LockedRound = -1 - cs.LockedBlock = nil - cs.LockedBlockParts = nil - if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { - cs.ProposalBlock = nil - cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) - } - _ = cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()) - cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) -} - -func defaultReceivePrevote(cs *State, vote *types.Vote) { - height := cs.Height - prevotes := cs.Votes.Prevotes(vote.Round) - - // If +2/3 prevotes for a block or nil for *any* round: - if blockID, ok := prevotes.TwoThirdsMajority(); ok { - - // There was a polka! - // If we're locked but this is a recent polka, unlock. - // If it matches our ProposalBlock, update the ValidBlock - - // Unlock if `cs.LockedRound < vote.Round <= cs.Round` - // NOTE: If vote.Round > cs.Round, we'll deal with it when we get to vote.Round - if (cs.LockedBlock != nil) && - (cs.LockedRound < vote.Round) && - (vote.Round <= cs.Round) && - !cs.LockedBlock.HashesTo(blockID.Hash) { - - cs.Logger.Info("Unlocking because of POL.", "lockedRound", cs.LockedRound, "POLRound", vote.Round) - cs.LockedRound = -1 - cs.LockedBlock = nil - cs.LockedBlockParts = nil - _ = cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()) - } - - // Update Valid* if we can. - // NOTE: our proposal block may be nil or not what received a polka.. - if len(blockID.Hash) != 0 && (cs.ValidRound < vote.Round) && (vote.Round == cs.Round) { - - if cs.ProposalBlock.HashesTo(blockID.Hash) { - cs.Logger.Info( - "Updating ValidBlock because of POL.", "validRound", cs.ValidRound, "POLRound", vote.Round) - cs.ValidRound = vote.Round - cs.ValidBlock = cs.ProposalBlock - cs.ValidBlockParts = cs.ProposalBlockParts - } else { - cs.Logger.Info( - "valid block we do not know about; set ProposalBlock=nil", - "proposal", log.NewLazyBlockHash(cs.ProposalBlock), - "blockID", blockID.Hash, - ) - - // We're getting the wrong block. - cs.ProposalBlock = nil - } - if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { - cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) - } - cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) - _ = cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()) - } - } - - // If +2/3 prevotes for *anything* for future round: - switch { - case cs.Round < vote.Round && prevotes.HasTwoThirdsAny(): - // Round-skip if there is any 2/3+ of votes ahead of us - cs.enterNewRound(height, vote.Round) - case cs.Round == vote.Round && cstypes.RoundStepPrevote <= cs.Step: // current round - blockID, ok := prevotes.TwoThirdsMajority() - if ok && (cs.isProposalComplete() || len(blockID.Hash) == 0) { - cs.enterPrecommit(height, vote.Round) - } else if prevotes.HasTwoThirdsAny() { - cs.enterPrevoteWait(height, vote.Round) - } - case cs.Proposal != nil && 0 <= cs.Proposal.POLRound && cs.Proposal.POLRound == vote.Round: - // If the proposal is now complete, enter prevote of cs.Round. - if cs.isProposalComplete() { - cs.enterPrevote(height, cs.Round) - } - } - -} - -func defaultReceivePrecommit(cs *State, vote *types.Vote) { - height := cs.Height - precommits := cs.Votes.Precommits(vote.Round) - cs.Logger.Info("Added to precommit", "vote", vote, "precommits", precommits.StringShort()) - - blockID, ok := precommits.TwoThirdsMajority() - if ok { - // Executed as TwoThirdsMajority could be from a higher round - cs.enterNewRound(height, vote.Round) - cs.enterPrecommit(height, vote.Round) - if len(blockID.Hash) != 0 { - cs.enterCommit(height, vote.Round) - if cs.config.SkipTimeoutCommit && precommits.HasAll() { - cs.enterNewRound(cs.Height, 0) - } - } else { - cs.enterPrecommitWait(height, vote.Round) - } - } else if cs.Round <= vote.Round && precommits.HasTwoThirdsAny() { - cs.enterNewRound(height, vote.Round) - cs.enterPrecommitWait(height, vote.Round) - } -} - -func defaultReceiveProposal(cs *State, proposal *types.Proposal) error { - // Already have one - // TODO: possibly catch double proposals - if cs.Proposal != nil { - return nil - } - - // Does not apply - if proposal.Height != cs.Height || proposal.Round != cs.Round { - return nil - } - - // Verify POLRound, which must be -1 or in range [0, proposal.Round). - if proposal.POLRound < -1 || - (proposal.POLRound >= 0 && proposal.POLRound >= proposal.Round) { - return ErrInvalidProposalPOLRound - } - - p := proposal.ToProto() - // Verify signature - if !cs.Validators.GetProposer().PubKey.VerifySignature( - types.ProposalSignBytes(cs.state.ChainID, p), proposal.Signature) { - return ErrInvalidProposalSignature - } - - proposal.Signature = p.Signature - cs.Proposal = proposal - // We don't update cs.ProposalBlockParts if it is already set. - // This happens if we're already in cstypes.RoundStepCommit or if there is a valid block in the current round. - // TODO: We can check if Proposal is for a different block as this is a sign of misbehavior! - if cs.ProposalBlockParts == nil { - cs.ProposalBlockParts = types.NewPartSetFromHeader(proposal.BlockID.PartSetHeader) - } - - cs.Logger.Info("received proposal", "proposal", proposal) - return nil -} diff --git a/test/maverick/consensus/msgs.go b/test/maverick/consensus/msgs.go deleted file mode 100644 index de78cad36..000000000 --- a/test/maverick/consensus/msgs.go +++ /dev/null @@ -1,115 +0,0 @@ -package consensus - -import ( - "errors" - "fmt" - - tmcon "github.com/tendermint/tendermint/consensus" - cstypes "github.com/tendermint/tendermint/consensus/types" - tmmath "github.com/tendermint/tendermint/libs/math" - "github.com/tendermint/tendermint/p2p" - tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus" - tmproto "github.com/tendermint/tendermint/proto/tendermint/types" - "github.com/tendermint/tendermint/types" -) - -func WALToProto(msg tmcon.WALMessage) (*tmcons.WALMessage, error) { - var pb tmcons.WALMessage - - switch msg := msg.(type) { - case types.EventDataRoundState: - pb = tmcons.WALMessage{ - Sum: &tmcons.WALMessage_EventDataRoundState{ - EventDataRoundState: &tmproto.EventDataRoundState{ - Height: msg.Height, - Round: msg.Round, - Step: msg.Step, - }, - }, - } - case msgInfo: - consMsg, err := tmcon.MsgToProto(msg.Msg) - if err != nil { - return nil, err - } - pb = tmcons.WALMessage{ - Sum: &tmcons.WALMessage_MsgInfo{ - MsgInfo: &tmcons.MsgInfo{ - Msg: *consMsg, - PeerID: string(msg.PeerID), - }, - }, - } - case timeoutInfo: - pb = tmcons.WALMessage{ - Sum: &tmcons.WALMessage_TimeoutInfo{ - TimeoutInfo: &tmcons.TimeoutInfo{ - Duration: msg.Duration, - Height: msg.Height, - Round: msg.Round, - Step: uint32(msg.Step), - }, - }, - } - case tmcon.EndHeightMessage: - pb = tmcons.WALMessage{ - Sum: &tmcons.WALMessage_EndHeight{ - EndHeight: &tmcons.EndHeight{ - Height: msg.Height, - }, - }, - } - default: - return nil, fmt.Errorf("to proto: wal message not recognized: %T", msg) - } - - return &pb, nil -} - -// WALFromProto takes a proto wal message and return a consensus walMessage and error -func WALFromProto(msg *tmcons.WALMessage) (tmcon.WALMessage, error) { - if msg == nil { - return nil, errors.New("nil WAL message") - } - var pb tmcon.WALMessage - - switch msg := msg.Sum.(type) { - case *tmcons.WALMessage_EventDataRoundState: - pb = types.EventDataRoundState{ - Height: msg.EventDataRoundState.Height, - Round: msg.EventDataRoundState.Round, - Step: msg.EventDataRoundState.Step, - } - case *tmcons.WALMessage_MsgInfo: - walMsg, err := tmcon.MsgFromProto(&msg.MsgInfo.Msg) - if err != nil { - return nil, fmt.Errorf("msgInfo from proto error: %w", err) - } - pb = msgInfo{ - Msg: walMsg, - PeerID: p2p.ID(msg.MsgInfo.PeerID), - } - - case *tmcons.WALMessage_TimeoutInfo: - tis, err := tmmath.SafeConvertUint8(int64(msg.TimeoutInfo.Step)) - // deny message based on possible overflow - if err != nil { - return nil, fmt.Errorf("denying message due to possible overflow: %w", err) - } - pb = timeoutInfo{ - Duration: msg.TimeoutInfo.Duration, - Height: msg.TimeoutInfo.Height, - Round: msg.TimeoutInfo.Round, - Step: cstypes.RoundStepType(tis), - } - return pb, nil - case *tmcons.WALMessage_EndHeight: - pb := tmcon.EndHeightMessage{ - Height: msg.EndHeight.Height, - } - return pb, nil - default: - return nil, fmt.Errorf("from proto: wal message not recognized: %T", msg) - } - return pb, nil -} diff --git a/test/maverick/consensus/reactor.go b/test/maverick/consensus/reactor.go deleted file mode 100644 index bd303a2ee..000000000 --- a/test/maverick/consensus/reactor.go +++ /dev/null @@ -1,1419 +0,0 @@ -package consensus - -import ( - "errors" - "fmt" - "reflect" - "sync" - "time" - - "github.com/gogo/protobuf/proto" - - tmcon "github.com/tendermint/tendermint/consensus" - cstypes "github.com/tendermint/tendermint/consensus/types" - "github.com/tendermint/tendermint/libs/bits" - tmevents "github.com/tendermint/tendermint/libs/events" - tmjson "github.com/tendermint/tendermint/libs/json" - "github.com/tendermint/tendermint/libs/log" - tmsync "github.com/tendermint/tendermint/libs/sync" - "github.com/tendermint/tendermint/p2p" - tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus" - tmproto "github.com/tendermint/tendermint/proto/tendermint/types" - sm "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/types" - tmtime "github.com/tendermint/tendermint/types/time" -) - -const ( - StateChannel = byte(0x20) - DataChannel = byte(0x21) - VoteChannel = byte(0x22) - VoteSetBitsChannel = byte(0x23) - - maxMsgSize = 1048576 // 1MB; NOTE/TODO: keep in sync with types.PartSet sizes. - - blocksToContributeToBecomeGoodPeer = 10000 - votesToContributeToBecomeGoodPeer = 10000 -) - -//----------------------------------------------------------------------------- - -// Reactor defines a reactor for the consensus service. -type Reactor struct { - p2p.BaseReactor // BaseService + p2p.Switch - - conS *State - - mtx tmsync.RWMutex - waitSync bool - eventBus *types.EventBus - - Metrics *tmcon.Metrics -} - -type ReactorOption func(*Reactor) - -// NewReactor returns a new Reactor with the given -// consensusState. -func NewReactor(consensusState *State, waitSync bool, options ...ReactorOption) *Reactor { - conR := &Reactor{ - conS: consensusState, - waitSync: waitSync, - Metrics: tmcon.NopMetrics(), - } - conR.BaseReactor = *p2p.NewBaseReactor("Consensus", conR) - - for _, option := range options { - option(conR) - } - - return conR -} - -// OnStart implements BaseService by subscribing to events, which later will be -// broadcasted to other peers and starting state if we're not in fast sync. -func (conR *Reactor) OnStart() error { - conR.Logger.Info("Reactor ", "waitSync", conR.WaitSync()) - - // start routine that computes peer statistics for evaluating peer quality - go conR.peerStatsRoutine() - - conR.subscribeToBroadcastEvents() - - if !conR.WaitSync() { - conR.conS.SetSwitch(conR.Switch) - err := conR.conS.Start() - if err != nil { - return err - } - } - - return nil -} - -// OnStop implements BaseService by unsubscribing from events and stopping -// state. -func (conR *Reactor) OnStop() { - conR.unsubscribeFromBroadcastEvents() - if err := conR.conS.Stop(); err != nil { - conR.Logger.Error("Error stopping consensus state", "err", err) - } - if !conR.WaitSync() { - conR.conS.Wait() - } -} - -// SwitchToConsensus switches from fast_sync mode to consensus mode. -// It resets the state, turns off fast_sync, and starts the consensus state-machine -func (conR *Reactor) SwitchToConsensus(state sm.State, skipWAL bool) { - conR.Logger.Info("SwitchToConsensus") - - // We have no votes, so reconstruct LastCommit from SeenCommit. - if state.LastBlockHeight > 0 { - conR.conS.reconstructLastCommit(state) - } - - // NOTE: The line below causes broadcastNewRoundStepRoutine() to broadcast a - // NewRoundStepMessage. - conR.conS.updateToState(state) - - conR.mtx.Lock() - conR.waitSync = false - conR.mtx.Unlock() - conR.Metrics.FastSyncing.Set(0) - conR.Metrics.StateSyncing.Set(0) - - if skipWAL { - conR.conS.doWALCatchup = false - } - conR.conS.SetSwitch(conR.Switch) - err := conR.conS.Start() - if err != nil { - panic(fmt.Sprintf(`Failed to start consensus state: %v - -conS: -%+v - -conR: -%+v`, err, conR.conS, conR)) - } -} - -// GetChannels implements Reactor -func (conR *Reactor) GetChannels() []*p2p.ChannelDescriptor { - // TODO optimize - return []*p2p.ChannelDescriptor{ - { - ID: StateChannel, - Priority: 6, - SendQueueCapacity: 100, - RecvMessageCapacity: maxMsgSize, - }, - { - ID: DataChannel, // maybe split between gossiping current block and catchup stuff - // once we gossip the whole block there's nothing left to send until next height or round - Priority: 10, - SendQueueCapacity: 100, - RecvBufferCapacity: 50 * 4096, - RecvMessageCapacity: maxMsgSize, - }, - { - ID: VoteChannel, - Priority: 7, - SendQueueCapacity: 100, - RecvBufferCapacity: 100 * 100, - RecvMessageCapacity: maxMsgSize, - }, - { - ID: VoteSetBitsChannel, - Priority: 1, - SendQueueCapacity: 2, - RecvBufferCapacity: 1024, - RecvMessageCapacity: maxMsgSize, - }, - } -} - -// InitPeer implements Reactor by creating a state for the peer. -func (conR *Reactor) InitPeer(peer p2p.Peer) p2p.Peer { - peerState := NewPeerState(peer).SetLogger(conR.Logger) - peer.Set(types.PeerStateKey, peerState) - return peer -} - -// AddPeer implements Reactor by spawning multiple gossiping goroutines for the -// peer. -func (conR *Reactor) AddPeer(peer p2p.Peer) { - if !conR.IsRunning() { - return - } - - peerState, ok := peer.Get(types.PeerStateKey).(*PeerState) - if !ok { - panic(fmt.Sprintf("peer %v has no state", peer)) - } - // Begin routines for this peer. - go conR.gossipDataRoutine(peer, peerState) - go conR.gossipVotesRoutine(peer, peerState) - go conR.queryMaj23Routine(peer, peerState) - - // Send our state to peer. - // If we're fast_syncing, broadcast a RoundStepMessage later upon SwitchToConsensus(). - if !conR.WaitSync() { - conR.sendNewRoundStepMessage(peer) - } -} - -// RemovePeer is a noop. -func (conR *Reactor) RemovePeer(peer p2p.Peer, reason interface{}) { - if !conR.IsRunning() { - return - } - // TODO - // ps, ok := peer.Get(PeerStateKey).(*PeerState) - // if !ok { - // panic(fmt.Sprintf("Peer %v has no state", peer)) - // } - // ps.Disconnect() -} - -// Receive implements Reactor -// NOTE: We process these messages even when we're fast_syncing. -// Messages affect either a peer state or the consensus state. -// Peer state updates can happen in parallel, but processing of -// proposals, block parts, and votes are ordered by the receiveRoutine -// NOTE: blocks on consensus state for proposals, block parts, and votes -func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { - if !conR.IsRunning() { - conR.Logger.Debug("Receive", "src", src, "chId", chID, "bytes", msgBytes) - return - } - - msg, err := decodeMsg(msgBytes) - if err != nil { - conR.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err) - conR.Switch.StopPeerForError(src, err) - return - } - - if err = msg.ValidateBasic(); err != nil { - conR.Logger.Error("Peer sent us invalid msg", "peer", src, "msg", msg, "err", err) - conR.Switch.StopPeerForError(src, err) - return - } - - conR.Logger.Debug("Receive", "src", src, "chId", chID, "msg", msg) - - // Get peer states - ps, ok := src.Get(types.PeerStateKey).(*PeerState) - if !ok { - panic(fmt.Sprintf("Peer %v has no state", src)) - } - - switch chID { - case StateChannel: - switch msg := msg.(type) { - case *tmcon.NewRoundStepMessage: - conR.conS.mtx.Lock() - initialHeight := conR.conS.state.InitialHeight - conR.conS.mtx.Unlock() - if err = msg.ValidateHeight(initialHeight); err != nil { - conR.Logger.Error("Peer sent us invalid msg", "peer", src, "msg", msg, "err", err) - conR.Switch.StopPeerForError(src, err) - return - } - ps.ApplyNewRoundStepMessage(msg) - case *tmcon.NewValidBlockMessage: - ps.ApplyNewValidBlockMessage(msg) - case *tmcon.HasVoteMessage: - ps.ApplyHasVoteMessage(msg) - case *tmcon.VoteSetMaj23Message: - cs := conR.conS - cs.mtx.Lock() - height, votes := cs.Height, cs.Votes - cs.mtx.Unlock() - if height != msg.Height { - return - } - // Peer claims to have a maj23 for some BlockID at H,R,S, - err := votes.SetPeerMaj23(msg.Round, msg.Type, ps.peer.ID(), msg.BlockID) - if err != nil { - conR.Switch.StopPeerForError(src, err) - return - } - // Respond with a VoteSetBitsMessage showing which votes we have. - // (and consequently shows which we don't have) - var ourVotes *bits.BitArray - switch msg.Type { - case tmproto.PrevoteType: - ourVotes = votes.Prevotes(msg.Round).BitArrayByBlockID(msg.BlockID) - case tmproto.PrecommitType: - ourVotes = votes.Precommits(msg.Round).BitArrayByBlockID(msg.BlockID) - default: - panic("Bad VoteSetBitsMessage field Type. Forgot to add a check in ValidateBasic?") - } - src.TrySend(VoteSetBitsChannel, tmcon.MustEncode(&tmcon.VoteSetBitsMessage{ - Height: msg.Height, - Round: msg.Round, - Type: msg.Type, - BlockID: msg.BlockID, - Votes: ourVotes, - })) - default: - conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg))) - } - - case DataChannel: - if conR.WaitSync() { - conR.Logger.Info("Ignoring message received during sync", "msg", msg) - return - } - switch msg := msg.(type) { - case *tmcon.ProposalMessage: - ps.SetHasProposal(msg.Proposal) - conR.conS.peerMsgQueue <- msgInfo{msg, src.ID()} - case *tmcon.ProposalPOLMessage: - ps.ApplyProposalPOLMessage(msg) - case *tmcon.BlockPartMessage: - ps.SetHasProposalBlockPart(msg.Height, msg.Round, int(msg.Part.Index)) - conR.Metrics.BlockParts.With("peer_id", string(src.ID())).Add(1) - conR.conS.peerMsgQueue <- msgInfo{msg, src.ID()} - default: - conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg))) - } - - case VoteChannel: - if conR.WaitSync() { - conR.Logger.Info("Ignoring message received during sync", "msg", msg) - return - } - switch msg := msg.(type) { - case *tmcon.VoteMessage: - cs := conR.conS - cs.mtx.RLock() - height, valSize, lastCommitSize := cs.Height, cs.Validators.Size(), cs.LastCommit.Size() - cs.mtx.RUnlock() - ps.EnsureVoteBitArrays(height, valSize) - ps.EnsureVoteBitArrays(height-1, lastCommitSize) - ps.SetHasVote(msg.Vote) - - cs.peerMsgQueue <- msgInfo{msg, src.ID()} - - default: - // don't punish (leave room for soft upgrades) - conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg))) - } - - case VoteSetBitsChannel: - if conR.WaitSync() { - conR.Logger.Info("Ignoring message received during sync", "msg", msg) - return - } - switch msg := msg.(type) { - case *tmcon.VoteSetBitsMessage: - cs := conR.conS - cs.mtx.Lock() - height, votes := cs.Height, cs.Votes - cs.mtx.Unlock() - - if height == msg.Height { - var ourVotes *bits.BitArray - switch msg.Type { - case tmproto.PrevoteType: - ourVotes = votes.Prevotes(msg.Round).BitArrayByBlockID(msg.BlockID) - case tmproto.PrecommitType: - ourVotes = votes.Precommits(msg.Round).BitArrayByBlockID(msg.BlockID) - default: - panic("Bad VoteSetBitsMessage field Type. Forgot to add a check in ValidateBasic?") - } - ps.ApplyVoteSetBitsMessage(msg, ourVotes) - } else { - ps.ApplyVoteSetBitsMessage(msg, nil) - } - default: - // don't punish (leave room for soft upgrades) - conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg))) - } - - default: - conR.Logger.Error(fmt.Sprintf("Unknown chId %X", chID)) - } -} - -// SetEventBus sets event bus. -func (conR *Reactor) SetEventBus(b *types.EventBus) { - conR.eventBus = b - conR.conS.SetEventBus(b) -} - -// WaitSync returns whether the consensus reactor is waiting for state/fast sync. -func (conR *Reactor) WaitSync() bool { - conR.mtx.RLock() - defer conR.mtx.RUnlock() - return conR.waitSync -} - -//-------------------------------------- - -// subscribeToBroadcastEvents subscribes for new round steps and votes -// using internal pubsub defined on state to broadcast -// them to peers upon receiving. -func (conR *Reactor) subscribeToBroadcastEvents() { - const subscriber = "consensus-reactor" - if err := conR.conS.evsw.AddListenerForEvent(subscriber, types.EventNewRoundStep, - func(data tmevents.EventData) { - conR.broadcastNewRoundStepMessage(data.(*cstypes.RoundState)) - }); err != nil { - conR.Logger.Error("Error adding listener for events", "err", err) - } - - if err := conR.conS.evsw.AddListenerForEvent(subscriber, types.EventValidBlock, - func(data tmevents.EventData) { - conR.broadcastNewValidBlockMessage(data.(*cstypes.RoundState)) - }); err != nil { - conR.Logger.Error("Error adding listener for events", "err", err) - } - - if err := conR.conS.evsw.AddListenerForEvent(subscriber, types.EventVote, - func(data tmevents.EventData) { - conR.broadcastHasVoteMessage(data.(*types.Vote)) - }); err != nil { - conR.Logger.Error("Error adding listener for events", "err", err) - } - -} - -func (conR *Reactor) unsubscribeFromBroadcastEvents() { - const subscriber = "consensus-reactor" - conR.conS.evsw.RemoveListener(subscriber) -} - -func (conR *Reactor) broadcastNewRoundStepMessage(rs *cstypes.RoundState) { - nrsMsg := makeRoundStepMessage(rs) - conR.Switch.Broadcast(StateChannel, tmcon.MustEncode(nrsMsg)) -} - -func (conR *Reactor) broadcastNewValidBlockMessage(rs *cstypes.RoundState) { - csMsg := &tmcon.NewValidBlockMessage{ - Height: rs.Height, - Round: rs.Round, - BlockPartSetHeader: rs.ProposalBlockParts.Header(), - BlockParts: rs.ProposalBlockParts.BitArray(), - IsCommit: rs.Step == cstypes.RoundStepCommit, - } - conR.Switch.Broadcast(StateChannel, tmcon.MustEncode(csMsg)) -} - -// Broadcasts HasVoteMessage to peers that care. -func (conR *Reactor) broadcastHasVoteMessage(vote *types.Vote) { - msg := &tmcon.HasVoteMessage{ - Height: vote.Height, - Round: vote.Round, - Type: vote.Type, - Index: vote.ValidatorIndex, - } - conR.Switch.Broadcast(StateChannel, tmcon.MustEncode(msg)) - /* - // TODO: Make this broadcast more selective. - for _, peer := range conR.Switch.Peers().List() { - ps, ok := peer.Get(PeerStateKey).(*PeerState) - if !ok { - panic(fmt.Sprintf("Peer %v has no state", peer)) - } - prs := ps.GetRoundState() - if prs.Height == vote.Height { - // TODO: Also filter on round? - peer.TrySend(StateChannel, struct{ ConsensusMessage }{msg}) - } else { - // Height doesn't match - // TODO: check a field, maybe CatchupCommitRound? - // TODO: But that requires changing the struct field comment. - } - } - */ -} - -func makeRoundStepMessage(rs *cstypes.RoundState) (nrsMsg *tmcon.NewRoundStepMessage) { - nrsMsg = &tmcon.NewRoundStepMessage{ - Height: rs.Height, - Round: rs.Round, - Step: rs.Step, - SecondsSinceStartTime: int64(time.Since(rs.StartTime).Seconds()), - LastCommitRound: rs.LastCommit.GetRound(), - } - return -} - -func (conR *Reactor) sendNewRoundStepMessage(peer p2p.Peer) { - rs := conR.conS.GetRoundState() - nrsMsg := makeRoundStepMessage(rs) - peer.Send(StateChannel, tmcon.MustEncode(nrsMsg)) -} - -func (conR *Reactor) gossipDataRoutine(peer p2p.Peer, ps *PeerState) { - logger := conR.Logger.With("peer", peer) - -OUTER_LOOP: - for { - // Manage disconnects from self or peer. - if !peer.IsRunning() || !conR.IsRunning() { - logger.Info("Stopping gossipDataRoutine for peer") - return - } - rs := conR.conS.GetRoundState() - prs := ps.GetRoundState() - - // Send proposal Block parts? - if rs.ProposalBlockParts.HasHeader(prs.ProposalBlockPartSetHeader) { - if index, ok := rs.ProposalBlockParts.BitArray().Sub(prs.ProposalBlockParts.Copy()).PickRandom(); ok { - part := rs.ProposalBlockParts.GetPart(index) - msg := &tmcon.BlockPartMessage{ - Height: rs.Height, // This tells peer that this part applies to us. - Round: rs.Round, // This tells peer that this part applies to us. - Part: part, - } - logger.Debug("Sending block part", "height", prs.Height, "round", prs.Round) - if peer.Send(DataChannel, tmcon.MustEncode(msg)) { - ps.SetHasProposalBlockPart(prs.Height, prs.Round, index) - } - continue OUTER_LOOP - } - } - - // If the peer is on a previous height that we have, help catch up. - if (0 < prs.Height) && (prs.Height < rs.Height) && (prs.Height >= conR.conS.blockStore.Base()) { - heightLogger := logger.With("height", prs.Height) - - // if we never received the commit message from the peer, the block parts wont be initialized - if prs.ProposalBlockParts == nil { - blockMeta := conR.conS.blockStore.LoadBlockMeta(prs.Height) - if blockMeta == nil { - heightLogger.Error("Failed to load block meta", - "blockstoreBase", conR.conS.blockStore.Base(), "blockstoreHeight", conR.conS.blockStore.Height()) - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - } else { - ps.InitProposalBlockParts(blockMeta.BlockID.PartSetHeader) - } - // continue the loop since prs is a copy and not effected by this initialization - continue OUTER_LOOP - } - conR.gossipDataForCatchup(heightLogger, rs, prs, ps, peer) - continue OUTER_LOOP - } - - // If height and round don't match, sleep. - if (rs.Height != prs.Height) || (rs.Round != prs.Round) { - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - continue OUTER_LOOP - } - - // By here, height and round match. - // Proposal block parts were already matched and sent if any were wanted. - // (These can match on hash so the round doesn't matter) - // Now consider sending other things, like the Proposal itself. - - // Send Proposal && ProposalPOL BitArray? - if rs.Proposal != nil && !prs.Proposal { - // Proposal: share the proposal metadata with peer. - { - msg := &tmcon.ProposalMessage{Proposal: rs.Proposal} - logger.Debug("Sending proposal", "height", prs.Height, "round", prs.Round) - if peer.Send(DataChannel, tmcon.MustEncode(msg)) { - // NOTE[ZM]: A peer might have received different proposal msg so this Proposal msg will be rejected! - ps.SetHasProposal(rs.Proposal) - } - } - // ProposalPOL: lets peer know which POL votes we have so far. - // Peer must receive ProposalMessage first. - // rs.Proposal was validated, so rs.Proposal.POLRound <= rs.Round, - // so we definitely have rs.Votes.Prevotes(rs.Proposal.POLRound). - if 0 <= rs.Proposal.POLRound { - msg := &tmcon.ProposalPOLMessage{ - Height: rs.Height, - ProposalPOLRound: rs.Proposal.POLRound, - ProposalPOL: rs.Votes.Prevotes(rs.Proposal.POLRound).BitArray(), - } - logger.Debug("Sending POL", "height", prs.Height, "round", prs.Round) - peer.Send(DataChannel, tmcon.MustEncode(msg)) - } - continue OUTER_LOOP - } - - // Nothing to do. Sleep. - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - continue OUTER_LOOP - } -} - -func (conR *Reactor) gossipDataForCatchup(logger log.Logger, rs *cstypes.RoundState, - prs *cstypes.PeerRoundState, ps *PeerState, peer p2p.Peer) { - - if index, ok := prs.ProposalBlockParts.Not().PickRandom(); ok { - // Ensure that the peer's PartSetHeader is correct - blockMeta := conR.conS.blockStore.LoadBlockMeta(prs.Height) - if blockMeta == nil { - logger.Error("Failed to load block meta", "ourHeight", rs.Height, - "blockstoreBase", conR.conS.blockStore.Base(), "blockstoreHeight", conR.conS.blockStore.Height()) - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - return - } else if !blockMeta.BlockID.PartSetHeader.Equals(prs.ProposalBlockPartSetHeader) { - logger.Info("Peer ProposalBlockPartSetHeader mismatch, sleeping", - "blockPartSetHeader", blockMeta.BlockID.PartSetHeader, "peerBlockPartSetHeader", prs.ProposalBlockPartSetHeader) - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - return - } - // Load the part - part := conR.conS.blockStore.LoadBlockPart(prs.Height, index) - if part == nil { - logger.Error("Could not load part", "index", index, - "blockPartSetHeader", blockMeta.BlockID.PartSetHeader, "peerBlockPartSetHeader", prs.ProposalBlockPartSetHeader) - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - return - } - // Send the part - msg := &tmcon.BlockPartMessage{ - Height: prs.Height, // Not our height, so it doesn't matter. - Round: prs.Round, // Not our height, so it doesn't matter. - Part: part, - } - logger.Debug("Sending block part for catchup", "round", prs.Round, "index", index) - if peer.Send(DataChannel, tmcon.MustEncode(msg)) { - ps.SetHasProposalBlockPart(prs.Height, prs.Round, index) - } else { - logger.Debug("Sending block part for catchup failed") - } - return - } - time.Sleep(conR.conS.config.PeerGossipSleepDuration) -} - -func (conR *Reactor) gossipVotesRoutine(peer p2p.Peer, ps *PeerState) { - logger := conR.Logger.With("peer", peer) - - // Simple hack to throttle logs upon sleep. - var sleeping = 0 - -OUTER_LOOP: - for { - // Manage disconnects from self or peer. - if !peer.IsRunning() || !conR.IsRunning() { - logger.Info("Stopping gossipVotesRoutine for peer") - return - } - rs := conR.conS.GetRoundState() - prs := ps.GetRoundState() - - switch sleeping { - case 1: // First sleep - sleeping = 2 - case 2: // No more sleep - sleeping = 0 - } - - // If height matches, then send LastCommit, Prevotes, Precommits. - if rs.Height == prs.Height { - heightLogger := logger.With("height", prs.Height) - if conR.gossipVotesForHeight(heightLogger, rs, prs, ps) { - continue OUTER_LOOP - } - } - - // Special catchup logic. - // If peer is lagging by height 1, send LastCommit. - if prs.Height != 0 && rs.Height == prs.Height+1 { - if ps.PickSendVote(rs.LastCommit) { - logger.Debug("Picked rs.LastCommit to send", "height", prs.Height) - continue OUTER_LOOP - } - } - - // Catchup logic - // If peer is lagging by more than 1, send Commit. - if prs.Height != 0 && rs.Height >= prs.Height+2 && prs.Height >= conR.conS.blockStore.Base() { - // Load the block commit for prs.Height, - // which contains precommit signatures for prs.Height. - if commit := conR.conS.blockStore.LoadBlockCommit(prs.Height); commit != nil { - if ps.PickSendVote(commit) { - logger.Debug("Picked Catchup commit to send", "height", prs.Height) - continue OUTER_LOOP - } - } - } - - if sleeping == 0 { - // We sent nothing. Sleep... - sleeping = 1 - logger.Debug("No votes to send, sleeping", "rs.Height", rs.Height, "prs.Height", prs.Height, - "localPV", rs.Votes.Prevotes(rs.Round).BitArray(), "peerPV", prs.Prevotes, - "localPC", rs.Votes.Precommits(rs.Round).BitArray(), "peerPC", prs.Precommits) - } else if sleeping == 2 { - // Continued sleep... - sleeping = 1 - } - - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - continue OUTER_LOOP - } -} - -func (conR *Reactor) gossipVotesForHeight( - logger log.Logger, - rs *cstypes.RoundState, - prs *cstypes.PeerRoundState, - ps *PeerState, -) bool { - - // If there are lastCommits to send... - if prs.Step == cstypes.RoundStepNewHeight { - if ps.PickSendVote(rs.LastCommit) { - logger.Debug("Picked rs.LastCommit to send") - return true - } - } - // If there are POL prevotes to send... - if prs.Step <= cstypes.RoundStepPropose && prs.Round != -1 && prs.Round <= rs.Round && prs.ProposalPOLRound != -1 { - if polPrevotes := rs.Votes.Prevotes(prs.ProposalPOLRound); polPrevotes != nil { - if ps.PickSendVote(polPrevotes) { - logger.Debug("Picked rs.Prevotes(prs.ProposalPOLRound) to send", - "round", prs.ProposalPOLRound) - return true - } - } - } - // If there are prevotes to send... - if prs.Step <= cstypes.RoundStepPrevoteWait && prs.Round != -1 && prs.Round <= rs.Round { - if ps.PickSendVote(rs.Votes.Prevotes(prs.Round)) { - logger.Debug("Picked rs.Prevotes(prs.Round) to send", "round", prs.Round) - return true - } - } - // If there are precommits to send... - if prs.Step <= cstypes.RoundStepPrecommitWait && prs.Round != -1 && prs.Round <= rs.Round { - if ps.PickSendVote(rs.Votes.Precommits(prs.Round)) { - logger.Debug("Picked rs.Precommits(prs.Round) to send", "round", prs.Round) - return true - } - } - // If there are prevotes to send...Needed because of validBlock mechanism - if prs.Round != -1 && prs.Round <= rs.Round { - if ps.PickSendVote(rs.Votes.Prevotes(prs.Round)) { - logger.Debug("Picked rs.Prevotes(prs.Round) to send", "round", prs.Round) - return true - } - } - // If there are POLPrevotes to send... - if prs.ProposalPOLRound != -1 { - if polPrevotes := rs.Votes.Prevotes(prs.ProposalPOLRound); polPrevotes != nil { - if ps.PickSendVote(polPrevotes) { - logger.Debug("Picked rs.Prevotes(prs.ProposalPOLRound) to send", - "round", prs.ProposalPOLRound) - return true - } - } - } - - return false -} - -// NOTE: `queryMaj23Routine` has a simple crude design since it only comes -// into play for liveness when there's a signature DDoS attack happening. -func (conR *Reactor) queryMaj23Routine(peer p2p.Peer, ps *PeerState) { - logger := conR.Logger.With("peer", peer) - -OUTER_LOOP: - for { - // Manage disconnects from self or peer. - if !peer.IsRunning() || !conR.IsRunning() { - logger.Info("Stopping queryMaj23Routine for peer") - return - } - - // Maybe send Height/Round/Prevotes - { - rs := conR.conS.GetRoundState() - prs := ps.GetRoundState() - if rs.Height == prs.Height { - if maj23, ok := rs.Votes.Prevotes(prs.Round).TwoThirdsMajority(); ok { - peer.TrySend(StateChannel, tmcon.MustEncode(&tmcon.VoteSetMaj23Message{ - Height: prs.Height, - Round: prs.Round, - Type: tmproto.PrevoteType, - BlockID: maj23, - })) - time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration) - } - } - } - - // Maybe send Height/Round/Precommits - { - rs := conR.conS.GetRoundState() - prs := ps.GetRoundState() - if rs.Height == prs.Height { - if maj23, ok := rs.Votes.Precommits(prs.Round).TwoThirdsMajority(); ok { - peer.TrySend(StateChannel, tmcon.MustEncode(&tmcon.VoteSetMaj23Message{ - Height: prs.Height, - Round: prs.Round, - Type: tmproto.PrecommitType, - BlockID: maj23, - })) - time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration) - } - } - } - - // Maybe send Height/Round/ProposalPOL - { - rs := conR.conS.GetRoundState() - prs := ps.GetRoundState() - if rs.Height == prs.Height && prs.ProposalPOLRound >= 0 { - if maj23, ok := rs.Votes.Prevotes(prs.ProposalPOLRound).TwoThirdsMajority(); ok { - peer.TrySend(StateChannel, tmcon.MustEncode(&tmcon.VoteSetMaj23Message{ - Height: prs.Height, - Round: prs.ProposalPOLRound, - Type: tmproto.PrevoteType, - BlockID: maj23, - })) - time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration) - } - } - } - - // Little point sending LastCommitRound/LastCommit, - // These are fleeting and non-blocking. - - // Maybe send Height/CatchupCommitRound/CatchupCommit. - { - prs := ps.GetRoundState() - if prs.CatchupCommitRound != -1 && prs.Height > 0 && prs.Height <= conR.conS.blockStore.Height() && - prs.Height >= conR.conS.blockStore.Base() { - if commit := conR.conS.LoadCommit(prs.Height); commit != nil { - peer.TrySend(StateChannel, tmcon.MustEncode(&tmcon.VoteSetMaj23Message{ - Height: prs.Height, - Round: commit.Round, - Type: tmproto.PrecommitType, - BlockID: commit.BlockID, - })) - time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration) - } - } - } - - time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration) - - continue OUTER_LOOP - } -} - -func (conR *Reactor) peerStatsRoutine() { - for { - if !conR.IsRunning() { - conR.Logger.Info("Stopping peerStatsRoutine") - return - } - - select { - case msg := <-conR.conS.statsMsgQueue: - // Get peer - peer := conR.Switch.Peers().Get(msg.PeerID) - if peer == nil { - conR.Logger.Debug("Attempt to update stats for non-existent peer", - "peer", msg.PeerID) - continue - } - // Get peer state - ps, ok := peer.Get(types.PeerStateKey).(*PeerState) - if !ok { - panic(fmt.Sprintf("Peer %v has no state", peer)) - } - switch msg.Msg.(type) { - case *tmcon.VoteMessage: - if numVotes := ps.RecordVote(); numVotes%votesToContributeToBecomeGoodPeer == 0 { - conR.Switch.MarkPeerAsGood(peer) - } - case *tmcon.BlockPartMessage: - if numParts := ps.RecordBlockPart(); numParts%blocksToContributeToBecomeGoodPeer == 0 { - conR.Switch.MarkPeerAsGood(peer) - } - } - case <-conR.conS.Quit(): - return - - case <-conR.Quit(): - return - } - } -} - -// String returns a string representation of the Reactor. -// NOTE: For now, it is just a hard-coded string to avoid accessing unprotected shared variables. -// TODO: improve! -func (conR *Reactor) String() string { - // better not to access shared variables - return "ConsensusReactor" // conR.StringIndented("") -} - -// StringIndented returns an indented string representation of the Reactor -func (conR *Reactor) StringIndented(indent string) string { - s := "ConsensusReactor{\n" - s += indent + " " + conR.conS.StringIndented(indent+" ") + "\n" - for _, peer := range conR.Switch.Peers().List() { - ps, ok := peer.Get(types.PeerStateKey).(*PeerState) - if !ok { - panic(fmt.Sprintf("Peer %v has no state", peer)) - } - s += indent + " " + ps.StringIndented(indent+" ") + "\n" - } - s += indent + "}" - return s -} - -// ReactorMetrics sets the metrics -func ReactorMetrics(metrics *tmcon.Metrics) ReactorOption { - return func(conR *Reactor) { conR.Metrics = metrics } -} - -//----------------------------------------------------------------------------- - -var ( - ErrPeerStateHeightRegression = errors.New("error peer state height regression") - ErrPeerStateInvalidStartTime = errors.New("error peer state invalid startTime") -) - -// PeerState contains the known state of a peer, including its connection and -// threadsafe access to its PeerRoundState. -// NOTE: THIS GETS DUMPED WITH rpc/core/consensus.go. -// Be mindful of what you Expose. -type PeerState struct { - peer p2p.Peer - logger log.Logger - - mtx sync.Mutex // NOTE: Modify below using setters, never directly. - PRS cstypes.PeerRoundState `json:"round_state"` // Exposed. - Stats *peerStateStats `json:"stats"` // Exposed. -} - -// peerStateStats holds internal statistics for a peer. -type peerStateStats struct { - Votes int `json:"votes"` - BlockParts int `json:"block_parts"` -} - -func (pss peerStateStats) String() string { - return fmt.Sprintf("peerStateStats{votes: %d, blockParts: %d}", - pss.Votes, pss.BlockParts) -} - -// NewPeerState returns a new PeerState for the given Peer -func NewPeerState(peer p2p.Peer) *PeerState { - return &PeerState{ - peer: peer, - logger: log.NewNopLogger(), - PRS: cstypes.PeerRoundState{ - Round: -1, - ProposalPOLRound: -1, - LastCommitRound: -1, - CatchupCommitRound: -1, - }, - Stats: &peerStateStats{}, - } -} - -// SetLogger allows to set a logger on the peer state. Returns the peer state -// itself. -func (ps *PeerState) SetLogger(logger log.Logger) *PeerState { - ps.logger = logger - return ps -} - -// GetRoundState returns an shallow copy of the PeerRoundState. -// There's no point in mutating it since it won't change PeerState. -func (ps *PeerState) GetRoundState() *cstypes.PeerRoundState { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - prs := ps.PRS // copy - return &prs -} - -// ToJSON returns a json of PeerState. -func (ps *PeerState) ToJSON() ([]byte, error) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - return tmjson.Marshal(ps) -} - -// GetHeight returns an atomic snapshot of the PeerRoundState's height -// used by the mempool to ensure peers are caught up before broadcasting new txs -func (ps *PeerState) GetHeight() int64 { - ps.mtx.Lock() - defer ps.mtx.Unlock() - return ps.PRS.Height -} - -// SetHasProposal sets the given proposal as known for the peer. -func (ps *PeerState) SetHasProposal(proposal *types.Proposal) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.Height != proposal.Height || ps.PRS.Round != proposal.Round { - return - } - - if ps.PRS.Proposal { - return - } - - ps.PRS.Proposal = true - - // ps.PRS.ProposalBlockParts is set due to NewValidBlockMessage - if ps.PRS.ProposalBlockParts != nil { - return - } - - ps.PRS.ProposalBlockPartSetHeader = proposal.BlockID.PartSetHeader - ps.PRS.ProposalBlockParts = bits.NewBitArray(int(proposal.BlockID.PartSetHeader.Total)) - ps.PRS.ProposalPOLRound = proposal.POLRound - ps.PRS.ProposalPOL = nil // Nil until ProposalPOLMessage received. -} - -// InitProposalBlockParts initializes the peer's proposal block parts header and bit array. -func (ps *PeerState) InitProposalBlockParts(partSetHeader types.PartSetHeader) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.ProposalBlockParts != nil { - return - } - - ps.PRS.ProposalBlockPartSetHeader = partSetHeader - ps.PRS.ProposalBlockParts = bits.NewBitArray(int(partSetHeader.Total)) -} - -// SetHasProposalBlockPart sets the given block part index as known for the peer. -func (ps *PeerState) SetHasProposalBlockPart(height int64, round int32, index int) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.Height != height || ps.PRS.Round != round { - return - } - - ps.PRS.ProposalBlockParts.SetIndex(index, true) -} - -// PickSendVote picks a vote and sends it to the peer. -// Returns true if vote was sent. -func (ps *PeerState) PickSendVote(votes types.VoteSetReader) bool { - if vote, ok := ps.PickVoteToSend(votes); ok { - msg := &tmcon.VoteMessage{Vote: vote} - ps.logger.Debug("Sending vote message", "ps", ps, "vote", vote) - if ps.peer.Send(VoteChannel, tmcon.MustEncode(msg)) { - ps.SetHasVote(vote) - return true - } - return false - } - return false -} - -// PickVoteToSend picks a vote to send to the peer. -// Returns true if a vote was picked. -// NOTE: `votes` must be the correct Size() for the Height(). -func (ps *PeerState) PickVoteToSend(votes types.VoteSetReader) (vote *types.Vote, ok bool) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if votes.Size() == 0 { - return nil, false - } - - height, round, votesType, size := - votes.GetHeight(), votes.GetRound(), tmproto.SignedMsgType(votes.Type()), votes.Size() - - // Lazily set data using 'votes'. - if votes.IsCommit() { - ps.ensureCatchupCommitRound(height, round, size) - } - ps.ensureVoteBitArrays(height, size) - - psVotes := ps.getVoteBitArray(height, round, votesType) - if psVotes == nil { - return nil, false // Not something worth sending - } - if index, ok := votes.BitArray().Sub(psVotes).PickRandom(); ok { - return votes.GetByIndex(int32(index)), true - } - return nil, false -} - -func (ps *PeerState) getVoteBitArray(height int64, round int32, votesType tmproto.SignedMsgType) *bits.BitArray { - if !types.IsVoteTypeValid(votesType) { - return nil - } - - if ps.PRS.Height == height { - if ps.PRS.Round == round { - switch votesType { - case tmproto.PrevoteType: - return ps.PRS.Prevotes - case tmproto.PrecommitType: - return ps.PRS.Precommits - } - } - if ps.PRS.CatchupCommitRound == round { - switch votesType { - case tmproto.PrevoteType: - return nil - case tmproto.PrecommitType: - return ps.PRS.CatchupCommit - } - } - if ps.PRS.ProposalPOLRound == round { - switch votesType { - case tmproto.PrevoteType: - return ps.PRS.ProposalPOL - case tmproto.PrecommitType: - return nil - } - } - return nil - } - if ps.PRS.Height == height+1 { - if ps.PRS.LastCommitRound == round { - switch votesType { - case tmproto.PrevoteType: - return nil - case tmproto.PrecommitType: - return ps.PRS.LastCommit - } - } - return nil - } - return nil -} - -// 'round': A round for which we have a +2/3 commit. -func (ps *PeerState) ensureCatchupCommitRound(height int64, round int32, numValidators int) { - if ps.PRS.Height != height { - return - } - /* - NOTE: This is wrong, 'round' could change. - e.g. if orig round is not the same as block LastCommit round. - if ps.CatchupCommitRound != -1 && ps.CatchupCommitRound != round { - panic(fmt.Sprintf( - "Conflicting CatchupCommitRound. Height: %v, - Orig: %v, - New: %v", - height, - ps.CatchupCommitRound, - round)) - } - */ - if ps.PRS.CatchupCommitRound == round { - return // Nothing to do! - } - ps.PRS.CatchupCommitRound = round - if round == ps.PRS.Round { - ps.PRS.CatchupCommit = ps.PRS.Precommits - } else { - ps.PRS.CatchupCommit = bits.NewBitArray(numValidators) - } -} - -// EnsureVoteBitArrays ensures the bit-arrays have been allocated for tracking -// what votes this peer has received. -// NOTE: It's important to make sure that numValidators actually matches -// what the node sees as the number of validators for height. -func (ps *PeerState) EnsureVoteBitArrays(height int64, numValidators int) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - ps.ensureVoteBitArrays(height, numValidators) -} - -func (ps *PeerState) ensureVoteBitArrays(height int64, numValidators int) { - if ps.PRS.Height == height { - if ps.PRS.Prevotes == nil { - ps.PRS.Prevotes = bits.NewBitArray(numValidators) - } - if ps.PRS.Precommits == nil { - ps.PRS.Precommits = bits.NewBitArray(numValidators) - } - if ps.PRS.CatchupCommit == nil { - ps.PRS.CatchupCommit = bits.NewBitArray(numValidators) - } - if ps.PRS.ProposalPOL == nil { - ps.PRS.ProposalPOL = bits.NewBitArray(numValidators) - } - } else if ps.PRS.Height == height+1 { - if ps.PRS.LastCommit == nil { - ps.PRS.LastCommit = bits.NewBitArray(numValidators) - } - } -} - -// RecordVote increments internal votes related statistics for this peer. -// It returns the total number of added votes. -func (ps *PeerState) RecordVote() int { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - ps.Stats.Votes++ - - return ps.Stats.Votes -} - -// VotesSent returns the number of blocks for which peer has been sending us -// votes. -func (ps *PeerState) VotesSent() int { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - return ps.Stats.Votes -} - -// RecordBlockPart increments internal block part related statistics for this peer. -// It returns the total number of added block parts. -func (ps *PeerState) RecordBlockPart() int { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - ps.Stats.BlockParts++ - return ps.Stats.BlockParts -} - -// BlockPartsSent returns the number of useful block parts the peer has sent us. -func (ps *PeerState) BlockPartsSent() int { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - return ps.Stats.BlockParts -} - -// SetHasVote sets the given vote as known by the peer -func (ps *PeerState) SetHasVote(vote *types.Vote) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - ps.setHasVote(vote.Height, vote.Round, vote.Type, vote.ValidatorIndex) -} - -func (ps *PeerState) setHasVote(height int64, round int32, voteType tmproto.SignedMsgType, index int32) { - logger := ps.logger.With( - "peerH/R", - fmt.Sprintf("%d/%d", ps.PRS.Height, ps.PRS.Round), - "H/R", - fmt.Sprintf("%d/%d", height, round)) - logger.Debug("setHasVote", "type", voteType, "index", index) - - // NOTE: some may be nil BitArrays -> no side effects. - psVotes := ps.getVoteBitArray(height, round, voteType) - if psVotes != nil { - psVotes.SetIndex(int(index), true) - } -} - -// ApplyNewRoundStepMessage updates the peer state for the new round. -func (ps *PeerState) ApplyNewRoundStepMessage(msg *tmcon.NewRoundStepMessage) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - // Ignore duplicates or decreases - if CompareHRS(msg.Height, msg.Round, msg.Step, ps.PRS.Height, ps.PRS.Round, ps.PRS.Step) <= 0 { - return - } - - // Just remember these values. - psHeight := ps.PRS.Height - psRound := ps.PRS.Round - psCatchupCommitRound := ps.PRS.CatchupCommitRound - psCatchupCommit := ps.PRS.CatchupCommit - - startTime := tmtime.Now().Add(-1 * time.Duration(msg.SecondsSinceStartTime) * time.Second) - ps.PRS.Height = msg.Height - ps.PRS.Round = msg.Round - ps.PRS.Step = msg.Step - ps.PRS.StartTime = startTime - if psHeight != msg.Height || psRound != msg.Round { - ps.PRS.Proposal = false - ps.PRS.ProposalBlockPartSetHeader = types.PartSetHeader{} - ps.PRS.ProposalBlockParts = nil - ps.PRS.ProposalPOLRound = -1 - ps.PRS.ProposalPOL = nil - // We'll update the BitArray capacity later. - ps.PRS.Prevotes = nil - ps.PRS.Precommits = nil - } - if psHeight == msg.Height && psRound != msg.Round && msg.Round == psCatchupCommitRound { - // Peer caught up to CatchupCommitRound. - // Preserve psCatchupCommit! - // NOTE: We prefer to use prs.Precommits if - // pr.Round matches pr.CatchupCommitRound. - ps.PRS.Precommits = psCatchupCommit - } - if psHeight != msg.Height { - // Shift Precommits to LastCommit. - if psHeight+1 == msg.Height && psRound == msg.LastCommitRound { - ps.PRS.LastCommitRound = msg.LastCommitRound - ps.PRS.LastCommit = ps.PRS.Precommits - } else { - ps.PRS.LastCommitRound = msg.LastCommitRound - ps.PRS.LastCommit = nil - } - // We'll update the BitArray capacity later. - ps.PRS.CatchupCommitRound = -1 - ps.PRS.CatchupCommit = nil - } -} - -// ApplyNewValidBlockMessage updates the peer state for the new valid block. -func (ps *PeerState) ApplyNewValidBlockMessage(msg *tmcon.NewValidBlockMessage) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.Height != msg.Height { - return - } - - if ps.PRS.Round != msg.Round && !msg.IsCommit { - return - } - - ps.PRS.ProposalBlockPartSetHeader = msg.BlockPartSetHeader - ps.PRS.ProposalBlockParts = msg.BlockParts -} - -// ApplyProposalPOLMessage updates the peer state for the new proposal POL. -func (ps *PeerState) ApplyProposalPOLMessage(msg *tmcon.ProposalPOLMessage) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.Height != msg.Height { - return - } - if ps.PRS.ProposalPOLRound != msg.ProposalPOLRound { - return - } - - // TODO: Merge onto existing ps.PRS.ProposalPOL? - // We might have sent some prevotes in the meantime. - ps.PRS.ProposalPOL = msg.ProposalPOL -} - -// ApplyHasVoteMessage updates the peer state for the new vote. -func (ps *PeerState) ApplyHasVoteMessage(msg *tmcon.HasVoteMessage) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.Height != msg.Height { - return - } - - ps.setHasVote(msg.Height, msg.Round, msg.Type, msg.Index) -} - -// ApplyVoteSetBitsMessage updates the peer state for the bit-array of votes -// it claims to have for the corresponding BlockID. -// `ourVotes` is a BitArray of votes we have for msg.BlockID -// NOTE: if ourVotes is nil (e.g. msg.Height < rs.Height), -// we conservatively overwrite ps's votes w/ msg.Votes. -func (ps *PeerState) ApplyVoteSetBitsMessage(msg *tmcon.VoteSetBitsMessage, ourVotes *bits.BitArray) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - votes := ps.getVoteBitArray(msg.Height, msg.Round, msg.Type) - if votes != nil { - if ourVotes == nil { - votes.Update(msg.Votes) - } else { - otherVotes := votes.Sub(ourVotes) - hasVotes := otherVotes.Or(msg.Votes) - votes.Update(hasVotes) - } - } -} - -// String returns a string representation of the PeerState -func (ps *PeerState) String() string { - return ps.StringIndented("") -} - -// StringIndented returns a string representation of the PeerState -func (ps *PeerState) StringIndented(indent string) string { - ps.mtx.Lock() - defer ps.mtx.Unlock() - return fmt.Sprintf(`PeerState{ -%s Key %v -%s RoundState %v -%s Stats %v -%s}`, - indent, ps.peer.ID(), - indent, ps.PRS.StringIndented(indent+" "), - indent, ps.Stats, - indent) -} - -//----------------------------------------------------------------------------- - -// func init() { -// tmjson.RegisterType(&NewRoundStepMessage{}, "tendermint/NewRoundStepMessage") -// tmjson.RegisterType(&NewValidBlockMessage{}, "tendermint/NewValidBlockMessage") -// tmjson.RegisterType(&ProposalMessage{}, "tendermint/Proposal") -// tmjson.RegisterType(&ProposalPOLMessage{}, "tendermint/ProposalPOL") -// tmjson.RegisterType(&BlockPartMessage{}, "tendermint/BlockPart") -// tmjson.RegisterType(&VoteMessage{}, "tendermint/Vote") -// tmjson.RegisterType(&HasVoteMessage{}, "tendermint/HasVote") -// tmjson.RegisterType(&VoteSetMaj23Message{}, "tendermint/VoteSetMaj23") -// tmjson.RegisterType(&VoteSetBitsMessage{}, "tendermint/VoteSetBits") -// } - -func decodeMsg(bz []byte) (msg tmcon.Message, err error) { - pb := &tmcons.Message{} - if err = proto.Unmarshal(bz, pb); err != nil { - return msg, err - } - - return tmcon.MsgFromProto(pb) -} diff --git a/test/maverick/consensus/replay.go b/test/maverick/consensus/replay.go deleted file mode 100644 index 6b8bf06c1..000000000 --- a/test/maverick/consensus/replay.go +++ /dev/null @@ -1,534 +0,0 @@ -package consensus - -import ( - "bytes" - "fmt" - "hash/crc32" - "io" - "reflect" - "time" - - abci "github.com/tendermint/tendermint/abci/types" - tmcon "github.com/tendermint/tendermint/consensus" - "github.com/tendermint/tendermint/crypto/merkle" - "github.com/tendermint/tendermint/libs/log" - "github.com/tendermint/tendermint/proxy" - sm "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/types" -) - -var crc32c = crc32.MakeTable(crc32.Castagnoli) - -// Functionality to replay blocks and messages on recovery from a crash. -// There are two general failure scenarios: -// -// 1. failure during consensus -// 2. failure while applying the block -// -// The former is handled by the WAL, the latter by the proxyApp Handshake on -// restart, which ultimately hands off the work to the WAL. - -//----------------------------------------- -// 1. Recover from failure during consensus -// (by replaying messages from the WAL) -//----------------------------------------- - -// Unmarshal and apply a single message to the consensus state as if it were -// received in receiveRoutine. Lines that start with "#" are ignored. -// NOTE: receiveRoutine should not be running. -func (cs *State) readReplayMessage(msg *tmcon.TimedWALMessage, newStepSub types.Subscription) error { - // Skip meta messages which exist for demarcating boundaries. - if _, ok := msg.Msg.(tmcon.EndHeightMessage); ok { - return nil - } - - // for logging - switch m := msg.Msg.(type) { - case types.EventDataRoundState: - cs.Logger.Info("Replay: New Step", "height", m.Height, "round", m.Round, "step", m.Step) - // these are playback checks - ticker := time.After(time.Second * 2) - if newStepSub != nil { - select { - case stepMsg := <-newStepSub.Out(): - m2 := stepMsg.Data().(types.EventDataRoundState) - if m.Height != m2.Height || m.Round != m2.Round || m.Step != m2.Step { - return fmt.Errorf("roundState mismatch. Got %v; Expected %v", m2, m) - } - case <-newStepSub.Cancelled(): - return fmt.Errorf("failed to read off newStepSub.Out(). newStepSub was canceled") - case <-ticker: - return fmt.Errorf("failed to read off newStepSub.Out()") - } - } - case msgInfo: - peerID := m.PeerID - if peerID == "" { - peerID = "local" - } - switch msg := m.Msg.(type) { - case *tmcon.ProposalMessage: - p := msg.Proposal - cs.Logger.Info("Replay: Proposal", "height", p.Height, "round", p.Round, "header", - p.BlockID.PartSetHeader, "pol", p.POLRound, "peer", peerID) - case *tmcon.BlockPartMessage: - cs.Logger.Info("Replay: BlockPart", "height", msg.Height, "round", msg.Round, "peer", peerID) - case *tmcon.VoteMessage: - v := msg.Vote - cs.Logger.Info("Replay: Vote", "height", v.Height, "round", v.Round, "type", v.Type, - "blockID", v.BlockID, "peer", peerID) - } - - cs.handleMsg(m) - case timeoutInfo: - cs.Logger.Info("Replay: Timeout", "height", m.Height, "round", m.Round, "step", m.Step, "dur", m.Duration) - cs.handleTimeout(m, cs.RoundState) - default: - return fmt.Errorf("replay: Unknown TimedWALMessage type: %v", reflect.TypeOf(msg.Msg)) - } - return nil -} - -// Replay only those messages since the last block. `timeoutRoutine` should -// run concurrently to read off tickChan. -func (cs *State) catchupReplay(csHeight int64) error { - - // Set replayMode to true so we don't log signing errors. - cs.replayMode = true - defer func() { cs.replayMode = false }() - - // Ensure that #ENDHEIGHT for this height doesn't exist. - // NOTE: This is just a sanity check. As far as we know things work fine - // without it, and Handshake could reuse State if it weren't for - // this check (since we can crash after writing #ENDHEIGHT). - // - // Ignore data corruption errors since this is a sanity check. - gr, found, err := cs.wal.SearchForEndHeight(csHeight, &tmcon.WALSearchOptions{IgnoreDataCorruptionErrors: true}) - if err != nil { - return err - } - if gr != nil { - if err := gr.Close(); err != nil { - return err - } - } - if found { - return fmt.Errorf("wal should not contain #ENDHEIGHT %d", csHeight) - } - - // Search for last height marker. - // - // Ignore data corruption errors in previous heights because we only care about last height - if csHeight < cs.state.InitialHeight { - return fmt.Errorf("cannot replay height %v, below initial height %v", csHeight, cs.state.InitialHeight) - } - endHeight := csHeight - 1 - if csHeight == cs.state.InitialHeight { - endHeight = 0 - } - gr, found, err = cs.wal.SearchForEndHeight(endHeight, &tmcon.WALSearchOptions{IgnoreDataCorruptionErrors: true}) - if err == io.EOF { - cs.Logger.Error("Replay: wal.group.Search returned EOF", "#ENDHEIGHT", endHeight) - } else if err != nil { - return err - } - if !found { - return fmt.Errorf("cannot replay height %d. WAL does not contain #ENDHEIGHT for %d", csHeight, endHeight) - } - defer gr.Close() - - cs.Logger.Info("Catchup by replaying consensus messages", "height", csHeight) - - var msg *tmcon.TimedWALMessage - dec := WALDecoder{gr} - -LOOP: - for { - msg, err = dec.Decode() - switch { - case err == io.EOF: - break LOOP - case IsDataCorruptionError(err): - cs.Logger.Error("data has been corrupted in last height of consensus WAL", "err", err, "height", csHeight) - return err - case err != nil: - return err - } - - // NOTE: since the priv key is set when the msgs are received - // it will attempt to eg double sign but we can just ignore it - // since the votes will be replayed and we'll get to the next step - if err := cs.readReplayMessage(msg, nil); err != nil { - return err - } - } - cs.Logger.Info("Replay: Done") - return nil -} - -//-------------------------------------------------------------------------------- - -// Parses marker lines of the form: -// #ENDHEIGHT: 12345 -/* -func makeHeightSearchFunc(height int64) auto.SearchFunc { - return func(line string) (int, error) { - line = strings.TrimRight(line, "\n") - parts := strings.Split(line, " ") - if len(parts) != 2 { - return -1, errors.New("line did not have 2 parts") - } - i, err := strconv.Atoi(parts[1]) - if err != nil { - return -1, errors.New("failed to parse INFO: " + err.Error()) - } - if height < i { - return 1, nil - } else if height == i { - return 0, nil - } else { - return -1, nil - } - } -}*/ - -//--------------------------------------------------- -// 2. Recover from failure while applying the block. -// (by handshaking with the app to figure out where -// we were last, and using the WAL to recover there.) -//--------------------------------------------------- - -type Handshaker struct { - stateStore sm.Store - initialState sm.State - store sm.BlockStore - eventBus types.BlockEventPublisher - genDoc *types.GenesisDoc - logger log.Logger - - nBlocks int // number of blocks applied to the state -} - -func NewHandshaker(stateStore sm.Store, state sm.State, - store sm.BlockStore, genDoc *types.GenesisDoc) *Handshaker { - - return &Handshaker{ - stateStore: stateStore, - initialState: state, - store: store, - eventBus: types.NopEventBus{}, - genDoc: genDoc, - logger: log.NewNopLogger(), - nBlocks: 0, - } -} - -func (h *Handshaker) SetLogger(l log.Logger) { - h.logger = l -} - -// SetEventBus - sets the event bus for publishing block related events. -// If not called, it defaults to types.NopEventBus. -func (h *Handshaker) SetEventBus(eventBus types.BlockEventPublisher) { - h.eventBus = eventBus -} - -// NBlocks returns the number of blocks applied to the state. -func (h *Handshaker) NBlocks() int { - return h.nBlocks -} - -// TODO: retry the handshake/replay if it fails ? -func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error { - - // Handshake is done via ABCI Info on the query conn. - res, err := proxyApp.Query().InfoSync(proxy.RequestInfo) - if err != nil { - return fmt.Errorf("error calling Info: %v", err) - } - - blockHeight := res.LastBlockHeight - if blockHeight < 0 { - return fmt.Errorf("got a negative last block height (%d) from the app", blockHeight) - } - appHash := res.LastBlockAppHash - - h.logger.Info("ABCI Handshake App Info", - "height", blockHeight, - "hash", appHash, - "software-version", res.Version, - "protocol-version", res.AppVersion, - ) - - // Only set the version if there is no existing state. - if h.initialState.LastBlockHeight == 0 { - h.initialState.Version.Consensus.App = res.AppVersion - } - - // Replay blocks up to the latest in the blockstore. - _, err = h.ReplayBlocks(h.initialState, appHash, blockHeight, proxyApp) - if err != nil { - return fmt.Errorf("error on replay: %v", err) - } - - h.logger.Info("Completed ABCI Handshake - Tendermint and App are synced", - "appHeight", blockHeight, "appHash", appHash) - - // TODO: (on restart) replay mempool - - return nil -} - -// ReplayBlocks replays all blocks since appBlockHeight and ensures the result -// matches the current state. -// Returns the final AppHash or an error. -func (h *Handshaker) ReplayBlocks( - state sm.State, - appHash []byte, - appBlockHeight int64, - proxyApp proxy.AppConns, -) ([]byte, error) { - storeBlockBase := h.store.Base() - storeBlockHeight := h.store.Height() - stateBlockHeight := state.LastBlockHeight - h.logger.Info( - "ABCI Replay Blocks", - "appHeight", - appBlockHeight, - "storeHeight", - storeBlockHeight, - "stateHeight", - stateBlockHeight) - - // If appBlockHeight == 0 it means that we are at genesis and hence should send InitChain. - if appBlockHeight == 0 { - validators := make([]*types.Validator, len(h.genDoc.Validators)) - for i, val := range h.genDoc.Validators { - validators[i] = types.NewValidator(val.PubKey, val.Power) - } - validatorSet := types.NewValidatorSet(validators) - nextVals := types.TM2PB.ValidatorUpdates(validatorSet) - csParams := types.TM2PB.ConsensusParams(h.genDoc.ConsensusParams) - req := abci.RequestInitChain{ - Time: h.genDoc.GenesisTime, - ChainId: h.genDoc.ChainID, - InitialHeight: h.genDoc.InitialHeight, - ConsensusParams: csParams, - Validators: nextVals, - AppStateBytes: h.genDoc.AppState, - } - res, err := proxyApp.Consensus().InitChainSync(req) - if err != nil { - return nil, err - } - - appHash = res.AppHash - - if stateBlockHeight == 0 { // we only update state when we are in initial state - // If the app did not return an app hash, we keep the one set from the genesis doc in - // the state. We don't set appHash since we don't want the genesis doc app hash - // recorded in the genesis block. We should probably just remove GenesisDoc.AppHash. - if len(res.AppHash) > 0 { - state.AppHash = res.AppHash - } - // If the app returned validators or consensus params, update the state. - if len(res.Validators) > 0 { - vals, err := types.PB2TM.ValidatorUpdates(res.Validators) - if err != nil { - return nil, err - } - state.Validators = types.NewValidatorSet(vals) - state.NextValidators = types.NewValidatorSet(vals).CopyIncrementProposerPriority(1) - } else if len(h.genDoc.Validators) == 0 { - // If validator set is not set in genesis and still empty after InitChain, exit. - return nil, fmt.Errorf("validator set is nil in genesis and still empty after InitChain") - } - - if res.ConsensusParams != nil { - state.ConsensusParams = types.UpdateConsensusParams(state.ConsensusParams, res.ConsensusParams) - state.Version.Consensus.App = state.ConsensusParams.Version.AppVersion - } - // We update the last results hash with the empty hash, to conform with RFC-6962. - state.LastResultsHash = merkle.HashFromByteSlices(nil) - if err := h.stateStore.Save(state); err != nil { - return nil, err - } - } - } - - // First handle edge cases and constraints on the storeBlockHeight and storeBlockBase. - switch { - case storeBlockHeight == 0: - assertAppHashEqualsOneFromState(appHash, state) - return appHash, nil - - case appBlockHeight == 0 && state.InitialHeight < storeBlockBase: - // the app has no state, and the block store is truncated above the initial height - return appHash, sm.ErrAppBlockHeightTooLow{AppHeight: appBlockHeight, StoreBase: storeBlockBase} - - case appBlockHeight > 0 && appBlockHeight < storeBlockBase-1: - // the app is too far behind truncated store (can be 1 behind since we replay the next) - return appHash, sm.ErrAppBlockHeightTooLow{AppHeight: appBlockHeight, StoreBase: storeBlockBase} - - case storeBlockHeight < appBlockHeight: - // the app should never be ahead of the store (but this is under app's control) - return appHash, sm.ErrAppBlockHeightTooHigh{CoreHeight: storeBlockHeight, AppHeight: appBlockHeight} - - case storeBlockHeight < stateBlockHeight: - // the state should never be ahead of the store (this is under tendermint's control) - panic(fmt.Sprintf("StateBlockHeight (%d) > StoreBlockHeight (%d)", stateBlockHeight, storeBlockHeight)) - - case storeBlockHeight > stateBlockHeight+1: - // store should be at most one ahead of the state (this is under tendermint's control) - panic(fmt.Sprintf("StoreBlockHeight (%d) > StateBlockHeight + 1 (%d)", storeBlockHeight, stateBlockHeight+1)) - } - - var err error - // Now either store is equal to state, or one ahead. - // For each, consider all cases of where the app could be, given app <= store - if storeBlockHeight == stateBlockHeight { - // Tendermint ran Commit and saved the state. - // Either the app is asking for replay, or we're all synced up. - if appBlockHeight < storeBlockHeight { - // the app is behind, so replay blocks, but no need to go through WAL (state is already synced to store) - return h.replayBlocks(state, proxyApp, appBlockHeight, storeBlockHeight, false) - - } else if appBlockHeight == storeBlockHeight { - // We're good! - assertAppHashEqualsOneFromState(appHash, state) - return appHash, nil - } - - } else if storeBlockHeight == stateBlockHeight+1 { - // We saved the block in the store but haven't updated the state, - // so we'll need to replay a block using the WAL. - switch { - case appBlockHeight < stateBlockHeight: - // the app is further behind than it should be, so replay blocks - // but leave the last block to go through the WAL - return h.replayBlocks(state, proxyApp, appBlockHeight, storeBlockHeight, true) - - case appBlockHeight == stateBlockHeight: - // We haven't run Commit (both the state and app are one block behind), - // so replayBlock with the real app. - // NOTE: We could instead use the cs.WAL on cs.Start, - // but we'd have to allow the WAL to replay a block that wrote it's #ENDHEIGHT - h.logger.Info("Replay last block using real app") - state, err = h.replayBlock(state, storeBlockHeight, proxyApp.Consensus()) - return state.AppHash, err - - case appBlockHeight == storeBlockHeight: - // We ran Commit, but didn't save the state, so replayBlock with mock app. - abciResponses, err := h.stateStore.LoadABCIResponses(storeBlockHeight) - if err != nil { - return nil, err - } - mockApp := newMockProxyApp(appHash, abciResponses) - h.logger.Info("Replay last block using mock app") - state, err = h.replayBlock(state, storeBlockHeight, mockApp) - return state.AppHash, err - } - - } - - panic(fmt.Sprintf("uncovered case! appHeight: %d, storeHeight: %d, stateHeight: %d", - appBlockHeight, storeBlockHeight, stateBlockHeight)) -} - -func (h *Handshaker) replayBlocks( - state sm.State, - proxyApp proxy.AppConns, - appBlockHeight, - storeBlockHeight int64, - mutateState bool) ([]byte, error) { - // App is further behind than it should be, so we need to replay blocks. - // We replay all blocks from appBlockHeight+1. - // - // Note that we don't have an old version of the state, - // so we by-pass state validation/mutation using sm.ExecCommitBlock. - // This also means we won't be saving validator sets if they change during this period. - // TODO: Load the historical information to fix this and just use state.ApplyBlock - // - // If mutateState == true, the final block is replayed with h.replayBlock() - - var appHash []byte - var err error - finalBlock := storeBlockHeight - if mutateState { - finalBlock-- - } - firstBlock := appBlockHeight + 1 - if firstBlock == 1 { - firstBlock = state.InitialHeight - } - for i := firstBlock; i <= finalBlock; i++ { - h.logger.Info("Applying block", "height", i) - block := h.store.LoadBlock(i) - // Extra check to ensure the app was not changed in a way it shouldn't have. - if len(appHash) > 0 { - assertAppHashEqualsOneFromBlock(appHash, block) - } - - appHash, err = sm.ExecCommitBlock(proxyApp.Consensus(), block, h.logger, h.stateStore, h.genDoc.InitialHeight) - if err != nil { - return nil, err - } - - h.nBlocks++ - } - - if mutateState { - // sync the final block - state, err = h.replayBlock(state, storeBlockHeight, proxyApp.Consensus()) - if err != nil { - return nil, err - } - appHash = state.AppHash - } - - assertAppHashEqualsOneFromState(appHash, state) - return appHash, nil -} - -// ApplyBlock on the proxyApp with the last block. -func (h *Handshaker) replayBlock(state sm.State, height int64, proxyApp proxy.AppConnConsensus) (sm.State, error) { - block := h.store.LoadBlock(height) - meta := h.store.LoadBlockMeta(height) - - // Use stubs for both mempool and evidence pool since no transactions nor - // evidence are needed here - block already exists. - blockExec := sm.NewBlockExecutor(h.stateStore, h.logger, proxyApp, emptyMempool{}, sm.EmptyEvidencePool{}) - blockExec.SetEventBus(h.eventBus) - - var err error - state, _, err = blockExec.ApplyBlock(state, meta.BlockID, block) - if err != nil { - return sm.State{}, err - } - - h.nBlocks++ - - return state, nil -} - -func assertAppHashEqualsOneFromBlock(appHash []byte, block *types.Block) { - if !bytes.Equal(appHash, block.AppHash) { - panic(fmt.Sprintf(`block.AppHash does not match AppHash after replay. Got %X, expected %X. - -Block: %v -`, - appHash, block.AppHash, block)) - } -} - -func assertAppHashEqualsOneFromState(appHash []byte, state sm.State) { - if !bytes.Equal(appHash, state.AppHash) { - panic(fmt.Sprintf(`state.AppHash does not match AppHash after replay. Got -%X, expected %X. - -State: %v - -Did you reset Tendermint without resetting your application's data?`, - appHash, state.AppHash, state)) - } -} diff --git a/test/maverick/consensus/replay_file.go b/test/maverick/consensus/replay_file.go deleted file mode 100644 index 2dbc5cf37..000000000 --- a/test/maverick/consensus/replay_file.go +++ /dev/null @@ -1,339 +0,0 @@ -package consensus - -import ( - "bufio" - "context" - "errors" - "fmt" - "io" - "os" - "strconv" - "strings" - - dbm "github.com/tendermint/tm-db" - - cfg "github.com/tendermint/tendermint/config" - tmcon "github.com/tendermint/tendermint/consensus" - "github.com/tendermint/tendermint/libs/log" - tmos "github.com/tendermint/tendermint/libs/os" - "github.com/tendermint/tendermint/proxy" - sm "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/store" - "github.com/tendermint/tendermint/types" -) - -const ( - // event bus subscriber - subscriber = "replay-file" -) - -//-------------------------------------------------------- -// replay messages interactively or all at once - -// replay the wal file -func RunReplayFile(config cfg.BaseConfig, csConfig *cfg.ConsensusConfig, console bool) { - consensusState := newConsensusStateForReplay(config, csConfig) - - if err := consensusState.ReplayFile(csConfig.WalFile(), console); err != nil { - tmos.Exit(fmt.Sprintf("Error during consensus replay: %v", err)) - } -} - -// Replay msgs in file or start the console -func (cs *State) ReplayFile(file string, console bool) error { - - if cs.IsRunning() { - return errors.New("cs is already running, cannot replay") - } - if cs.wal != nil { - return errors.New("cs wal is open, cannot replay") - } - - cs.startForReplay() - - // ensure all new step events are regenerated as expected - - ctx := context.Background() - newStepSub, err := cs.eventBus.Subscribe(ctx, subscriber, types.EventQueryNewRoundStep) - if err != nil { - return fmt.Errorf("failed to subscribe %s to %v", subscriber, types.EventQueryNewRoundStep) - } - defer func() { - if err := cs.eventBus.Unsubscribe(ctx, subscriber, types.EventQueryNewRoundStep); err != nil { - cs.Logger.Error("Error unsubscribing to event bus", "err", err) - } - }() - - // just open the file for reading, no need to use wal - fp, err := os.OpenFile(file, os.O_RDONLY, 0600) - if err != nil { - return err - } - - pb := newPlayback(file, fp, cs, cs.state.Copy()) - defer pb.fp.Close() - - var nextN int // apply N msgs in a row - var msg *tmcon.TimedWALMessage - for { - if nextN == 0 && console { - nextN = pb.replayConsoleLoop() - } - - msg, err = pb.dec.Decode() - if err == io.EOF { - return nil - } else if err != nil { - return err - } - - if err := pb.cs.readReplayMessage(msg, newStepSub); err != nil { - return err - } - - if nextN > 0 { - nextN-- - } - pb.count++ - } -} - -//------------------------------------------------ -// playback manager - -type playback struct { - cs *State - - fp *os.File - dec *WALDecoder - count int // how many lines/msgs into the file are we - - // replays can be reset to beginning - fileName string // so we can close/reopen the file - genesisState sm.State // so the replay session knows where to restart from -} - -func newPlayback(fileName string, fp *os.File, cs *State, genState sm.State) *playback { - return &playback{ - cs: cs, - fp: fp, - fileName: fileName, - genesisState: genState, - dec: NewWALDecoder(fp), - } -} - -// go back count steps by resetting the state and running (pb.count - count) steps -func (pb *playback) replayReset(count int, newStepSub types.Subscription) error { - if err := pb.cs.Stop(); err != nil { - return err - } - pb.cs.Wait() - - newCS := NewState(pb.cs.config, pb.genesisState.Copy(), pb.cs.blockExec, - pb.cs.blockStore, pb.cs.txNotifier, pb.cs.evpool, map[int64]Misbehavior{}) - newCS.SetEventBus(pb.cs.eventBus) - newCS.startForReplay() - - if err := pb.fp.Close(); err != nil { - return err - } - fp, err := os.OpenFile(pb.fileName, os.O_RDONLY, 0600) - if err != nil { - return err - } - pb.fp = fp - pb.dec = NewWALDecoder(fp) - count = pb.count - count - fmt.Printf("Reseting from %d to %d\n", pb.count, count) - pb.count = 0 - pb.cs = newCS - var msg *tmcon.TimedWALMessage - for i := 0; i < count; i++ { - msg, err = pb.dec.Decode() - if err == io.EOF { - return nil - } else if err != nil { - return err - } - if err := pb.cs.readReplayMessage(msg, newStepSub); err != nil { - return err - } - pb.count++ - } - return nil -} - -func (cs *State) startForReplay() { - cs.Logger.Error("Replay commands are disabled until someone updates them and writes tests") - /* TODO:! - // since we replay tocks we just ignore ticks - go func() { - for { - select { - case <-cs.tickChan: - case <-cs.Quit: - return - } - } - }()*/ -} - -// console function for parsing input and running commands -func (pb *playback) replayConsoleLoop() int { - for { - fmt.Printf("> ") - bufReader := bufio.NewReader(os.Stdin) - line, more, err := bufReader.ReadLine() - if more { - tmos.Exit("input is too long") - } else if err != nil { - tmos.Exit(err.Error()) - } - - tokens := strings.Split(string(line), " ") - if len(tokens) == 0 { - continue - } - - switch tokens[0] { - case "next": - // "next" -> replay next message - // "next N" -> replay next N messages - - if len(tokens) == 1 { - return 0 - } - i, err := strconv.Atoi(tokens[1]) - if err != nil { - fmt.Println("next takes an integer argument") - } else { - return i - } - - case "back": - // "back" -> go back one message - // "back N" -> go back N messages - - // NOTE: "back" is not supported in the state machine design, - // so we restart and replay up to - - ctx := context.Background() - // ensure all new step events are regenerated as expected - - newStepSub, err := pb.cs.eventBus.Subscribe(ctx, subscriber, types.EventQueryNewRoundStep) - if err != nil { - tmos.Exit(fmt.Sprintf("failed to subscribe %s to %v", subscriber, types.EventQueryNewRoundStep)) - } - defer func() { - if err := pb.cs.eventBus.Unsubscribe(ctx, subscriber, types.EventQueryNewRoundStep); err != nil { - pb.cs.Logger.Error("Error unsubscribing from eventBus", "err", err) - } - }() - - if len(tokens) == 1 { - if err := pb.replayReset(1, newStepSub); err != nil { - pb.cs.Logger.Error("Replay reset error", "err", err) - } - } else { - i, err := strconv.Atoi(tokens[1]) - if err != nil { - fmt.Println("back takes an integer argument") - } else if i > pb.count { - fmt.Printf("argument to back must not be larger than the current count (%d)\n", pb.count) - } else if err := pb.replayReset(i, newStepSub); err != nil { - pb.cs.Logger.Error("Replay reset error", "err", err) - } - } - - case "rs": - // "rs" -> print entire round state - // "rs short" -> print height/round/step - // "rs " -> print another field of the round state - - rs := pb.cs.RoundState - if len(tokens) == 1 { - fmt.Println(rs) - } else { - switch tokens[1] { - case "short": - fmt.Printf("%v/%v/%v\n", rs.Height, rs.Round, rs.Step) - case "validators": - fmt.Println(rs.Validators) - case "proposal": - fmt.Println(rs.Proposal) - case "proposal_block": - fmt.Printf("%v %v\n", rs.ProposalBlockParts.StringShort(), rs.ProposalBlock.StringShort()) - case "locked_round": - fmt.Println(rs.LockedRound) - case "locked_block": - fmt.Printf("%v %v\n", rs.LockedBlockParts.StringShort(), rs.LockedBlock.StringShort()) - case "votes": - fmt.Println(rs.Votes.StringIndented(" ")) - - default: - fmt.Println("Unknown option", tokens[1]) - } - } - case "n": - fmt.Println(pb.count) - } - } -} - -//-------------------------------------------------------------------------------- - -// convenience for replay mode -func newConsensusStateForReplay(config cfg.BaseConfig, csConfig *cfg.ConsensusConfig) *State { - dbType := dbm.BackendType(config.DBBackend) - // Get BlockStore - blockStoreDB, err := dbm.NewDB("blockstore", dbType, config.DBDir()) - if err != nil { - tmos.Exit(err.Error()) - } - blockStore := store.NewBlockStore(blockStoreDB) - - // Get State - stateDB, err := dbm.NewDB("state", dbType, config.DBDir()) - if err != nil { - tmos.Exit(err.Error()) - } - stateStore := sm.NewStore(stateDB) - gdoc, err := sm.MakeGenesisDocFromFile(config.GenesisFile()) - if err != nil { - tmos.Exit(err.Error()) - } - state, err := sm.MakeGenesisState(gdoc) - if err != nil { - tmos.Exit(err.Error()) - } - - // Create proxyAppConn connection (consensus, mempool, query) - clientCreator := proxy.DefaultClientCreator(config.ProxyApp, config.ABCI, config.DBDir()) - proxyApp := proxy.NewAppConns(clientCreator) - err = proxyApp.Start() - if err != nil { - tmos.Exit(fmt.Sprintf("Error starting proxy app conns: %v", err)) - } - - eventBus := types.NewEventBus() - if err := eventBus.Start(); err != nil { - tmos.Exit(fmt.Sprintf("Failed to start event bus: %v", err)) - } - - handshaker := NewHandshaker(stateStore, state, blockStore, gdoc) - handshaker.SetEventBus(eventBus) - err = handshaker.Handshake(proxyApp) - if err != nil { - tmos.Exit(fmt.Sprintf("Error on handshake: %v", err)) - } - - mempool, evpool := emptyMempool{}, sm.EmptyEvidencePool{} - blockExec := sm.NewBlockExecutor(stateStore, log.TestingLogger(), proxyApp.Consensus(), mempool, evpool) - - consensusState := NewState(csConfig, state.Copy(), blockExec, - blockStore, mempool, evpool, map[int64]Misbehavior{}) - - consensusState.SetEventBus(eventBus) - return consensusState -} diff --git a/test/maverick/consensus/replay_stubs.go b/test/maverick/consensus/replay_stubs.go deleted file mode 100644 index 5f3badd18..000000000 --- a/test/maverick/consensus/replay_stubs.go +++ /dev/null @@ -1,92 +0,0 @@ -package consensus - -import ( - abci "github.com/tendermint/tendermint/abci/types" - "github.com/tendermint/tendermint/libs/clist" - mempl "github.com/tendermint/tendermint/mempool" - tmstate "github.com/tendermint/tendermint/proto/tendermint/state" - "github.com/tendermint/tendermint/proxy" - "github.com/tendermint/tendermint/types" -) - -//----------------------------------------------------------------------------- - -type emptyMempool struct{} - -var _ mempl.Mempool = emptyMempool{} - -func (emptyMempool) Lock() {} -func (emptyMempool) Unlock() {} -func (emptyMempool) Size() int { return 0 } -func (emptyMempool) SizeBytes() int64 { return 0 } -func (emptyMempool) CheckTx(_ types.Tx, _ func(*abci.Response), _ mempl.TxInfo) error { - return nil -} -func (emptyMempool) RemoveTxByKey(txKey types.TxKey) error { return nil } -func (emptyMempool) ReapMaxBytesMaxGas(_, _ int64) types.Txs { return types.Txs{} } -func (emptyMempool) ReapMaxTxs(n int) types.Txs { return types.Txs{} } -func (emptyMempool) Update( - _ int64, - _ types.Txs, - _ []*abci.ResponseDeliverTx, - _ mempl.PreCheckFunc, - _ mempl.PostCheckFunc, -) error { - return nil -} -func (emptyMempool) Flush() {} -func (emptyMempool) FlushAppConn() error { return nil } -func (emptyMempool) TxsAvailable() <-chan struct{} { return make(chan struct{}) } -func (emptyMempool) EnableTxsAvailable() {} -func (emptyMempool) TxsBytes() int64 { return 0 } - -func (emptyMempool) TxsFront() *clist.CElement { return nil } -func (emptyMempool) TxsWaitChan() <-chan struct{} { return nil } - -func (emptyMempool) InitWAL() error { return nil } -func (emptyMempool) CloseWAL() {} - -//----------------------------------------------------------------------------- -// mockProxyApp uses ABCIResponses to give the right results. -// -// Useful because we don't want to call Commit() twice for the same block on -// the real app. - -func newMockProxyApp(appHash []byte, abciResponses *tmstate.ABCIResponses) proxy.AppConnConsensus { - clientCreator := proxy.NewLocalClientCreator(&mockProxyApp{ - appHash: appHash, - abciResponses: abciResponses, - }) - cli, _ := clientCreator.NewABCIClient() - err := cli.Start() - if err != nil { - panic(err) - } - return proxy.NewAppConnConsensus(cli) -} - -type mockProxyApp struct { - abci.BaseApplication - - appHash []byte - txCount int - abciResponses *tmstate.ABCIResponses -} - -func (mock *mockProxyApp) DeliverTx(req abci.RequestDeliverTx) abci.ResponseDeliverTx { - r := mock.abciResponses.DeliverTxs[mock.txCount] - mock.txCount++ - if r == nil { - return abci.ResponseDeliverTx{} - } - return *r -} - -func (mock *mockProxyApp) EndBlock(req abci.RequestEndBlock) abci.ResponseEndBlock { - mock.txCount = 0 - return *mock.abciResponses.EndBlock -} - -func (mock *mockProxyApp) Commit() abci.ResponseCommit { - return abci.ResponseCommit{Data: mock.appHash} -} diff --git a/test/maverick/consensus/state.go b/test/maverick/consensus/state.go deleted file mode 100644 index 1cb91682b..000000000 --- a/test/maverick/consensus/state.go +++ /dev/null @@ -1,2000 +0,0 @@ -package consensus - -import ( - "bytes" - "errors" - "fmt" - "io/ioutil" - "os" - "reflect" - "runtime/debug" - "sync" - "time" - - "github.com/gogo/protobuf/proto" - - cfg "github.com/tendermint/tendermint/config" - tmcon "github.com/tendermint/tendermint/consensus" - cstypes "github.com/tendermint/tendermint/consensus/types" - "github.com/tendermint/tendermint/crypto" - tmevents "github.com/tendermint/tendermint/libs/events" - "github.com/tendermint/tendermint/libs/fail" - tmjson "github.com/tendermint/tendermint/libs/json" - "github.com/tendermint/tendermint/libs/log" - tmmath "github.com/tendermint/tendermint/libs/math" - tmos "github.com/tendermint/tendermint/libs/os" - "github.com/tendermint/tendermint/libs/service" - "github.com/tendermint/tendermint/p2p" - tmproto "github.com/tendermint/tendermint/proto/tendermint/types" - sm "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/types" - tmtime "github.com/tendermint/tendermint/types/time" -) - -// State handles execution of the consensus algorithm. -// It processes votes and proposals, and upon reaching agreement, -// commits blocks to the chain and executes them against the application. -// The internal state machine receives input from peers, the internal validator, and from a timer. -type State struct { - service.BaseService - - // config details - config *cfg.ConsensusConfig - privValidator types.PrivValidator // for signing votes - - // store blocks and commits - blockStore sm.BlockStore - - // create and execute blocks - blockExec *sm.BlockExecutor - - // notify us if txs are available - txNotifier txNotifier - - // add evidence to the pool - // when it's detected - evpool evidencePool - - // internal state - mtx sync.RWMutex - cstypes.RoundState - state sm.State // State until height-1. - - // state changes may be triggered by: msgs from peers, - // msgs from ourself, or by timeouts - peerMsgQueue chan msgInfo - internalMsgQueue chan msgInfo - timeoutTicker TimeoutTicker - // privValidator pubkey, memoized for the duration of one block - // to avoid extra requests to HSM - privValidatorPubKey crypto.PubKey - - // information about about added votes and block parts are written on this channel - // so statistics can be computed by reactor - statsMsgQueue chan msgInfo - - // we use eventBus to trigger msg broadcasts in the reactor, - // and to notify external subscribers, eg. through a websocket - eventBus *types.EventBus - - // a Write-Ahead Log ensures we can recover from any kind of crash - // and helps us avoid signing conflicting votes - wal tmcon.WAL - replayMode bool // so we don't log signing errors during replay - doWALCatchup bool // determines if we even try to do the catchup - - // for tests where we want to limit the number of transitions the state makes - nSteps int - - // some functions can be overwritten for testing - decideProposal func(height int64, round int32) - - // closed when we finish shutting down - done chan struct{} - - // synchronous pubsub between consensus state and reactor. - // state only emits EventNewRoundStep and EventVote - evsw tmevents.EventSwitch - - // for reporting metrics - metrics *tmcon.Metrics - - // misbehaviors mapped for each height (can't have more than one misbehavior per height) - misbehaviors map[int64]Misbehavior - - // the switch is passed to the state so that maveick misbehaviors can directly control which - // information they send to which nodes - sw *p2p.Switch -} - -// StateOption sets an optional parameter on the State. -type StateOption func(*State) - -// NewState returns a new State. -func NewState( - config *cfg.ConsensusConfig, - state sm.State, - blockExec *sm.BlockExecutor, - blockStore sm.BlockStore, - txNotifier txNotifier, - evpool evidencePool, - misbehaviors map[int64]Misbehavior, - options ...StateOption, -) *State { - cs := &State{ - config: config, - blockExec: blockExec, - blockStore: blockStore, - txNotifier: txNotifier, - peerMsgQueue: make(chan msgInfo, msgQueueSize), - internalMsgQueue: make(chan msgInfo, msgQueueSize), - timeoutTicker: NewTimeoutTicker(), - statsMsgQueue: make(chan msgInfo, msgQueueSize), - done: make(chan struct{}), - doWALCatchup: true, - wal: nilWAL{}, - evpool: evpool, - evsw: tmevents.NewEventSwitch(), - metrics: tmcon.NopMetrics(), - misbehaviors: misbehaviors, - } - // set function defaults (may be overwritten before calling Start) - cs.decideProposal = cs.defaultDecideProposal - - // We have no votes, so reconstruct LastCommit from SeenCommit. - if state.LastBlockHeight > 0 { - cs.reconstructLastCommit(state) - } - - cs.updateToState(state) - - // Don't call scheduleRound0 yet. - // We do that upon Start(). - - cs.BaseService = *service.NewBaseService(nil, "State", cs) - for _, option := range options { - option(cs) - } - return cs -} - -// I know this is not great but the maverick consensus state needs access to the peers -func (cs *State) SetSwitch(sw *p2p.Switch) { - cs.sw = sw -} - -// state transitions on complete-proposal, 2/3-any, 2/3-one -func (cs *State) handleMsg(mi msgInfo) { - cs.mtx.Lock() - defer cs.mtx.Unlock() - - var ( - added bool - err error - ) - msg, peerID := mi.Msg, mi.PeerID - switch msg := msg.(type) { - case *tmcon.ProposalMessage: - // will not cause transition. - // once proposal is set, we can receive block parts - // err = cs.setProposal(msg.Proposal) - if b, ok := cs.misbehaviors[cs.Height]; ok { - err = b.ReceiveProposal(cs, msg.Proposal) - } else { - err = defaultReceiveProposal(cs, msg.Proposal) - } - case *tmcon.BlockPartMessage: - // if the proposal is complete, we'll enterPrevote or tryFinalizeCommit - added, err = cs.addProposalBlockPart(msg, peerID) - if added { - cs.statsMsgQueue <- mi - } - - if err != nil && msg.Round != cs.Round { - cs.Logger.Debug( - "Received block part from wrong round", - "height", - cs.Height, - "csRound", - cs.Round, - "blockRound", - msg.Round) - err = nil - } - case *tmcon.VoteMessage: - // attempt to add the vote and dupeout the validator if its a duplicate signature - // if the vote gives us a 2/3-any or 2/3-one, we transition - added, err = cs.tryAddVote(msg.Vote, peerID) - if added { - cs.statsMsgQueue <- mi - } - - // if err == ErrAddingVote { - // TODO: punish peer - // We probably don't want to stop the peer here. The vote does not - // necessarily comes from a malicious peer but can be just broadcasted by - // a typical peer. - // https://github.com/tendermint/tendermint/issues/1281 - // } - - // NOTE: the vote is broadcast to peers by the reactor listening - // for vote events - - // TODO: If rs.Height == vote.Height && rs.Round < vote.Round, - // the peer is sending us CatchupCommit precommits. - // We could make note of this and help filter in broadcastHasVoteMessage(). - default: - cs.Logger.Error("Unknown msg type", "type", reflect.TypeOf(msg)) - return - } - - if err != nil { - cs.Logger.Error("Error with msg", "height", cs.Height, "round", cs.Round, - "peer", peerID, "err", err, "msg", msg) - } -} - -// Enter (CreateEmptyBlocks): from enterNewRound(height,round) -// Enter (CreateEmptyBlocks, CreateEmptyBlocksInterval > 0 ): -// after enterNewRound(height,round), after timeout of CreateEmptyBlocksInterval -// Enter (!CreateEmptyBlocks) : after enterNewRound(height,round), once txs are in the mempool -func (cs *State) enterPropose(height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPropose <= cs.Step) { - logger.Debug("enter propose", "msg", log.NewLazySprintf( - "enterPropose(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - round, - cs.Height, - cs.Round, - cs.Step)) - return - } - logger.Info("enter propose", - "msg", - log.NewLazySprintf("enterPropose(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - defer func() { - // Done enterPropose: - cs.updateRoundStep(round, cstypes.RoundStepPropose) - cs.newStep() - - // If we have the whole proposal + POL, then goto Prevote now. - // else, we'll enterPrevote when the rest of the proposal is received (in AddProposalBlockPart), - // or else after timeoutPropose - if cs.isProposalComplete() { - cs.enterPrevote(height, cs.Round) - } - }() - - if b, ok := cs.misbehaviors[cs.Height]; ok { - b.EnterPropose(cs, height, round) - } else { - defaultEnterPropose(cs, height, round) - } -} - -// Enter: `timeoutPropose` after entering Propose. -// Enter: proposal block and POL is ready. -// Prevote for LockedBlock if we're locked, or ProposalBlock if valid. -// Otherwise vote nil. -func (cs *State) enterPrevote(height int64, round int32) { - if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevote <= cs.Step) { - cs.Logger.Debug("enter prevote", "msg", log.NewLazySprintf( - "enterPrevote(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - round, - cs.Height, - cs.Round, - cs.Step)) - return - } - - defer func() { - // Done enterPrevote: - cs.updateRoundStep(round, cstypes.RoundStepPrevote) - cs.newStep() - }() - - cs.Logger.Debug("enter prevote", - "msg", - log.NewLazySprintf("enterPrevote(%v/%v); current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - // Sign and broadcast vote as necessary - if b, ok := cs.misbehaviors[cs.Height]; ok { - b.EnterPrevote(cs, height, round) - } else { - defaultEnterPrevote(cs, height, round) - } - - // Once `addVote` hits any +2/3 prevotes, we will go to PrevoteWait - // (so we have more time to try and collect +2/3 prevotes for a single block) -} - -// Enter: `timeoutPrevote` after any +2/3 prevotes. -// Enter: `timeoutPrecommit` after any +2/3 precommits. -// Enter: +2/3 precomits for block or nil. -// Lock & precommit the ProposalBlock if we have enough prevotes for it (a POL in this round) -// else, unlock an existing lock and precommit nil if +2/3 of prevotes were nil, -// else, precommit nil otherwise. -func (cs *State) enterPrecommit(height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrecommit <= cs.Step) { - logger.Debug("enter precommit", - "msg", - log.NewLazySprintf("enterPrecommit(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - round, - cs.Height, - cs.Round, - cs.Step)) - return - } - - logger.Info("enter precommit", - "msg", - log.NewLazySprintf("enterPrecommit(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - defer func() { - // Done enterPrecommit: - cs.updateRoundStep(round, cstypes.RoundStepPrecommit) - cs.newStep() - }() - - if b, ok := cs.misbehaviors[cs.Height]; ok { - b.EnterPrecommit(cs, height, round) - } else { - defaultEnterPrecommit(cs, height, round) - } - -} - -func (cs *State) addVote( - vote *types.Vote, - peerID p2p.ID) (added bool, err error) { - cs.Logger.Debug( - "addVote", - "voteHeight", - vote.Height, - "voteType", - vote.Type, - "valIndex", - vote.ValidatorIndex, - "csHeight", - cs.Height, - ) - - // A precommit for the previous height? - // These come in while we wait timeoutCommit - if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType { - if cs.Step != cstypes.RoundStepNewHeight { - // Late precommit at prior height is ignored - cs.Logger.Debug("Precommit vote came in after commit timeout and has been ignored", "vote", vote) - return - } - added, err = cs.LastCommit.AddVote(vote) - if !added { - return - } - - cs.Logger.Info("add vote", - "msg", - log.NewLazySprintf("Added to lastPrecommits: %v", cs.LastCommit.StringShort())) - _ = cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}) - cs.evsw.FireEvent(types.EventVote, vote) - - // if we can skip timeoutCommit and have all the votes now, - if cs.config.SkipTimeoutCommit && cs.LastCommit.HasAll() { - // go straight to new round (skip timeout commit) - // cs.scheduleTimeout(time.Duration(0), cs.Height, 0, cstypes.RoundStepNewHeight) - cs.enterNewRound(cs.Height, 0) - } - - return - } - - // Height mismatch is ignored. - // Not necessarily a bad peer, but not favorable behavior. - if vote.Height != cs.Height { - cs.Logger.Debug("vote ignored and not added", "voteHeight", vote.Height, "csHeight", cs.Height, "peerID", peerID) - return - } - - added, err = cs.Votes.AddVote(vote, peerID) - if !added { - // Either duplicate, or error upon cs.Votes.AddByIndex() - return - } - - _ = cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}) - cs.evsw.FireEvent(types.EventVote, vote) - - switch vote.Type { - case tmproto.PrevoteType: - if b, ok := cs.misbehaviors[cs.Height]; ok { - b.ReceivePrevote(cs, vote) - } else { - defaultReceivePrevote(cs, vote) - } - - case tmproto.PrecommitType: - if b, ok := cs.misbehaviors[cs.Height]; ok { - b.ReceivePrecommit(cs, vote) - } - defaultReceivePrecommit(cs, vote) - - default: - panic(fmt.Sprintf("Unexpected vote type %v", vote.Type)) - } - - return added, err -} - -//----------------------------------------------------------------------------- -// Errors - -var ( - ErrInvalidProposalSignature = errors.New("error invalid proposal signature") - ErrInvalidProposalPOLRound = errors.New("error invalid proposal POL round") - ErrAddingVote = errors.New("error adding vote") - ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key") - - errPubKeyIsNotSet = errors.New("pubkey is not set. Look for \"Can't get private validator pubkey\" errors") -) - -//----------------------------------------------------------------------------- - -var ( - msgQueueSize = 1000 -) - -// msgs from the reactor which may update the state -type msgInfo struct { - Msg tmcon.Message `json:"msg"` - PeerID p2p.ID `json:"peer_key"` -} - -// internally generated messages which may update the state -type timeoutInfo struct { - Duration time.Duration `json:"duration"` - Height int64 `json:"height"` - Round int32 `json:"round"` - Step cstypes.RoundStepType `json:"step"` -} - -func (ti *timeoutInfo) String() string { - return fmt.Sprintf("%v ; %d/%d %v", ti.Duration, ti.Height, ti.Round, ti.Step) -} - -// interface to the mempool -type txNotifier interface { - TxsAvailable() <-chan struct{} -} - -// interface to the evidence pool -type evidencePool interface { - // reports conflicting votes to the evidence pool to be processed into evidence - ReportConflictingVotes(voteA, voteB *types.Vote) -} - -//---------------------------------------- -// Public interface - -// SetLogger implements Service. -func (cs *State) SetLogger(l log.Logger) { - cs.BaseService.Logger = l - cs.timeoutTicker.SetLogger(l) -} - -// SetEventBus sets event bus. -func (cs *State) SetEventBus(b *types.EventBus) { - cs.eventBus = b - cs.blockExec.SetEventBus(b) -} - -// StateMetrics sets the metrics. -func StateMetrics(metrics *tmcon.Metrics) StateOption { - return func(cs *State) { cs.metrics = metrics } -} - -// String returns a string. -func (cs *State) String() string { - // better not to access shared variables - return "ConsensusState" -} - -// GetState returns a copy of the chain state. -func (cs *State) GetState() sm.State { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - return cs.state.Copy() -} - -// GetLastHeight returns the last height committed. -// If there were no blocks, returns 0. -func (cs *State) GetLastHeight() int64 { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - return cs.RoundState.Height - 1 -} - -// GetRoundState returns a shallow copy of the internal consensus state. -func (cs *State) GetRoundState() *cstypes.RoundState { - cs.mtx.RLock() - rs := cs.RoundState // copy - cs.mtx.RUnlock() - return &rs -} - -// GetRoundStateJSON returns a json of RoundState. -func (cs *State) GetRoundStateJSON() ([]byte, error) { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - return tmjson.Marshal(cs.RoundState) -} - -// GetRoundStateSimpleJSON returns a json of RoundStateSimple -func (cs *State) GetRoundStateSimpleJSON() ([]byte, error) { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - return tmjson.Marshal(cs.RoundState.RoundStateSimple()) -} - -// GetValidators returns a copy of the current validators. -func (cs *State) GetValidators() (int64, []*types.Validator) { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators -} - -// SetPrivValidator sets the private validator account for signing votes. It -// immediately requests pubkey and caches it. -func (cs *State) SetPrivValidator(priv types.PrivValidator) { - cs.mtx.Lock() - defer cs.mtx.Unlock() - - cs.privValidator = priv - - if err := cs.updatePrivValidatorPubKey(); err != nil { - cs.Logger.Error("Can't get private validator pubkey", "err", err) - } -} - -// SetTimeoutTicker sets the local timer. It may be useful to overwrite for testing. -func (cs *State) SetTimeoutTicker(timeoutTicker TimeoutTicker) { - cs.mtx.Lock() - cs.timeoutTicker = timeoutTicker - cs.mtx.Unlock() -} - -// LoadCommit loads the commit for a given height. -func (cs *State) LoadCommit(height int64) *types.Commit { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - if height == cs.blockStore.Height() { - return cs.blockStore.LoadSeenCommit(height) - } - return cs.blockStore.LoadBlockCommit(height) -} - -// OnStart loads the latest state via the WAL, and starts the timeout and -// receive routines. -func (cs *State) OnStart() error { - // We may set the WAL in testing before calling Start, so only OpenWAL if its - // still the nilWAL. - if _, ok := cs.wal.(nilWAL); ok { - if err := cs.loadWalFile(); err != nil { - return err - } - } - - // We may have lost some votes if the process crashed reload from consensus - // log to catchup. - if cs.doWALCatchup { - repairAttempted := false - LOOP: - for { - err := cs.catchupReplay(cs.Height) - switch { - case err == nil: - break LOOP - case !IsDataCorruptionError(err): - cs.Logger.Error("Error on catchup replay. Proceeding to start State anyway", "err", err) - break LOOP - case repairAttempted: - return err - } - - cs.Logger.Info("WAL file is corrupted. Attempting repair", "err", err) - - // 1) prep work - if err := cs.wal.Stop(); err != nil { - return err - } - repairAttempted = true - - // 2) backup original WAL file - corruptedFile := fmt.Sprintf("%s.CORRUPTED", cs.config.WalFile()) - if err := tmos.CopyFile(cs.config.WalFile(), corruptedFile); err != nil { - return err - } - cs.Logger.Info("Backed up WAL file", "src", cs.config.WalFile(), "dst", corruptedFile) - - // 3) try to repair (WAL file will be overwritten!) - if err := repairWalFile(corruptedFile, cs.config.WalFile()); err != nil { - cs.Logger.Error("Repair failed", "err", err) - return err - } - cs.Logger.Info("Successful repair") - - // reload WAL file - if err := cs.loadWalFile(); err != nil { - return err - } - } - } - - if err := cs.evsw.Start(); err != nil { - return err - } - - // we need the timeoutRoutine for replay so - // we don't block on the tick chan. - // NOTE: we will get a build up of garbage go routines - // firing on the tockChan until the receiveRoutine is started - // to deal with them (by that point, at most one will be valid) - if err := cs.timeoutTicker.Start(); err != nil { - return err - } - - // Double Signing Risk Reduction - if err := cs.checkDoubleSigningRisk(cs.Height); err != nil { - return err - } - - // now start the receiveRoutine - go cs.receiveRoutine(0) - - // schedule the first round! - // use GetRoundState so we don't race the receiveRoutine for access - cs.scheduleRound0(cs.GetRoundState()) - - return nil -} - -// loadWalFile loads WAL data from file. It overwrites cs.wal. -func (cs *State) loadWalFile() error { - wal, err := cs.OpenWAL(cs.config.WalFile()) - if err != nil { - cs.Logger.Error("Error loading State wal", "err", err) - return err - } - cs.wal = wal - return nil -} - -// OnStop implements service.Service. -func (cs *State) OnStop() { - if err := cs.evsw.Stop(); err != nil { - cs.Logger.Error("error trying to stop eventSwitch", "error", err) - } - if err := cs.timeoutTicker.Stop(); err != nil { - cs.Logger.Error("error trying to stop timeoutTicket", "error", err) - } - // WAL is stopped in receiveRoutine. -} - -// Wait waits for the the main routine to return. -// NOTE: be sure to Stop() the event switch and drain -// any event channels or this may deadlock -func (cs *State) Wait() { - <-cs.done -} - -// OpenWAL opens a file to log all consensus messages and timeouts for -// deterministic accountability. -func (cs *State) OpenWAL(walFile string) (tmcon.WAL, error) { - wal, err := NewWAL(walFile) - if err != nil { - cs.Logger.Error("Failed to open WAL", "file", walFile, "err", err) - return nil, err - } - wal.SetLogger(cs.Logger.With("wal", walFile)) - if err := wal.Start(); err != nil { - cs.Logger.Error("Failed to start WAL", "err", err) - return nil, err - } - return wal, nil -} - -//------------------------------------------------------------ -// Public interface for passing messages into the consensus state, possibly causing a state transition. -// If peerID == "", the msg is considered internal. -// Messages are added to the appropriate queue (peer or internal). -// If the queue is full, the function may block. -// TODO: should these return anything or let callers just use events? - -// AddVote inputs a vote. -func (cs *State) AddVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) { - if peerID == "" { - cs.internalMsgQueue <- msgInfo{&tmcon.VoteMessage{Vote: vote}, ""} - } else { - cs.peerMsgQueue <- msgInfo{&tmcon.VoteMessage{Vote: vote}, peerID} - } - - // TODO: wait for event?! - return false, nil -} - -// SetProposal inputs a proposal. -func (cs *State) SetProposal(proposal *types.Proposal, peerID p2p.ID) error { - - if peerID == "" { - cs.internalMsgQueue <- msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, ""} - } else { - cs.peerMsgQueue <- msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, peerID} - } - - // TODO: wait for event?! - return nil -} - -// AddProposalBlockPart inputs a part of the proposal block. -func (cs *State) AddProposalBlockPart(height int64, round int32, part *types.Part, peerID p2p.ID) error { - - if peerID == "" { - cs.internalMsgQueue <- msgInfo{&tmcon.BlockPartMessage{Height: height, Round: round, Part: part}, ""} - } else { - cs.peerMsgQueue <- msgInfo{&tmcon.BlockPartMessage{Height: height, Round: round, Part: part}, peerID} - } - - // TODO: wait for event?! - return nil -} - -// SetProposalAndBlock inputs the proposal and all block parts. -func (cs *State) SetProposalAndBlock( - proposal *types.Proposal, - block *types.Block, - parts *types.PartSet, - peerID p2p.ID, -) error { - if err := cs.SetProposal(proposal, peerID); err != nil { - return err - } - for i := 0; i < int(parts.Total()); i++ { - part := parts.GetPart(i) - if err := cs.AddProposalBlockPart(proposal.Height, proposal.Round, part, peerID); err != nil { - return err - } - } - return nil -} - -//------------------------------------------------------------ -// internal functions for managing the state - -func (cs *State) updateHeight(height int64) { - cs.metrics.Height.Set(float64(height)) - cs.Height = height -} - -func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) { - cs.Round = round - cs.Step = step -} - -// enterNewRound(height, 0) at cs.StartTime. -func (cs *State) scheduleRound0(rs *cstypes.RoundState) { - // cs.Logger.Info("scheduleRound0", "now", tmtime.Now(), "startTime", cs.StartTime) - sleepDuration := rs.StartTime.Sub(tmtime.Now()) - cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight) -} - -// Attempt to schedule a timeout (by sending timeoutInfo on the tickChan) -func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int32, step cstypes.RoundStepType) { - cs.timeoutTicker.ScheduleTimeout(timeoutInfo{duration, height, round, step}) -} - -// send a msg into the receiveRoutine regarding our own proposal, block part, or vote -func (cs *State) sendInternalMessage(mi msgInfo) { - select { - case cs.internalMsgQueue <- mi: - default: - // NOTE: using the go-routine means our votes can - // be processed out of order. - // TODO: use CList here for strict determinism and - // attempt push to internalMsgQueue in receiveRoutine - cs.Logger.Info("Internal msg queue is full. Using a go-routine") - go func() { cs.internalMsgQueue <- mi }() - } -} - -// Reconstruct LastCommit from SeenCommit, which we saved along with the block, -// (which happens even before saving the state) -func (cs *State) reconstructLastCommit(state sm.State) { - seenCommit := cs.blockStore.LoadSeenCommit(state.LastBlockHeight) - if seenCommit == nil { - panic(fmt.Sprintf("Failed to reconstruct LastCommit: seen commit for height %v not found", - state.LastBlockHeight)) - } - - lastPrecommits := types.CommitToVoteSet(state.ChainID, seenCommit, state.LastValidators) - if !lastPrecommits.HasTwoThirdsMajority() { - panic("Failed to reconstruct LastCommit: Does not have +2/3 maj") - } - - cs.LastCommit = lastPrecommits -} - -// Updates State and increments height to match that of state. -// The round becomes 0 and cs.Step becomes cstypes.RoundStepNewHeight. -func (cs *State) updateToState(state sm.State) { - if cs.CommitRound > -1 && 0 < cs.Height && cs.Height != state.LastBlockHeight { - panic(fmt.Sprintf("updateToState() expected state height of %v but found %v", - cs.Height, state.LastBlockHeight)) - } - if !cs.state.IsEmpty() { - if cs.state.LastBlockHeight > 0 && cs.state.LastBlockHeight+1 != cs.Height { - // This might happen when someone else is mutating cs.state. - // Someone forgot to pass in state.Copy() somewhere?! - panic(fmt.Sprintf("Inconsistent cs.state.LastBlockHeight+1 %v vs cs.Height %v", - cs.state.LastBlockHeight+1, cs.Height)) - } - if cs.state.LastBlockHeight > 0 && cs.Height == cs.state.InitialHeight { - panic(fmt.Sprintf("Inconsistent cs.state.LastBlockHeight %v, expected 0 for initial height %v", - cs.state.LastBlockHeight, cs.state.InitialHeight)) - } - - // If state isn't further out than cs.state, just ignore. - // This happens when SwitchToConsensus() is called in the reactor. - // We don't want to reset e.g. the Votes, but we still want to - // signal the new round step, because other services (eg. txNotifier) - // depend on having an up-to-date peer state! - if state.LastBlockHeight <= cs.state.LastBlockHeight { - cs.Logger.Info( - "Ignoring updateToState()", - "newHeight", - state.LastBlockHeight+1, - "oldHeight", - cs.state.LastBlockHeight+1) - cs.newStep() - return - } - } - - // Reset fields based on state. - validators := state.Validators - - switch { - case state.LastBlockHeight == 0: // Very first commit should be empty. - cs.LastCommit = (*types.VoteSet)(nil) - case cs.CommitRound > -1 && cs.Votes != nil: // Otherwise, use cs.Votes - if !cs.Votes.Precommits(cs.CommitRound).HasTwoThirdsMajority() { - panic(fmt.Sprintf("Wanted to form a Commit, but Precommits (H/R: %d/%d) didn't have 2/3+: %v", - state.LastBlockHeight, - cs.CommitRound, - cs.Votes.Precommits(cs.CommitRound))) - } - cs.LastCommit = cs.Votes.Precommits(cs.CommitRound) - case cs.LastCommit == nil: - // NOTE: when Tendermint starts, it has no votes. reconstructLastCommit - // must be called to reconstruct LastCommit from SeenCommit. - panic(fmt.Sprintf("LastCommit cannot be empty after initial block (H:%d)", - state.LastBlockHeight+1, - )) - } - - // Next desired block height - height := state.LastBlockHeight + 1 - if height == 1 { - height = state.InitialHeight - } - - // RoundState fields - cs.updateHeight(height) - cs.updateRoundStep(0, cstypes.RoundStepNewHeight) - if cs.CommitTime.IsZero() { - // "Now" makes it easier to sync up dev nodes. - // We add timeoutCommit to allow transactions - // to be gathered for the first block. - // And alternative solution that relies on clocks: - // cs.StartTime = state.LastBlockTime.Add(timeoutCommit) - cs.StartTime = cs.config.Commit(tmtime.Now()) - } else { - cs.StartTime = cs.config.Commit(cs.CommitTime) - } - - cs.Validators = validators - cs.Proposal = nil - cs.ProposalBlock = nil - cs.ProposalBlockParts = nil - cs.LockedRound = -1 - cs.LockedBlock = nil - cs.LockedBlockParts = nil - cs.ValidRound = -1 - cs.ValidBlock = nil - cs.ValidBlockParts = nil - cs.Votes = cstypes.NewHeightVoteSet(state.ChainID, height, validators) - cs.CommitRound = -1 - cs.LastValidators = state.LastValidators - cs.TriggeredTimeoutPrecommit = false - - cs.state = state - - // Finally, broadcast RoundState - cs.newStep() -} - -func (cs *State) newStep() { - rs := cs.RoundStateEvent() - if err := cs.wal.Write(rs); err != nil { - cs.Logger.Error("Error writing to wal", "err", err) - } - cs.nSteps++ - // newStep is called by updateToState in NewState before the eventBus is set! - if cs.eventBus != nil { - if err := cs.eventBus.PublishEventNewRoundStep(rs); err != nil { - cs.Logger.Error("Error publishing new round step", "err", err) - } - cs.evsw.FireEvent(types.EventNewRoundStep, &cs.RoundState) - } -} - -//----------------------------------------- -// the main go routines - -// receiveRoutine handles messages which may cause state transitions. -// it's argument (n) is the number of messages to process before exiting - use 0 to run forever -// It keeps the RoundState and is the only thing that updates it. -// Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities. -// State must be locked before any internal state is updated. -func (cs *State) receiveRoutine(maxSteps int) { - onExit := func(cs *State) { - // NOTE: the internalMsgQueue may have signed messages from our - // priv_val that haven't hit the WAL, but its ok because - // priv_val tracks LastSig - - // close wal now that we're done writing to it - if err := cs.wal.Stop(); err != nil { - cs.Logger.Error("error trying to stop wal", "error", err) - } - cs.wal.Wait() - - close(cs.done) - } - - defer func() { - if r := recover(); r != nil { - cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack())) - // stop gracefully - // - // NOTE: We most probably shouldn't be running any further when there is - // some unexpected panic. Some unknown error happened, and so we don't - // know if that will result in the validator signing an invalid thing. It - // might be worthwhile to explore a mechanism for manual resuming via - // some console or secure RPC system, but for now, halting the chain upon - // unexpected consensus bugs sounds like the better option. - onExit(cs) - } - }() - - for { - if maxSteps > 0 { - if cs.nSteps >= maxSteps { - cs.Logger.Info("reached max steps. exiting receive routine") - cs.nSteps = 0 - return - } - } - rs := cs.RoundState - var mi msgInfo - - select { - case <-cs.txNotifier.TxsAvailable(): - cs.handleTxsAvailable() - case mi = <-cs.peerMsgQueue: - if err := cs.wal.Write(mi); err != nil { - cs.Logger.Error("Error writing to wal", "err", err) - } - // handles proposals, block parts, votes - // may generate internal events (votes, complete proposals, 2/3 majorities) - cs.handleMsg(mi) - case mi = <-cs.internalMsgQueue: - err := cs.wal.WriteSync(mi) // NOTE: fsync - if err != nil { - panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", mi, err)) - } - - if _, ok := mi.Msg.(*tmcon.VoteMessage); ok { - // we actually want to simulate failing during - // the previous WriteSync, but this isn't easy to do. - // Equivalent would be to fail here and manually remove - // some bytes from the end of the wal. - fail.Fail() // XXX - } - - // handles proposals, block parts, votes - cs.handleMsg(mi) - case ti := <-cs.timeoutTicker.Chan(): // tockChan: - if err := cs.wal.Write(ti); err != nil { - cs.Logger.Error("Error writing to wal", "err", err) - } - // if the timeout is relevant to the rs - // go to the next step - cs.handleTimeout(ti, rs) - case <-cs.Quit(): - onExit(cs) - return - } - } -} - -func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) { - cs.Logger.Debug("Received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step) - - // timeouts must be for current height, round, step - if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) { - cs.Logger.Debug("Ignoring tock because we're ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step) - return - } - - // the timeout will now cause a state transition - cs.mtx.Lock() - defer cs.mtx.Unlock() - - switch ti.Step { - case cstypes.RoundStepNewHeight: - // NewRound event fired from enterNewRound. - // XXX: should we fire timeout here (for timeout commit)? - cs.enterNewRound(ti.Height, 0) - case cstypes.RoundStepNewRound: - cs.enterPropose(ti.Height, 0) - case cstypes.RoundStepPropose: - if err := cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()); err != nil { - cs.Logger.Error("Error publishing timeout propose", "err", err) - } - cs.enterPrevote(ti.Height, ti.Round) - case cstypes.RoundStepPrevoteWait: - if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil { - cs.Logger.Error("Error publishing timeout wait", "err", err) - } - cs.enterPrecommit(ti.Height, ti.Round) - case cstypes.RoundStepPrecommitWait: - if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil { - cs.Logger.Error("Error publishing timeout wait", "err", err) - } - cs.enterPrecommit(ti.Height, ti.Round) - cs.enterNewRound(ti.Height, ti.Round+1) - default: - panic(fmt.Sprintf("Invalid timeout step: %v", ti.Step)) - } - -} - -func (cs *State) handleTxsAvailable() { - cs.mtx.Lock() - defer cs.mtx.Unlock() - - // We only need to do this for round 0. - if cs.Round != 0 { - return - } - - switch cs.Step { - case cstypes.RoundStepNewHeight: // timeoutCommit phase - if cs.needProofBlock(cs.Height) { - // enterPropose will be called by enterNewRound - return - } - - // +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight - timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond - cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound) - case cstypes.RoundStepNewRound: // after timeoutCommit - cs.enterPropose(cs.Height, 0) - } -} - -//----------------------------------------------------------------------------- -// State functions -// Used internally by handleTimeout and handleMsg to make state transitions - -// Enter: `timeoutNewHeight` by startTime (commitTime+timeoutCommit), -// or, if SkipTimeoutCommit==true, after receiving all precommits from (height,round-1) -// Enter: `timeoutPrecommits` after any +2/3 precommits from (height,round-1) -// Enter: +2/3 precommits for nil at (height,round-1) -// Enter: +2/3 prevotes any or +2/3 precommits for block or any from (height, round) -// NOTE: cs.StartTime was already set for height. -func (cs *State) enterNewRound(height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - if cs.Height != height || round < cs.Round || (cs.Round == round && cs.Step != cstypes.RoundStepNewHeight) { - logger.Debug("enter new round", "msg", log.NewLazySprintf( - "enterNewRound(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - round, - cs.Height, - cs.Round, - cs.Step)) - return - } - - if now := tmtime.Now(); cs.StartTime.After(now) { - logger.Debug("need to set a buffer and log message here for sanity", "startTime", cs.StartTime, "now", now) - } - - logger.Info("enter new round", - "msg", - log.NewLazySprintf("enterNewRound(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - // Increment validators if necessary - validators := cs.Validators - if cs.Round < round { - validators = validators.Copy() - validators.IncrementProposerPriority(tmmath.SafeSubInt32(round, cs.Round)) - } - - // Setup new round - // we don't fire newStep for this step, - // but we fire an event, so update the round step first - cs.updateRoundStep(round, cstypes.RoundStepNewRound) - cs.Validators = validators - if round == 0 { - // We've already reset these upon new height, - // and meanwhile we might have received a proposal - // for round 0. - } else { - logger.Info("Resetting Proposal info") - cs.Proposal = nil - cs.ProposalBlock = nil - cs.ProposalBlockParts = nil - } - cs.Votes.SetRound(tmmath.SafeAddInt32(round, 1)) // also track next round (round+1) to allow round-skipping - cs.TriggeredTimeoutPrecommit = false - - if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil { - cs.Logger.Error("Error publishing new round", "err", err) - } - cs.metrics.Rounds.Set(float64(round)) - - // Wait for txs to be available in the mempool - // before we enterPropose in round 0. If the last block changed the app hash, - // we may need an empty "proof" block, and enterPropose immediately. - waitForTxs := cs.config.WaitForTxs() && round == 0 && !cs.needProofBlock(height) - if waitForTxs { - if cs.config.CreateEmptyBlocksInterval > 0 { - cs.scheduleTimeout(cs.config.CreateEmptyBlocksInterval, height, round, - cstypes.RoundStepNewRound) - } - } else { - cs.enterPropose(height, round) - } -} - -// needProofBlock returns true on the first height (so the genesis app hash is signed right away) -// and where the last block (height-1) caused the app hash to change -func (cs *State) needProofBlock(height int64) bool { - if height == cs.state.InitialHeight { - return true - } - - lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) - if lastBlockMeta == nil { - panic(fmt.Sprintf("needProofBlock: last block meta for height %d not found", height-1)) - } - return !bytes.Equal(cs.state.AppHash, lastBlockMeta.Header.AppHash) -} - -func (cs *State) isProposer(address []byte) bool { - return bytes.Equal(cs.Validators.GetProposer().Address, address) -} - -func (cs *State) defaultDecideProposal(height int64, round int32) { - var block *types.Block - var blockParts *types.PartSet - - // Decide on block - if cs.ValidBlock != nil { - // If there is valid block, choose that. - block, blockParts = cs.ValidBlock, cs.ValidBlockParts - } else { - // Create a new proposal block from state/txs from the mempool. - var err error - block, err = cs.createProposalBlock() - if err != nil { - cs.Logger.Error("unable to create proposal block", "error", err) - return - } else if block == nil { - return - } - blockParts, err = block.MakePartSet(types.BlockPartSizeBytes) - if err != nil { - cs.Logger.Error("unable to create proposal block part set", "error", err) - return - } - } - - // Flush the WAL. Otherwise, we may not recompute the same proposal to sign, - // and the privValidator will refuse to sign anything. - if err := cs.wal.FlushAndSync(); err != nil { - cs.Logger.Error("Error flushing to disk") - } - - // Make proposal - propBlockID := types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()} - proposal := types.NewProposal(height, round, cs.ValidRound, propBlockID) - p := proposal.ToProto() - if err := cs.privValidator.SignProposal(cs.state.ChainID, p); err == nil { - proposal.Signature = p.Signature - - // send proposal and block parts on internal msg queue - cs.sendInternalMessage(msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, ""}) - for i := 0; i < int(blockParts.Total()); i++ { - part := blockParts.GetPart(i) - cs.sendInternalMessage(msgInfo{&tmcon.BlockPartMessage{Height: cs.Height, Round: cs.Round, Part: part}, ""}) - } - cs.Logger.Info("Signed proposal", "height", height, "round", round, "proposal", proposal) - cs.Logger.Debug("default decide proposal", - "msg", - log.NewLazySprintf("Signed proposal block: %v", block)) - } else if !cs.replayMode { - cs.Logger.Error("enterPropose: Error signing proposal", "height", height, "round", round, "err", err) - } -} - -// Returns true if the proposal block is complete && -// (if POLRound was proposed, we have +2/3 prevotes from there). -func (cs *State) isProposalComplete() bool { - if cs.Proposal == nil || cs.ProposalBlock == nil { - return false - } - // we have the proposal. if there's a POLRound, - // make sure we have the prevotes from it too - if cs.Proposal.POLRound < 0 { - return true - } - // if this is false the proposer is lying or we haven't received the POL yet - return cs.Votes.Prevotes(cs.Proposal.POLRound).HasTwoThirdsMajority() - -} - -// Create the next block to propose and return it. Returns nil block upon error. -// -// We really only need to return the parts, but the block is returned for -// convenience so we can log the proposal block. -// -// NOTE: keep it side-effect free for clarity. -// CONTRACT: cs.privValidator is not nil. -func (cs *State) createProposalBlock() (*types.Block, error) { - if cs.privValidator == nil { - return nil, errors.New("entered createProposalBlock with privValidator being nil") - } - - var commit *types.Commit - switch { - case cs.Height == cs.state.InitialHeight: - // We're creating a proposal for the first block. - // The commit is empty, but not nil. - commit = types.NewCommit(0, 0, types.BlockID{}, nil) - case cs.LastCommit.HasTwoThirdsMajority(): - // Make the commit from LastCommit - commit = cs.LastCommit.MakeCommit() - default: // This shouldn't happen. - cs.Logger.Error("propose step; cannot propose anything without commit for the previous block") - return nil, nil - } - - if cs.privValidatorPubKey == nil { - // If this node is a validator & proposer in the current round, it will - // miss the opportunity to create a block. - cs.Logger.Error("propose step; empty priv validator public key", "err", errPubKeyIsNotSet) - return nil, nil - } - proposerAddr := cs.privValidatorPubKey.Address() - - return cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr, cs.LastCommit.GetVotes()) -} - -// Enter: any +2/3 prevotes at next round. -func (cs *State) enterPrevoteWait(height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevoteWait <= cs.Step) { - logger.Debug("enter prevote wait", - "msg", - log.NewLazySprintf( - "enterPrevoteWait(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - round, - cs.Height, - cs.Round, - cs.Step)) - return - } - if !cs.Votes.Prevotes(round).HasTwoThirdsAny() { - panic(fmt.Sprintf("enterPrevoteWait(%v/%v), but Prevotes does not have any +2/3 votes", height, round)) - } - - logger.Debug("enter prevote wait", - "msg", - log.NewLazySprintf("enterPrevoteWait(%v/%v); current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - defer func() { - // Done enterPrevoteWait: - cs.updateRoundStep(round, cstypes.RoundStepPrevoteWait) - cs.newStep() - }() - - // Wait for some more prevotes; enterPrecommit - cs.scheduleTimeout(cs.config.Prevote(round), height, round, cstypes.RoundStepPrevoteWait) -} - -// Enter: any +2/3 precommits for next round. -func (cs *State) enterPrecommitWait(height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) { - logger.Debug("state enter precommit wait", - "msg", - log.NewLazySprintf( - "enterPrecommitWait(%v/%v): Invalid args. "+ - "Current state is Height/Round: %v/%v/, TriggeredTimeoutPrecommit:%v", - height, round, cs.Height, cs.Round, cs.TriggeredTimeoutPrecommit)) - return - } - if !cs.Votes.Precommits(round).HasTwoThirdsAny() { - panic(fmt.Sprintf("enterPrecommitWait(%v/%v), but Precommits does not have any +2/3 votes", height, round)) - } - logger.Info("enter precommit wait", - "msg", - log.NewLazySprintf("enterPrecommitWait(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - defer func() { - // Done enterPrecommitWait: - cs.TriggeredTimeoutPrecommit = true - cs.newStep() - }() - - // Wait for some more precommits; enterNewRound - cs.scheduleTimeout(cs.config.Precommit(round), height, round, cstypes.RoundStepPrecommitWait) -} - -// Enter: +2/3 precommits for block -func (cs *State) enterCommit(height int64, commitRound int32) { - logger := cs.Logger.With("height", height, "commitRound", commitRound) - - if cs.Height != height || cstypes.RoundStepCommit <= cs.Step { - logger.Debug("enter commit", - "msg", - log.NewLazySprintf("enterCommit(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - commitRound, - cs.Height, - cs.Round, - cs.Step)) - return - } - logger.Info("enter commit", - "msg", - log.NewLazySprintf("enterCommit(%v/%v). Current: %v/%v/%v", height, commitRound, cs.Height, cs.Round, cs.Step)) - - defer func() { - // Done enterCommit: - // keep cs.Round the same, commitRound points to the right Precommits set. - cs.updateRoundStep(cs.Round, cstypes.RoundStepCommit) - cs.CommitRound = commitRound - cs.CommitTime = tmtime.Now() - cs.newStep() - - // Maybe finalize immediately. - cs.tryFinalizeCommit(height) - }() - - blockID, ok := cs.Votes.Precommits(commitRound).TwoThirdsMajority() - if !ok { - panic("RunActionCommit() expects +2/3 precommits") - } - - // The Locked* fields no longer matter. - // Move them over to ProposalBlock if they match the commit hash, - // otherwise they'll be cleared in updateToState. - if cs.LockedBlock.HashesTo(blockID.Hash) { - logger.Info("Commit is for locked block. Set ProposalBlock=LockedBlock", "blockHash", blockID.Hash) - cs.ProposalBlock = cs.LockedBlock - cs.ProposalBlockParts = cs.LockedBlockParts - } - - // If we don't have the block being committed, set up to get it. - if !cs.ProposalBlock.HashesTo(blockID.Hash) { - if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { - logger.Info( - "commit is for a block we do not know about; set ProposalBlock=nil", - "proposal", log.NewLazyBlockHash(cs.ProposalBlock), - "commit", blockID.Hash, - ) - - // We're getting the wrong block. - // Set up ProposalBlockParts and keep waiting. - cs.ProposalBlock = nil - cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) - if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { - cs.Logger.Error("Error publishing valid block", "err", err) - } - cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) - } - // else { - // We just need to keep waiting. - // } - } -} - -// If we have the block AND +2/3 commits for it, finalize. -func (cs *State) tryFinalizeCommit(height int64) { - logger := cs.Logger.With("height", height) - - if cs.Height != height { - panic(fmt.Sprintf("tryFinalizeCommit() cs.Height: %v vs height: %v", cs.Height, height)) - } - - blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() - if !ok || len(blockID.Hash) == 0 { - logger.Error("Attempt to finalize failed. There was no +2/3 majority, or +2/3 was for .") - return - } - if !cs.ProposalBlock.HashesTo(blockID.Hash) { - // TODO: this happens every time if we're not a validator (ugly logs) - // TODO: ^^ wait, why does it matter that we're a validator? - logger.Debug( - "attempt to finalize failed; we do not have the commit block", - "proposal-block", log.NewLazyBlockHash(cs.ProposalBlock), - "commit-block", blockID.Hash, - ) - return - } - - // go - cs.finalizeCommit(height) -} - -// Increment height and goto cstypes.RoundStepNewHeight -func (cs *State) finalizeCommit(height int64) { - if cs.Height != height || cs.Step != cstypes.RoundStepCommit { - cs.Logger.Debug("finalize commit", - "msg", - log.NewLazySprintf("finalizeCommit(%v): Invalid args. Current step: %v/%v/%v", - height, - cs.Height, - cs.Round, - cs.Step)) - return - } - - blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() - block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts - - if !ok { - panic("Cannot finalizeCommit, commit does not have two thirds majority") - } - if !blockParts.HasHeader(blockID.PartSetHeader) { - panic("Expected ProposalBlockParts header to be commit header") - } - if !block.HashesTo(blockID.Hash) { - panic("Cannot finalizeCommit, ProposalBlock does not hash to commit hash") - } - if err := cs.blockExec.ValidateBlock(cs.state, block); err != nil { - panic(fmt.Errorf("+2/3 committed an invalid block: %w", err)) - } - - cs.Logger.Info("finalizing commit of block with N txs", - "height", block.Height, - "hash", block.Hash(), - "root", block.AppHash, - "N", len(block.Txs), - ) - cs.Logger.Debug("finalize commit", "msg", log.NewLazySprintf("%v", block)) - - fail.Fail() // XXX - - // Save to blockStore. - if cs.blockStore.Height() < block.Height { - // NOTE: the seenCommit is local justification to commit this block, - // but may differ from the LastCommit included in the next block - precommits := cs.Votes.Precommits(cs.CommitRound) - seenCommit := precommits.MakeCommit() - cs.blockStore.SaveBlock(block, blockParts, seenCommit) - } else { - // Happens during replay if we already saved the block but didn't commit - cs.Logger.Debug("calling finalizeCommit on already stored block", "height", block.Height) - } - - fail.Fail() // XXX - - // Write EndHeightMessage{} for this height, implying that the blockstore - // has saved the block. - // - // If we crash before writing this EndHeightMessage{}, we will recover by - // running ApplyBlock during the ABCI handshake when we restart. If we - // didn't save the block to the blockstore before writing - // EndHeightMessage{}, we'd have to change WAL replay -- currently it - // complains about replaying for heights where an #ENDHEIGHT entry already - // exists. - // - // Either way, the State should not be resumed until we - // successfully call ApplyBlock (ie. later here, or in Handshake after - // restart). - endMsg := tmcon.EndHeightMessage{Height: height} - if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync - panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", - endMsg, err)) - } - - fail.Fail() // XXX - - // Create a copy of the state for staging and an event cache for txs. - stateCopy := cs.state.Copy() - - // Execute and commit the block, update and save the state, and update the mempool. - // NOTE The block.AppHash wont reflect these txs until the next block. - var err error - var retainHeight int64 - stateCopy, retainHeight, err = cs.blockExec.ApplyBlock( - stateCopy, - types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()}, - block) - if err != nil { - cs.Logger.Error("Error on ApplyBlock", "err", err) - return - } - - fail.Fail() // XXX - - // Prune old heights, if requested by ABCI app. - if retainHeight > 0 { - pruned, err := cs.pruneBlocks(retainHeight) - if err != nil { - cs.Logger.Error("Failed to prune blocks", "retainHeight", retainHeight, "err", err) - } else { - cs.Logger.Info("Pruned blocks", "pruned", pruned, "retainHeight", retainHeight) - } - } - - // must be called before we update state - cs.recordMetrics(height, block) - - // NewHeightStep! - cs.updateToState(stateCopy) - - fail.Fail() // XXX - - // Private validator might have changed it's key pair => refetch pubkey. - if err := cs.updatePrivValidatorPubKey(); err != nil { - cs.Logger.Error("Can't get private validator pubkey", "err", err) - } - - // cs.StartTime is already set. - // Schedule Round0 to start soon. - cs.scheduleRound0(&cs.RoundState) - - // By here, - // * cs.Height has been increment to height+1 - // * cs.Step is now cstypes.RoundStepNewHeight - // * cs.StartTime is set to when we will start round0. -} - -func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) { - base := cs.blockStore.Base() - if retainHeight <= base { - return 0, nil - } - pruned, err := cs.blockStore.PruneBlocks(retainHeight) - if err != nil { - return 0, fmt.Errorf("failed to prune block store: %w", err) - } - err = cs.blockExec.Store().PruneStates(base, retainHeight) - if err != nil { - return 0, fmt.Errorf("failed to prune state database: %w", err) - } - return pruned, nil -} - -func (cs *State) recordMetrics(height int64, block *types.Block) { - cs.metrics.Validators.Set(float64(cs.Validators.Size())) - cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) - - var ( - missingValidators int - missingValidatorsPower int64 - ) - // height=0 -> MissingValidators and MissingValidatorsPower are both 0. - // Remember that the first LastCommit is intentionally empty, so it's not - // fair to increment missing validators number. - if height > cs.state.InitialHeight { - // Sanity check that commit size matches validator set size - only applies - // after first block. - var ( - commitSize = block.LastCommit.Size() - valSetLen = len(cs.LastValidators.Validators) - address types.Address - ) - if commitSize != valSetLen { - panic(fmt.Sprintf("commit size (%d) doesn't match valset length (%d) at height %d\n\n%v\n\n%v", - commitSize, valSetLen, block.Height, block.LastCommit.Signatures, cs.LastValidators.Validators)) - } - - if cs.privValidator != nil { - if cs.privValidatorPubKey == nil { - // Metrics won't be updated, but it's not critical. - cs.Logger.Error(fmt.Sprintf("recordMetrics: %v", errPubKeyIsNotSet)) - } else { - address = cs.privValidatorPubKey.Address() - } - } - - for i, val := range cs.LastValidators.Validators { - commitSig := block.LastCommit.Signatures[i] - if commitSig.Absent() { - missingValidators++ - missingValidatorsPower += val.VotingPower - } - - if bytes.Equal(val.Address, address) { - label := []string{ - "validator_address", val.Address.String(), - } - cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower)) - if commitSig.ForBlock() { - cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height)) - } else { - cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1)) - } - } - - } - } - cs.metrics.MissingValidators.Set(float64(missingValidators)) - cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower)) - - // NOTE: byzantine validators power and count is only for consensus evidence i.e. duplicate vote - var ( - byzantineValidatorsPower = int64(0) - byzantineValidatorsCount = int64(0) - ) - for _, ev := range block.Evidence.Evidence { - if dve, ok := ev.(*types.DuplicateVoteEvidence); ok { - if _, val := cs.Validators.GetByAddress(dve.VoteA.ValidatorAddress); val != nil { - byzantineValidatorsCount++ - byzantineValidatorsPower += val.VotingPower - } - } - } - cs.metrics.ByzantineValidators.Set(float64(byzantineValidatorsCount)) - cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower)) - - if height > 1 { - lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) - if lastBlockMeta != nil { - cs.metrics.BlockIntervalSeconds.Observe( - block.Time.Sub(lastBlockMeta.Header.Time).Seconds(), - ) - } - } - - cs.metrics.NumTxs.Set(float64(len(block.Data.Txs))) - cs.metrics.TotalTxs.Add(float64(len(block.Data.Txs))) - cs.metrics.BlockSizeBytes.Set(float64(block.Size())) - cs.metrics.CommittedHeight.Set(float64(block.Height)) -} - -//----------------------------------------------------------------------------- - -// NOTE: block is not necessarily valid. -// Asynchronously triggers either enterPrevote (before we timeout of propose) or tryFinalizeCommit, -// once we have the full block. -func (cs *State) addProposalBlockPart(msg *tmcon.BlockPartMessage, peerID p2p.ID) (added bool, err error) { - height, round, part := msg.Height, msg.Round, msg.Part - - // Blocks might be reused, so round mismatch is OK - if cs.Height != height { - cs.Logger.Debug("Received block part from wrong height", "height", height, "round", round) - return false, nil - } - - // We're not expecting a block part. - if cs.ProposalBlockParts == nil { - // NOTE: this can happen when we've gone to a higher round and - // then receive parts from the previous round - not necessarily a bad peer. - cs.Logger.Info("Received a block part when we're not expecting any", - "height", height, "round", round, "index", part.Index, "peer", peerID) - return false, nil - } - - added, err = cs.ProposalBlockParts.AddPart(part) - if err != nil { - return added, err - } - if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes { - return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)", - cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes, - ) - } - if added && cs.ProposalBlockParts.IsComplete() { - bz, err := ioutil.ReadAll(cs.ProposalBlockParts.GetReader()) - if err != nil { - return added, err - } - - var pbb = new(tmproto.Block) - err = proto.Unmarshal(bz, pbb) - if err != nil { - return added, err - } - - block, err := types.BlockFromProto(pbb) - if err != nil { - return added, err - } - - cs.ProposalBlock = block - // NOTE: it's possible to receive complete proposal blocks for future rounds without having the proposal - cs.Logger.Info("Received complete proposal block", "height", cs.ProposalBlock.Height, "hash", cs.ProposalBlock.Hash()) - if err := cs.eventBus.PublishEventCompleteProposal(cs.CompleteProposalEvent()); err != nil { - cs.Logger.Error("Error publishing event complete proposal", "err", err) - } - - // Update Valid* if we can. - prevotes := cs.Votes.Prevotes(cs.Round) - blockID, hasTwoThirds := prevotes.TwoThirdsMajority() - if hasTwoThirds && !blockID.IsZero() && (cs.ValidRound < cs.Round) { - if cs.ProposalBlock.HashesTo(blockID.Hash) { - cs.Logger.Info("Updating valid block to new proposal block", - "valid-round", cs.Round, "valid-block-hash", cs.ProposalBlock.Hash()) - cs.ValidRound = cs.Round - cs.ValidBlock = cs.ProposalBlock - cs.ValidBlockParts = cs.ProposalBlockParts - } - // TODO: In case there is +2/3 majority in Prevotes set for some - // block and cs.ProposalBlock contains different block, either - // proposer is faulty or voting power of faulty processes is more - // than 1/3. We should trigger in the future accountability - // procedure at this point. - } - - if cs.Step <= cstypes.RoundStepPropose && cs.isProposalComplete() { - // Move onto the next step - cs.enterPrevote(height, cs.Round) - if hasTwoThirds { // this is optimisation as this will be triggered when prevote is added - cs.enterPrecommit(height, cs.Round) - } - } else if cs.Step == cstypes.RoundStepCommit { - // If we're waiting on the proposal block... - cs.tryFinalizeCommit(height) - } - return added, nil - } - return added, nil -} - -// Attempt to add the vote. if its a duplicate signature, dupeout the validator -func (cs *State) tryAddVote(vote *types.Vote, peerID p2p.ID) (bool, error) { - added, err := cs.addVote(vote, peerID) - if err != nil { - // If the vote height is off, we'll just ignore it, - // But if it's a conflicting sig, add it to the cs.evpool. - // If it's otherwise invalid, punish peer. - // nolint: gocritic - if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok { - if cs.privValidatorPubKey == nil { - return false, errPubKeyIsNotSet - } - - if bytes.Equal(vote.ValidatorAddress, cs.privValidatorPubKey.Address()) { - cs.Logger.Error( - "Found conflicting vote from ourselves. Did you unsafe_reset a validator?", - "height", - vote.Height, - "round", - vote.Round, - "type", - vote.Type) - return added, err - } - cs.evpool.ReportConflictingVotes(voteErr.VoteA, voteErr.VoteB) - return added, err - } else if err == types.ErrVoteNonDeterministicSignature { - cs.Logger.Debug("Vote has non-deterministic signature", "err", err) - } else { - // Either - // 1) bad peer OR - // 2) not a bad peer? this can also err sometimes with "Unexpected step" OR - // 3) tmkms use with multiple validators connecting to a single tmkms instance - // (https://github.com/tendermint/tendermint/issues/3839). - cs.Logger.Info("Error attempting to add vote", "err", err) - return added, ErrAddingVote - } - } - return added, nil -} - -//----------------------------------------------------------------------------- - -// CONTRACT: cs.privValidator is not nil. -func (cs *State) signVote( - msgType tmproto.SignedMsgType, - hash []byte, - header types.PartSetHeader, -) (*types.Vote, error) { - // Flush the WAL. Otherwise, we may not recompute the same vote to sign, - // and the privValidator will refuse to sign anything. - if err := cs.wal.FlushAndSync(); err != nil { - return nil, err - } - - if cs.privValidatorPubKey == nil { - return nil, errPubKeyIsNotSet - } - addr := cs.privValidatorPubKey.Address() - valIdx, _ := cs.Validators.GetByAddress(addr) - - vote := &types.Vote{ - ValidatorAddress: addr, - ValidatorIndex: valIdx, - Height: cs.Height, - Round: cs.Round, - Timestamp: cs.voteTime(), - Type: msgType, - BlockID: types.BlockID{Hash: hash, PartSetHeader: header}, - } - v := vote.ToProto() - err := cs.privValidator.SignVote(cs.state.ChainID, v) - vote.Signature = v.Signature - - return vote, err -} - -func (cs *State) voteTime() time.Time { - now := tmtime.Now() - minVoteTime := now - // TODO: We should remove next line in case we don't vote for v in case cs.ProposalBlock == nil, - // even if cs.LockedBlock != nil. See https://docs.tendermint.com/master/spec/. - timeIota := time.Duration(cs.state.ConsensusParams.Block.TimeIotaMs) * time.Millisecond - if cs.LockedBlock != nil { - // See the BFT time spec https://docs.tendermint.com/master/spec/consensus/bft-time.html - minVoteTime = cs.LockedBlock.Time.Add(timeIota) - } else if cs.ProposalBlock != nil { - minVoteTime = cs.ProposalBlock.Time.Add(timeIota) - } - - if now.After(minVoteTime) { - return now - } - return minVoteTime -} - -// sign the vote and publish on internalMsgQueue -func (cs *State) signAddVote(msgType tmproto.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote { - if cs.privValidator == nil { // the node does not have a key - return nil - } - - if cs.privValidatorPubKey == nil { - // Vote won't be signed, but it's not critical. - cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet)) - return nil - } - - // If the node not in the validator set, do nothing. - if !cs.Validators.HasAddress(cs.privValidatorPubKey.Address()) { - return nil - } - - // TODO: pass pubKey to signVote - vote, err := cs.signVote(msgType, hash, header) - if err == nil { - cs.sendInternalMessage(msgInfo{&tmcon.VoteMessage{Vote: vote}, ""}) - cs.Logger.Info("Signed and pushed vote", "height", cs.Height, "round", cs.Round, "vote", vote) - return vote - } - // if !cs.replayMode { - cs.Logger.Error("Error signing vote", "height", cs.Height, "round", cs.Round, "vote", vote, "err", err) - //} - return nil -} - -// updatePrivValidatorPubKey get's the private validator public key and -// memoizes it. This func returns an error if the private validator is not -// responding or responds with an error. -func (cs *State) updatePrivValidatorPubKey() error { - if cs.privValidator == nil { - return nil - } - - pubKey, err := cs.privValidator.GetPubKey() - if err != nil { - return err - } - cs.privValidatorPubKey = pubKey - return nil -} - -// look back to check existence of the node's consensus votes before joining consensus -func (cs *State) checkDoubleSigningRisk(height int64) error { - if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 { - valAddr := cs.privValidatorPubKey.Address() - doubleSignCheckHeight := cs.config.DoubleSignCheckHeight - if doubleSignCheckHeight > height { - doubleSignCheckHeight = height - } - for i := int64(1); i < doubleSignCheckHeight; i++ { - lastCommit := cs.blockStore.LoadSeenCommit(height - i) - if lastCommit != nil { - for sigIdx, s := range lastCommit.Signatures { - if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) { - cs.Logger.Info("Found signature from the same key", "sig", s, "idx", sigIdx, "height", height-i) - return ErrSignatureFoundInPastBlocks - } - } - } - } - } - return nil -} - -//--------------------------------------------------------- - -func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int { - if h1 < h2 { - return -1 - } else if h1 > h2 { - return 1 - } - if r1 < r2 { - return -1 - } else if r1 > r2 { - return 1 - } - if s1 < s2 { - return -1 - } else if s1 > s2 { - return 1 - } - return 0 -} - -// repairWalFile decodes messages from src (until the decoder errors) and -// writes them to dst. -func repairWalFile(src, dst string) error { - in, err := os.Open(src) - if err != nil { - return err - } - defer in.Close() - - out, err := os.Open(dst) - if err != nil { - return err - } - defer out.Close() - - var ( - dec = NewWALDecoder(in) - enc = NewWALEncoder(out) - ) - - // best-case repair (until first error is encountered) - for { - msg, err := dec.Decode() - if err != nil { - break - } - - err = enc.Encode(msg) - if err != nil { - return fmt.Errorf("failed to encode msg: %w", err) - } - } - - return nil -} diff --git a/test/maverick/consensus/ticker.go b/test/maverick/consensus/ticker.go deleted file mode 100644 index fb3571ac8..000000000 --- a/test/maverick/consensus/ticker.go +++ /dev/null @@ -1,134 +0,0 @@ -package consensus - -import ( - "time" - - "github.com/tendermint/tendermint/libs/log" - "github.com/tendermint/tendermint/libs/service" -) - -var ( - tickTockBufferSize = 10 -) - -// TimeoutTicker is a timer that schedules timeouts -// conditional on the height/round/step in the timeoutInfo. -// The timeoutInfo.Duration may be non-positive. -type TimeoutTicker interface { - Start() error - Stop() error - Chan() <-chan timeoutInfo // on which to receive a timeout - ScheduleTimeout(ti timeoutInfo) // reset the timer - - SetLogger(log.Logger) -} - -// timeoutTicker wraps time.Timer, -// scheduling timeouts only for greater height/round/step -// than what it's already seen. -// Timeouts are scheduled along the tickChan, -// and fired on the tockChan. -type timeoutTicker struct { - service.BaseService - - timer *time.Timer - tickChan chan timeoutInfo // for scheduling timeouts - tockChan chan timeoutInfo // for notifying about them -} - -// NewTimeoutTicker returns a new TimeoutTicker. -func NewTimeoutTicker() TimeoutTicker { - tt := &timeoutTicker{ - timer: time.NewTimer(0), - tickChan: make(chan timeoutInfo, tickTockBufferSize), - tockChan: make(chan timeoutInfo, tickTockBufferSize), - } - tt.BaseService = *service.NewBaseService(nil, "TimeoutTicker", tt) - tt.stopTimer() // don't want to fire until the first scheduled timeout - return tt -} - -// OnStart implements service.Service. It starts the timeout routine. -func (t *timeoutTicker) OnStart() error { - - go t.timeoutRoutine() - - return nil -} - -// OnStop implements service.Service. It stops the timeout routine. -func (t *timeoutTicker) OnStop() { - t.BaseService.OnStop() - t.stopTimer() -} - -// Chan returns a channel on which timeouts are sent. -func (t *timeoutTicker) Chan() <-chan timeoutInfo { - return t.tockChan -} - -// ScheduleTimeout schedules a new timeout by sending on the internal tickChan. -// The timeoutRoutine is always available to read from tickChan, so this won't block. -// The scheduling may fail if the timeoutRoutine has already scheduled a timeout for a later height/round/step. -func (t *timeoutTicker) ScheduleTimeout(ti timeoutInfo) { - t.tickChan <- ti -} - -//------------------------------------------------------------- - -// stop the timer and drain if necessary -func (t *timeoutTicker) stopTimer() { - // Stop() returns false if it was already fired or was stopped - if !t.timer.Stop() { - select { - case <-t.timer.C: - default: - t.Logger.Debug("Timer already stopped") - } - } -} - -// send on tickChan to start a new timer. -// timers are interupted and replaced by new ticks from later steps -// timeouts of 0 on the tickChan will be immediately relayed to the tockChan -func (t *timeoutTicker) timeoutRoutine() { - t.Logger.Debug("Starting timeout routine") - var ti timeoutInfo - for { - select { - case newti := <-t.tickChan: - t.Logger.Debug("Received tick", "old_ti", ti, "new_ti", newti) - - // ignore tickers for old height/round/step - if newti.Height < ti.Height { - continue - } else if newti.Height == ti.Height { - if newti.Round < ti.Round { - continue - } else if newti.Round == ti.Round { - if ti.Step > 0 && newti.Step <= ti.Step { - continue - } - } - } - - // stop the last timer - t.stopTimer() - - // update timeoutInfo and reset timer - // NOTE time.Timer allows duration to be non-positive - ti = newti - t.timer.Reset(ti.Duration) - t.Logger.Debug("Scheduled timeout", "dur", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step) - case <-t.timer.C: - t.Logger.Info("Timed out", "dur", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step) - // go routine here guarantees timeoutRoutine doesn't block. - // Determinism comes from playback in the receiveRoutine. - // We can eliminate it by merging the timeoutRoutine into receiveRoutine - // and managing the timeouts ourselves with a millisecond ticker - go func(toi timeoutInfo) { t.tockChan <- toi }(ti) - case <-t.Quit(): - return - } - } -} diff --git a/test/maverick/consensus/wal.go b/test/maverick/consensus/wal.go deleted file mode 100644 index 99b929456..000000000 --- a/test/maverick/consensus/wal.go +++ /dev/null @@ -1,407 +0,0 @@ -package consensus - -import ( - "encoding/binary" - "errors" - "fmt" - "hash/crc32" - "io" - "path/filepath" - "time" - - "github.com/gogo/protobuf/proto" - - // tmjson "github.com/tendermint/tendermint/libs/json" - tmcon "github.com/tendermint/tendermint/consensus" - auto "github.com/tendermint/tendermint/libs/autofile" - "github.com/tendermint/tendermint/libs/log" - tmos "github.com/tendermint/tendermint/libs/os" - "github.com/tendermint/tendermint/libs/service" - tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus" - tmtime "github.com/tendermint/tendermint/types/time" -) - -const ( - // time.Time + max consensus msg size - maxMsgSizeBytes = maxMsgSize + 24 - - // how often the WAL should be sync'd during period sync'ing - walDefaultFlushInterval = 2 * time.Second -) - -//-------------------------------------------------------- -// types and functions for savings consensus messages -// func init() { -// tmjson.RegisterType(msgInfo{}, "tendermint/wal/MsgInfo") -// tmjson.RegisterType(timeoutInfo{}, "tendermint/wal/TimeoutInfo") -// tmjson.RegisterType(tmcon.EndHeightMessage {}, "tendermint/wal/EndHeightMessage ") -// } - -// Write ahead logger writes msgs to disk before they are processed. -// Can be used for crash-recovery and deterministic replay. -// TODO: currently the wal is overwritten during replay catchup, give it a mode -// so it's either reading or appending - must read to end to start appending -// again. -type BaseWAL struct { - service.BaseService - - group *auto.Group - - enc *WALEncoder - - flushTicker *time.Ticker - flushInterval time.Duration -} - -var _ tmcon.WAL = &BaseWAL{} - -// NewWAL returns a new write-ahead logger based on `baseWAL`, which implements -// WAL. It's flushed and synced to disk every 2s and once when stopped. -func NewWAL(walFile string, groupOptions ...func(*auto.Group)) (*BaseWAL, error) { - err := tmos.EnsureDir(filepath.Dir(walFile), 0700) - if err != nil { - return nil, fmt.Errorf("failed to ensure WAL directory is in place: %w", err) - } - - group, err := auto.OpenGroup(walFile, groupOptions...) - if err != nil { - return nil, err - } - wal := &BaseWAL{ - group: group, - enc: NewWALEncoder(group), - flushInterval: walDefaultFlushInterval, - } - wal.BaseService = *service.NewBaseService(nil, "baseWAL", wal) - return wal, nil -} - -// SetFlushInterval allows us to override the periodic flush interval for the WAL. -func (wal *BaseWAL) SetFlushInterval(i time.Duration) { - wal.flushInterval = i -} - -func (wal *BaseWAL) Group() *auto.Group { - return wal.group -} - -func (wal *BaseWAL) SetLogger(l log.Logger) { - wal.BaseService.Logger = l - wal.group.SetLogger(l) -} - -func (wal *BaseWAL) OnStart() error { - size, err := wal.group.Head.Size() - if err != nil { - return err - } else if size == 0 { - if err := wal.WriteSync(tmcon.EndHeightMessage{Height: 0}); err != nil { - return err - } - } - err = wal.group.Start() - if err != nil { - return err - } - wal.flushTicker = time.NewTicker(wal.flushInterval) - go wal.processFlushTicks() - return nil -} - -func (wal *BaseWAL) processFlushTicks() { - for { - select { - case <-wal.flushTicker.C: - if err := wal.FlushAndSync(); err != nil { - wal.Logger.Error("Periodic WAL flush failed", "err", err) - } - case <-wal.Quit(): - return - } - } -} - -// FlushAndSync flushes and fsync's the underlying group's data to disk. -// See auto#FlushAndSync -func (wal *BaseWAL) FlushAndSync() error { - return wal.group.FlushAndSync() -} - -// Stop the underlying autofile group. -// Use Wait() to ensure it's finished shutting down -// before cleaning up files. -func (wal *BaseWAL) OnStop() { - wal.flushTicker.Stop() - if err := wal.FlushAndSync(); err != nil { - wal.Logger.Error("error on flush data to disk", "error", err) - } - if err := wal.group.Stop(); err != nil { - wal.Logger.Error("error trying to stop wal", "error", err) - } - wal.group.Close() -} - -// Wait for the underlying autofile group to finish shutting down -// so it's safe to cleanup files. -func (wal *BaseWAL) Wait() { - wal.group.Wait() -} - -// Write is called in newStep and for each receive on the -// peerMsgQueue and the timeoutTicker. -// NOTE: does not call fsync() -func (wal *BaseWAL) Write(msg tmcon.WALMessage) error { - if wal == nil { - return nil - } - - if err := wal.enc.Encode(&tmcon.TimedWALMessage{Time: tmtime.Now(), Msg: msg}); err != nil { - wal.Logger.Error("Error writing msg to consensus wal. WARNING: recover may not be possible for the current height", - "err", err, "msg", msg) - return err - } - - return nil -} - -// WriteSync is called when we receive a msg from ourselves -// so that we write to disk before sending signed messages. -// NOTE: calls fsync() -func (wal *BaseWAL) WriteSync(msg tmcon.WALMessage) error { - if wal == nil { - return nil - } - - if err := wal.Write(msg); err != nil { - return err - } - - if err := wal.FlushAndSync(); err != nil { - wal.Logger.Error(`WriteSync failed to flush consensus wal. - WARNING: may result in creating alternative proposals / votes for the current height iff the node restarted`, - "err", err) - return err - } - - return nil -} - -// WALSearchOptions are optional arguments to SearchForEndHeight. -type WALSearchOptions struct { - // IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors. - IgnoreDataCorruptionErrors bool -} - -// SearchForEndHeight searches for the EndHeightMessage with the given height -// and returns an auto.GroupReader, whenever it was found or not and an error. -// Group reader will be nil if found equals false. -// -// CONTRACT: caller must close group reader. -func (wal *BaseWAL) SearchForEndHeight( - height int64, - options *tmcon.WALSearchOptions) (rd io.ReadCloser, found bool, err error) { - var ( - msg *tmcon.TimedWALMessage - gr *auto.GroupReader - ) - lastHeightFound := int64(-1) - - // NOTE: starting from the last file in the group because we're usually - // searching for the last height. See replay.go - min, max := wal.group.MinIndex(), wal.group.MaxIndex() - wal.Logger.Info("Searching for height", "height", height, "min", min, "max", max) - for index := max; index >= min; index-- { - gr, err = wal.group.NewReader(index) - if err != nil { - return nil, false, err - } - - dec := NewWALDecoder(gr) - for { - msg, err = dec.Decode() - if err == io.EOF { - // OPTIMISATION: no need to look for height in older files if we've seen h < height - if lastHeightFound > 0 && lastHeightFound < height { - gr.Close() - return nil, false, nil - } - // check next file - break - } - if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) { - wal.Logger.Error("Corrupted entry. Skipping...", "err", err) - // do nothing - continue - } else if err != nil { - gr.Close() - return nil, false, err - } - - if m, ok := msg.Msg.(tmcon.EndHeightMessage); ok { - lastHeightFound = m.Height - if m.Height == height { // found - wal.Logger.Info("Found", "height", height, "index", index) - return gr, true, nil - } - } - } - gr.Close() - } - - return nil, false, nil -} - -// ///////////////////////////////////////////////////////////////////////////// - -// A WALEncoder writes custom-encoded WAL messages to an output stream. -// -// Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value -type WALEncoder struct { - wr io.Writer -} - -// NewWALEncoder returns a new encoder that writes to wr. -func NewWALEncoder(wr io.Writer) *WALEncoder { - return &WALEncoder{wr} -} - -// Encode writes the custom encoding of v to the stream. It returns an error if -// the encoded size of v is greater than 1MB. Any error encountered -// during the write is also returned. -func (enc *WALEncoder) Encode(v *tmcon.TimedWALMessage) error { - pbMsg, err := WALToProto(v.Msg) - if err != nil { - return err - } - pv := tmcons.TimedWALMessage{ - Time: v.Time, - Msg: pbMsg, - } - - data, err := proto.Marshal(&pv) - if err != nil { - panic(fmt.Errorf("encode timed wall message failure: %w", err)) - } - - crc := crc32.Checksum(data, crc32c) - length := uint32(len(data)) - if length > maxMsgSizeBytes { - return fmt.Errorf("msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes) - } - totalLength := 8 + int(length) - - msg := make([]byte, totalLength) - binary.BigEndian.PutUint32(msg[0:4], crc) - binary.BigEndian.PutUint32(msg[4:8], length) - copy(msg[8:], data) - - _, err = enc.wr.Write(msg) - return err -} - -// ///////////////////////////////////////////////////////////////////////////// - -// IsDataCorruptionError returns true if data has been corrupted inside WAL. -func IsDataCorruptionError(err error) bool { - _, ok := err.(DataCorruptionError) - return ok -} - -// DataCorruptionError is an error that occures if data on disk was corrupted. -type DataCorruptionError struct { - cause error -} - -func (e DataCorruptionError) Error() string { - return fmt.Sprintf("DataCorruptionError[%v]", e.cause) -} - -func (e DataCorruptionError) Cause() error { - return e.cause -} - -// A WALDecoder reads and decodes custom-encoded WAL messages from an input -// stream. See WALEncoder for the format used. -// -// It will also compare the checksums and make sure data size is equal to the -// length from the header. If that is not the case, error will be returned. -type WALDecoder struct { - rd io.Reader -} - -// NewWALDecoder returns a new decoder that reads from rd. -func NewWALDecoder(rd io.Reader) *WALDecoder { - return &WALDecoder{rd} -} - -// Decode reads the next custom-encoded value from its reader and returns it. -func (dec *WALDecoder) Decode() (*tmcon.TimedWALMessage, error) { - b := make([]byte, 4) - - _, err := dec.rd.Read(b) - if errors.Is(err, io.EOF) { - return nil, err - } - if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %v", err)} - } - crc := binary.BigEndian.Uint32(b) - - b = make([]byte, 4) - _, err = dec.rd.Read(b) - if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to read length: %v", err)} - } - length := binary.BigEndian.Uint32(b) - - if length > maxMsgSizeBytes { - return nil, DataCorruptionError{fmt.Errorf( - "length %d exceeded maximum possible value of %d bytes", - length, - maxMsgSizeBytes)} - } - - data := make([]byte, length) - n, err := dec.rd.Read(data) - if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)} - } - - // check checksum before decoding data - actualCRC := crc32.Checksum(data, crc32c) - if actualCRC != crc { - return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)} - } - - var res = new(tmcons.TimedWALMessage) - err = proto.Unmarshal(data, res) - if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)} - } - - walMsg, err := WALFromProto(res.Msg) - if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to convert from proto: %w", err)} - } - tMsgWal := &tmcon.TimedWALMessage{ - Time: res.Time, - Msg: walMsg, - } - - return tMsgWal, err -} - -type nilWAL struct{} - -var _ tmcon.WAL = nilWAL{} - -func (nilWAL) Write(m tmcon.WALMessage) error { return nil } -func (nilWAL) WriteSync(m tmcon.WALMessage) error { return nil } -func (nilWAL) FlushAndSync() error { return nil } -func (nilWAL) SearchForEndHeight(height int64, - options *tmcon.WALSearchOptions) (rd io.ReadCloser, found bool, err error) { - return nil, false, nil -} -func (nilWAL) Start() error { return nil } -func (nilWAL) Stop() error { return nil } -func (nilWAL) Wait() {} diff --git a/test/maverick/consensus/wal_generator.go b/test/maverick/consensus/wal_generator.go deleted file mode 100644 index 6997db14e..000000000 --- a/test/maverick/consensus/wal_generator.go +++ /dev/null @@ -1,230 +0,0 @@ -package consensus - -import ( - "bufio" - "bytes" - "fmt" - "io" - "path/filepath" - "testing" - "time" - - db "github.com/tendermint/tm-db" - - "github.com/tendermint/tendermint/abci/example/kvstore" - cfg "github.com/tendermint/tendermint/config" - tmcon "github.com/tendermint/tendermint/consensus" - "github.com/tendermint/tendermint/libs/log" - tmrand "github.com/tendermint/tendermint/libs/rand" - "github.com/tendermint/tendermint/privval" - "github.com/tendermint/tendermint/proxy" - sm "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/store" - "github.com/tendermint/tendermint/types" -) - -// WALGenerateNBlocks generates a consensus WAL. It does this by spinning up a -// stripped down version of node (proxy app, event bus, consensus state) with a -// persistent kvstore application and special consensus wal instance -// (byteBufferWAL) and waits until numBlocks are created. -// If the node fails to produce given numBlocks, it returns an error. -func WALGenerateNBlocks(t *testing.T, wr io.Writer, numBlocks int) (err error) { - config := getConfig(t) - - app := kvstore.NewPersistentKVStoreApplication(filepath.Join(config.DBDir(), "wal_generator")) - - logger := log.TestingLogger().With("wal_generator", "wal_generator") - logger.Info("generating WAL (last height msg excluded)", "numBlocks", numBlocks) - - // /////////////////////////////////////////////////////////////////////////// - // COPY PASTE FROM node.go WITH A FEW MODIFICATIONS - // NOTE: we can't import node package because of circular dependency. - // NOTE: we don't do handshake so need to set state.Version.Consensus.App directly. - privValidatorKeyFile := config.PrivValidatorKeyFile() - privValidatorStateFile := config.PrivValidatorStateFile() - privValidator := privval.LoadOrGenFilePV(privValidatorKeyFile, privValidatorStateFile) - genDoc, err := types.GenesisDocFromFile(config.GenesisFile()) - if err != nil { - return fmt.Errorf("failed to read genesis file: %w", err) - } - blockStoreDB := db.NewMemDB() - stateDB := blockStoreDB - stateStore := sm.NewStore(stateDB) - state, err := sm.MakeGenesisState(genDoc) - if err != nil { - return fmt.Errorf("failed to make genesis state: %w", err) - } - state.Version.Consensus.App = kvstore.ProtocolVersion - if err = stateStore.Save(state); err != nil { - t.Error(err) - } - - blockStore := store.NewBlockStore(blockStoreDB) - - proxyApp := proxy.NewAppConns(proxy.NewLocalClientCreator(app)) - proxyApp.SetLogger(logger.With("module", "proxy")) - if err := proxyApp.Start(); err != nil { - return fmt.Errorf("failed to start proxy app connections: %w", err) - } - t.Cleanup(func() { - if err := proxyApp.Stop(); err != nil { - t.Error(err) - } - }) - - eventBus := types.NewEventBus() - eventBus.SetLogger(logger.With("module", "events")) - if err := eventBus.Start(); err != nil { - return fmt.Errorf("failed to start event bus: %w", err) - } - t.Cleanup(func() { - if err := eventBus.Stop(); err != nil { - t.Error(err) - } - }) - mempool := emptyMempool{} - evpool := sm.EmptyEvidencePool{} - blockExec := sm.NewBlockExecutor(stateStore, log.TestingLogger(), proxyApp.Consensus(), mempool, evpool) - consensusState := NewState(config.Consensus, state.Copy(), - blockExec, blockStore, mempool, evpool, map[int64]Misbehavior{}) - consensusState.SetLogger(logger) - consensusState.SetEventBus(eventBus) - if privValidator != nil { - consensusState.SetPrivValidator(privValidator) - } - // END OF COPY PASTE - // /////////////////////////////////////////////////////////////////////////// - - // set consensus wal to buffered WAL, which will write all incoming msgs to buffer - numBlocksWritten := make(chan struct{}) - wal := newByteBufferWAL(logger, NewWALEncoder(wr), int64(numBlocks), numBlocksWritten) - // see wal.go#103 - if err := wal.Write(tmcon.EndHeightMessage{Height: 0}); err != nil { - t.Error(err) - } - - consensusState.wal = wal - - if err := consensusState.Start(); err != nil { - return fmt.Errorf("failed to start consensus state: %w", err) - } - - select { - case <-numBlocksWritten: - if err := consensusState.Stop(); err != nil { - t.Error(err) - } - return nil - case <-time.After(1 * time.Minute): - if err := consensusState.Stop(); err != nil { - t.Error(err) - } - return fmt.Errorf("waited too long for tendermint to produce %d blocks (grep logs for `wal_generator`)", numBlocks) - } -} - -// WALWithNBlocks returns a WAL content with numBlocks. -func WALWithNBlocks(t *testing.T, numBlocks int) (data []byte, err error) { - var b bytes.Buffer - wr := bufio.NewWriter(&b) - - if err := WALGenerateNBlocks(t, wr, numBlocks); err != nil { - return []byte{}, err - } - - wr.Flush() - return b.Bytes(), nil -} - -func randPort() int { - // returns between base and base + spread - base, spread := 20000, 20000 - return base + tmrand.Intn(spread) -} - -func makeAddrs() (string, string, string) { - start := randPort() - return fmt.Sprintf("tcp://127.0.0.1:%d", start), - fmt.Sprintf("tcp://127.0.0.1:%d", start+1), - fmt.Sprintf("tcp://127.0.0.1:%d", start+2) -} - -// getConfig returns a config for test cases -func getConfig(t *testing.T) *cfg.Config { - c := cfg.ResetTestRoot(t.Name()) - - // and we use random ports to run in parallel - tm, rpc, grpc := makeAddrs() - c.P2P.ListenAddress = tm - c.RPC.ListenAddress = rpc - c.RPC.GRPCListenAddress = grpc - return c -} - -// byteBufferWAL is a WAL which writes all msgs to a byte buffer. Writing stops -// when the heightToStop is reached. Client will be notified via -// signalWhenStopsTo channel. -type byteBufferWAL struct { - enc *WALEncoder - stopped bool - heightToStop int64 - signalWhenStopsTo chan<- struct{} - - logger log.Logger -} - -// needed for determinism -var fixedTime, _ = time.Parse(time.RFC3339, "2017-01-02T15:04:05Z") - -func newByteBufferWAL(logger log.Logger, enc *WALEncoder, nBlocks int64, signalStop chan<- struct{}) *byteBufferWAL { - return &byteBufferWAL{ - enc: enc, - heightToStop: nBlocks, - signalWhenStopsTo: signalStop, - logger: logger, - } -} - -// Save writes message to the internal buffer except when heightToStop is -// reached, in which case it will signal the caller via signalWhenStopsTo and -// skip writing. -func (w *byteBufferWAL) Write(m tmcon.WALMessage) error { - if w.stopped { - w.logger.Debug("WAL already stopped. Not writing message", "msg", m) - return nil - } - - if endMsg, ok := m.(tmcon.EndHeightMessage); ok { - w.logger.Debug("WAL write end height message", "height", endMsg.Height, "stopHeight", w.heightToStop) - if endMsg.Height == w.heightToStop { - w.logger.Debug("Stopping WAL at height", "height", endMsg.Height) - w.signalWhenStopsTo <- struct{}{} - w.stopped = true - return nil - } - } - - w.logger.Debug("WAL Write Message", "msg", m) - err := w.enc.Encode(&tmcon.TimedWALMessage{Time: fixedTime, Msg: m}) - if err != nil { - panic(fmt.Sprintf("failed to encode the msg %v", m)) - } - - return nil -} - -func (w *byteBufferWAL) WriteSync(m tmcon.WALMessage) error { - return w.Write(m) -} - -func (w *byteBufferWAL) FlushAndSync() error { return nil } - -func (w *byteBufferWAL) SearchForEndHeight( - height int64, - options *tmcon.WALSearchOptions) (rd io.ReadCloser, found bool, err error) { - return nil, false, nil -} - -func (w *byteBufferWAL) Start() error { return nil } -func (w *byteBufferWAL) Stop() error { return nil } -func (w *byteBufferWAL) Wait() {} diff --git a/test/maverick/main.go b/test/maverick/main.go deleted file mode 100644 index a3c42e28f..000000000 --- a/test/maverick/main.go +++ /dev/null @@ -1,242 +0,0 @@ -package main - -import ( - "fmt" - "os" - "path/filepath" - - "github.com/spf13/cobra" - "github.com/spf13/viper" - - cmd "github.com/tendermint/tendermint/cmd/tendermint/commands" - "github.com/tendermint/tendermint/cmd/tendermint/commands/debug" - cfg "github.com/tendermint/tendermint/config" - "github.com/tendermint/tendermint/libs/cli" - tmflags "github.com/tendermint/tendermint/libs/cli/flags" - "github.com/tendermint/tendermint/libs/log" - tmos "github.com/tendermint/tendermint/libs/os" - tmrand "github.com/tendermint/tendermint/libs/rand" - "github.com/tendermint/tendermint/p2p" - cs "github.com/tendermint/tendermint/test/maverick/consensus" - nd "github.com/tendermint/tendermint/test/maverick/node" - "github.com/tendermint/tendermint/types" - tmtime "github.com/tendermint/tendermint/types/time" -) - -var ( - config = cfg.DefaultConfig() - logger = log.NewTMLogger(log.NewSyncWriter(os.Stdout)) - misbehaviorFlag = "" -) - -func init() { - registerFlagsRootCmd(RootCmd) -} - -func registerFlagsRootCmd(command *cobra.Command) { - command.PersistentFlags().String("log_level", config.LogLevel, "Log level") -} - -func ParseConfig() (*cfg.Config, error) { - conf := cfg.DefaultConfig() - err := viper.Unmarshal(conf) - if err != nil { - return nil, err - } - conf.SetRoot(conf.RootDir) - cfg.EnsureRoot(conf.RootDir) - if err = conf.ValidateBasic(); err != nil { - return nil, fmt.Errorf("error in config file: %v", err) - } - return conf, err -} - -// RootCmd is the root command for Tendermint core. -var RootCmd = &cobra.Command{ - Use: "maverick", - Short: "Tendermint Maverick Node", - Long: "Tendermint Maverick Node for testing with faulty consensus misbehaviors in a testnet. Contains " + - "all the functionality of a normal node but custom misbehaviors can be injected when running the node " + - "through a flag. See maverick node --help for how the misbehavior flag is constructured", - PersistentPreRunE: func(cmd *cobra.Command, args []string) (err error) { - fmt.Printf("use: %v, args: %v", cmd.Use, cmd.Args) - - config, err = ParseConfig() - if err != nil { - return err - } - - if config.LogFormat == cfg.LogFormatJSON { - logger = log.NewTMJSONLogger(log.NewSyncWriter(os.Stdout)) - } - - logger, err = tmflags.ParseLogLevel(config.LogLevel, logger, cfg.DefaultLogLevel) - if err != nil { - return err - } - - if viper.GetBool(cli.TraceFlag) { - logger = log.NewTracingLogger(logger) - } - - logger = logger.With("module", "main") - return nil - }, -} - -func main() { - rootCmd := RootCmd - rootCmd.AddCommand( - ListMisbehaviorCmd, - cmd.GenValidatorCmd, - InitFilesCmd, - cmd.ProbeUpnpCmd, - cmd.ReplayCmd, - cmd.ReplayConsoleCmd, - cmd.ResetAllCmd, - cmd.ResetPrivValidatorCmd, - cmd.ShowValidatorCmd, - cmd.ShowNodeIDCmd, - cmd.GenNodeKeyCmd, - cmd.VersionCmd, - debug.DebugCmd, - cli.NewCompletionCmd(rootCmd, true), - ) - - nodeCmd := &cobra.Command{ - Use: "node", - Short: "Run the maverick node", - RunE: func(command *cobra.Command, args []string) error { - return startNode(config, logger, misbehaviorFlag) - }, - } - - cmd.AddNodeFlags(nodeCmd) - - // Create & start node - rootCmd.AddCommand(nodeCmd) - - // add special flag for misbehaviors - nodeCmd.Flags().StringVar( - &misbehaviorFlag, - "misbehaviors", - "", - "Select the misbehaviors of the node (comma-separated, no spaces in between): \n"+ - "e.g. --misbehaviors double-prevote,3\n"+ - "You can also have multiple misbehaviors: e.g. double-prevote,3,no-vote,5") - - cmd := cli.PrepareBaseCmd(rootCmd, "TM", os.ExpandEnv(filepath.Join("$HOME", cfg.DefaultTendermintDir))) - if err := cmd.Execute(); err != nil { - panic(err) - } -} - -func startNode(config *cfg.Config, logger log.Logger, misbehaviorFlag string) error { - misbehaviors, err := nd.ParseMisbehaviors(misbehaviorFlag) - if err != nil { - return err - } - - node, err := nd.DefaultNewNode(config, logger, misbehaviors) - if err != nil { - return fmt.Errorf("failed to create node: %w", err) - } - - if err := node.Start(); err != nil { - return fmt.Errorf("failed to start node: %w", err) - } - - logger.Info("Started node", "nodeInfo", node.Switch().NodeInfo()) - - // Stop upon receiving SIGTERM or CTRL-C. - tmos.TrapSignal(logger, func() { - if node.IsRunning() { - if err := node.Stop(); err != nil { - logger.Error("unable to stop the node", "error", err) - } - } - }) - - // Run forever. - select {} -} - -var InitFilesCmd = &cobra.Command{ - Use: "init", - Short: "Initialize Tendermint", - RunE: initFiles, -} - -func initFiles(cmd *cobra.Command, args []string) error { - return initFilesWithConfig(config) -} - -func initFilesWithConfig(config *cfg.Config) error { - // private validator - privValKeyFile := config.PrivValidatorKeyFile() - privValStateFile := config.PrivValidatorStateFile() - var pv *nd.FilePV - if tmos.FileExists(privValKeyFile) { - pv = nd.LoadFilePV(privValKeyFile, privValStateFile) - logger.Info("Found private validator", "keyFile", privValKeyFile, - "stateFile", privValStateFile) - } else { - pv = nd.GenFilePV(privValKeyFile, privValStateFile) - pv.Save() - logger.Info("Generated private validator", "keyFile", privValKeyFile, - "stateFile", privValStateFile) - } - - nodeKeyFile := config.NodeKeyFile() - if tmos.FileExists(nodeKeyFile) { - logger.Info("Found node key", "path", nodeKeyFile) - } else { - if _, err := p2p.LoadOrGenNodeKey(nodeKeyFile); err != nil { - return err - } - logger.Info("Generated node key", "path", nodeKeyFile) - } - - // genesis file - genFile := config.GenesisFile() - if tmos.FileExists(genFile) { - logger.Info("Found genesis file", "path", genFile) - } else { - genDoc := types.GenesisDoc{ - ChainID: fmt.Sprintf("test-chain-%v", tmrand.Str(6)), - GenesisTime: tmtime.Now(), - ConsensusParams: types.DefaultConsensusParams(), - } - pubKey, err := pv.GetPubKey() - if err != nil { - return fmt.Errorf("can't get pubkey: %w", err) - } - genDoc.Validators = []types.GenesisValidator{{ - Address: pubKey.Address(), - PubKey: pubKey, - Power: 10, - }} - - if err := genDoc.SaveAs(genFile); err != nil { - return err - } - logger.Info("Generated genesis file", "path", genFile) - } - - return nil -} - -var ListMisbehaviorCmd = &cobra.Command{ - Use: "misbehaviors", - Short: "Lists possible misbehaviors", - RunE: listMisbehaviors, -} - -func listMisbehaviors(cmd *cobra.Command, args []string) error { - str := "Currently registered misbehaviors: \n" - for key := range cs.MisbehaviorList { - str += fmt.Sprintf("- %s\n", key) - } - fmt.Println(str) - return nil -} diff --git a/test/maverick/node/node.go b/test/maverick/node/node.go deleted file mode 100644 index 919554c6d..000000000 --- a/test/maverick/node/node.go +++ /dev/null @@ -1,1484 +0,0 @@ -package node - -import ( - "bytes" - "context" - "errors" - "fmt" - "net" - "net/http" - _ "net/http/pprof" // nolint: gosec // securely exposed on separate, optional port - "strconv" - "strings" - "time" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/rs/cors" - - dbm "github.com/tendermint/tm-db" - - abci "github.com/tendermint/tendermint/abci/types" - bcv0 "github.com/tendermint/tendermint/blockchain/v0" - bcv1 "github.com/tendermint/tendermint/blockchain/v1" - bcv2 "github.com/tendermint/tendermint/blockchain/v2" - cfg "github.com/tendermint/tendermint/config" - "github.com/tendermint/tendermint/consensus" - "github.com/tendermint/tendermint/crypto" - "github.com/tendermint/tendermint/evidence" - tmjson "github.com/tendermint/tendermint/libs/json" - "github.com/tendermint/tendermint/libs/log" - tmpubsub "github.com/tendermint/tendermint/libs/pubsub" - "github.com/tendermint/tendermint/libs/service" - "github.com/tendermint/tendermint/light" - mempl "github.com/tendermint/tendermint/mempool" - mempoolv0 "github.com/tendermint/tendermint/mempool/v0" - mempoolv1 "github.com/tendermint/tendermint/mempool/v1" - "github.com/tendermint/tendermint/p2p" - "github.com/tendermint/tendermint/p2p/pex" - "github.com/tendermint/tendermint/privval" - "github.com/tendermint/tendermint/proxy" - rpccore "github.com/tendermint/tendermint/rpc/core" - grpccore "github.com/tendermint/tendermint/rpc/grpc" - rpcserver "github.com/tendermint/tendermint/rpc/jsonrpc/server" - sm "github.com/tendermint/tendermint/state" - "github.com/tendermint/tendermint/state/indexer" - blockidxkv "github.com/tendermint/tendermint/state/indexer/block/kv" - blockidxnull "github.com/tendermint/tendermint/state/indexer/block/null" - "github.com/tendermint/tendermint/state/txindex" - "github.com/tendermint/tendermint/state/txindex/kv" - "github.com/tendermint/tendermint/state/txindex/null" - "github.com/tendermint/tendermint/statesync" - "github.com/tendermint/tendermint/store" - cs "github.com/tendermint/tendermint/test/maverick/consensus" - "github.com/tendermint/tendermint/types" - tmtime "github.com/tendermint/tendermint/types/time" - "github.com/tendermint/tendermint/version" -) - -//------------------------------------------------------------------------------ - -// ParseMisbehaviors is a util function that converts a comma separated string into -// a map of misbehaviors to be executed by the maverick node -func ParseMisbehaviors(str string) (map[int64]cs.Misbehavior, error) { - // check if string is empty in which case we run a normal node - var misbehaviors = make(map[int64]cs.Misbehavior) - if str == "" { - return misbehaviors, nil - } - strs := strings.Split(str, ",") - if len(strs)%2 != 0 { - return misbehaviors, errors.New("missing either height or misbehavior name in the misbehavior flag") - } -OUTER_LOOP: - for i := 0; i < len(strs); i += 2 { - height, err := strconv.ParseInt(strs[i+1], 10, 64) - if err != nil { - return misbehaviors, fmt.Errorf("failed to parse misbehavior height: %w", err) - } - for key, misbehavior := range cs.MisbehaviorList { - if key == strs[i] { - misbehaviors[height] = misbehavior - continue OUTER_LOOP - } - } - return misbehaviors, fmt.Errorf("received unknown misbehavior: %s. Did you forget to add it?", strs[i]) - } - - return misbehaviors, nil -} - -// DBContext specifies config information for loading a new DB. -type DBContext struct { - ID string - Config *cfg.Config -} - -// DBProvider takes a DBContext and returns an instantiated DB. -type DBProvider func(*DBContext) (dbm.DB, error) - -// DefaultDBProvider returns a database using the DBBackend and DBDir -// specified in the ctx.Config. -func DefaultDBProvider(ctx *DBContext) (dbm.DB, error) { - dbType := dbm.BackendType(ctx.Config.DBBackend) - return dbm.NewDB(ctx.ID, dbType, ctx.Config.DBDir()) -} - -// GenesisDocProvider returns a GenesisDoc. -// It allows the GenesisDoc to be pulled from sources other than the -// filesystem, for instance from a distributed key-value store cluster. -type GenesisDocProvider func() (*types.GenesisDoc, error) - -// DefaultGenesisDocProviderFunc returns a GenesisDocProvider that loads -// the GenesisDoc from the config.GenesisFile() on the filesystem. -func DefaultGenesisDocProviderFunc(config *cfg.Config) GenesisDocProvider { - return func() (*types.GenesisDoc, error) { - return types.GenesisDocFromFile(config.GenesisFile()) - } -} - -// Provider takes a config and a logger and returns a ready to go Node. -type Provider func(*cfg.Config, log.Logger) (*Node, error) - -// DefaultNewNode returns a Tendermint node with default settings for the -// PrivValidator, ClientCreator, GenesisDoc, and DBProvider. -// It implements NodeProvider. -func DefaultNewNode(config *cfg.Config, logger log.Logger, misbehaviors map[int64]cs.Misbehavior) (*Node, error) { - nodeKey, err := p2p.LoadOrGenNodeKey(config.NodeKeyFile()) - if err != nil { - return nil, fmt.Errorf("failed to load or gen node key %s, err: %w", config.NodeKeyFile(), err) - } - - return NewNode(config, - LoadOrGenFilePV(config.PrivValidatorKeyFile(), config.PrivValidatorStateFile()), - nodeKey, - proxy.DefaultClientCreator(config.ProxyApp, config.ABCI, config.DBDir()), - DefaultGenesisDocProviderFunc(config), - DefaultDBProvider, - DefaultMetricsProvider(config.Instrumentation), - logger, - misbehaviors, - ) - -} - -// MetricsProvider returns a consensus, p2p and mempool Metrics. -type MetricsProvider func(chainID string) (*consensus.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) - -// DefaultMetricsProvider returns Metrics build using Prometheus client library -// if Prometheus is enabled. Otherwise, it returns no-op Metrics. -func DefaultMetricsProvider(config *cfg.InstrumentationConfig) MetricsProvider { - return func(chainID string) (*consensus.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) { - if config.Prometheus { - return consensus.PrometheusMetrics(config.Namespace, "chain_id", chainID), - p2p.PrometheusMetrics(config.Namespace, "chain_id", chainID), - mempl.PrometheusMetrics(config.Namespace, "chain_id", chainID), - sm.PrometheusMetrics(config.Namespace, "chain_id", chainID) - } - return consensus.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics(), sm.NopMetrics() - } -} - -// Option sets a parameter for the node. -type Option func(*Node) - -// Temporary interface for switching to fast sync, we should get rid of v0 and v1 reactors. -// See: https://github.com/tendermint/tendermint/issues/4595 -type fastSyncReactor interface { - SwitchToFastSync(sm.State) error -} - -// CustomReactors allows you to add custom reactors (name -> p2p.Reactor) to -// the node's Switch. -// -// WARNING: using any name from the below list of the existing reactors will -// result in replacing it with the custom one. -// -// - MEMPOOL -// - BLOCKCHAIN -// - CONSENSUS -// - EVIDENCE -// - PEX -// - STATESYNC -func CustomReactors(reactors map[string]p2p.Reactor) Option { - return func(n *Node) { - for name, reactor := range reactors { - if existingReactor := n.sw.Reactor(name); existingReactor != nil { - n.sw.Logger.Info("Replacing existing reactor with a custom one", - "name", name, "existing", existingReactor, "custom", reactor) - n.sw.RemoveReactor(name, existingReactor) - } - n.sw.AddReactor(name, reactor) - } - } -} - -func CustomReactorsAsConstructors(reactors map[string]func(n *Node) p2p.Reactor) Option { - return func(n *Node) { - for name, customReactor := range reactors { - if existingReactor := n.sw.Reactor(name); existingReactor != nil { - n.sw.Logger.Info("Replacing existing reactor with a custom one", - "name", name) - n.sw.RemoveReactor(name, existingReactor) - } - n.sw.AddReactor(name, customReactor(n)) - } - } -} - -// StateProvider overrides the state provider used by state sync to retrieve trusted app hashes and -// build a State object for bootstrapping the node. -// WARNING: this interface is considered unstable and subject to change. -func StateProvider(stateProvider statesync.StateProvider) Option { - return func(n *Node) { - n.stateSyncProvider = stateProvider - } -} - -//------------------------------------------------------------------------------ - -// Node is the highest level interface to a full Tendermint node. -// It includes all configuration information and running services. -type Node struct { - service.BaseService - - // config - config *cfg.Config - genesisDoc *types.GenesisDoc // initial validator set - privValidator types.PrivValidator // local node's validator key - - // network - transport *p2p.MultiplexTransport - sw *p2p.Switch // p2p connections - addrBook pex.AddrBook // known peers - nodeInfo p2p.NodeInfo - nodeKey *p2p.NodeKey // our node privkey - isListening bool - - // services - eventBus *types.EventBus // pub/sub for services - stateStore sm.Store - blockStore *store.BlockStore // store the blockchain to disk - bcReactor p2p.Reactor // for fast-syncing - mempoolReactor p2p.Reactor // for gossipping transactions - mempool mempl.Mempool - stateSync bool // whether the node should state sync on startup - stateSyncReactor *statesync.Reactor // for hosting and restoring state sync snapshots - stateSyncProvider statesync.StateProvider // provides state data for bootstrapping a node - stateSyncGenesis sm.State // provides the genesis state for state sync - consensusState *cs.State // latest consensus state - consensusReactor *cs.Reactor // for participating in the consensus - pexReactor *pex.Reactor // for exchanging peer addresses - evidencePool *evidence.Pool // tracking evidence - proxyApp proxy.AppConns // connection to the application - rpcListeners []net.Listener // rpc servers - txIndexer txindex.TxIndexer - blockIndexer indexer.BlockIndexer - indexerService *txindex.IndexerService - prometheusSrv *http.Server -} - -func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.BlockStore, stateDB dbm.DB, err error) { - var blockStoreDB dbm.DB - blockStoreDB, err = dbProvider(&DBContext{"blockstore", config}) - if err != nil { - return - } - blockStore = store.NewBlockStore(blockStoreDB) - - stateDB, err = dbProvider(&DBContext{"state", config}) - if err != nil { - return - } - - return -} - -func createAndStartProxyAppConns(clientCreator proxy.ClientCreator, logger log.Logger) (proxy.AppConns, error) { - proxyApp := proxy.NewAppConns(clientCreator) - proxyApp.SetLogger(logger.With("module", "proxy")) - if err := proxyApp.Start(); err != nil { - return nil, fmt.Errorf("error starting proxy app connections: %v", err) - } - return proxyApp, nil -} - -func createAndStartEventBus(logger log.Logger) (*types.EventBus, error) { - eventBus := types.NewEventBus() - eventBus.SetLogger(logger.With("module", "events")) - if err := eventBus.Start(); err != nil { - return nil, err - } - return eventBus, nil -} - -func createAndStartIndexerService( - config *cfg.Config, - dbProvider DBProvider, - eventBus *types.EventBus, - logger log.Logger, -) (*txindex.IndexerService, txindex.TxIndexer, indexer.BlockIndexer, error) { - - var ( - txIndexer txindex.TxIndexer - blockIndexer indexer.BlockIndexer - ) - - switch config.TxIndex.Indexer { - case "kv": - store, err := dbProvider(&DBContext{"tx_index", config}) - if err != nil { - return nil, nil, nil, err - } - - txIndexer = kv.NewTxIndex(store) - blockIndexer = blockidxkv.New(dbm.NewPrefixDB(store, []byte("block_events"))) - default: - txIndexer = &null.TxIndex{} - blockIndexer = &blockidxnull.BlockerIndexer{} - } - - indexerService := txindex.NewIndexerService(txIndexer, blockIndexer, eventBus) - indexerService.SetLogger(logger.With("module", "txindex")) - - if err := indexerService.Start(); err != nil { - return nil, nil, nil, err - } - - return indexerService, txIndexer, blockIndexer, nil -} - -func doHandshake( - stateStore sm.Store, - state sm.State, - blockStore sm.BlockStore, - genDoc *types.GenesisDoc, - eventBus types.BlockEventPublisher, - proxyApp proxy.AppConns, - consensusLogger log.Logger) error { - - handshaker := cs.NewHandshaker(stateStore, state, blockStore, genDoc) - handshaker.SetLogger(consensusLogger) - handshaker.SetEventBus(eventBus) - if err := handshaker.Handshake(proxyApp); err != nil { - return fmt.Errorf("error during handshake: %v", err) - } - return nil -} - -func logNodeStartupInfo(state sm.State, pubKey crypto.PubKey, logger, consensusLogger log.Logger) { - // Log the version info. - logger.Info("Version info", - "software", version.TMCoreSemVer, - "block", version.BlockProtocol, - "p2p", version.P2PProtocol, - ) - - // If the state and software differ in block version, at least log it. - if state.Version.Consensus.Block != version.BlockProtocol { - logger.Info("Software and state have different block protocols", - "software", version.BlockProtocol, - "state", state.Version.Consensus.Block, - ) - } - - addr := pubKey.Address() - // Log whether this node is a validator or an observer - if state.Validators.HasAddress(addr) { - consensusLogger.Info("This node is a validator", "addr", addr, "pubKey", pubKey) - } else { - consensusLogger.Info("This node is not a validator", "addr", addr, "pubKey", pubKey) - } -} - -func onlyValidatorIsUs(state sm.State, pubKey crypto.PubKey) bool { - if state.Validators.Size() > 1 { - return false - } - addr, _ := state.Validators.GetByIndex(0) - return bytes.Equal(pubKey.Address(), addr) -} - -func createMempoolAndMempoolReactor(config *cfg.Config, proxyApp proxy.AppConns, - state sm.State, memplMetrics *mempl.Metrics, logger log.Logger) (p2p.Reactor, mempl.Mempool) { - - switch config.Mempool.Version { - case cfg.MempoolV1: - mp := mempoolv1.NewTxMempool( - logger, - config.Mempool, - proxyApp.Mempool(), - state.LastBlockHeight, - mempoolv1.WithMetrics(memplMetrics), - mempoolv1.WithPreCheck(sm.TxPreCheck(state)), - mempoolv1.WithPostCheck(sm.TxPostCheck(state)), - ) - - reactor := mempoolv1.NewReactor( - config.Mempool, - mp, - ) - if config.Consensus.WaitForTxs() { - mp.EnableTxsAvailable() - } - - return reactor, mp - - case cfg.MempoolV0: - mp := mempoolv0.NewCListMempool( - config.Mempool, - proxyApp.Mempool(), - state.LastBlockHeight, - mempoolv0.WithMetrics(memplMetrics), - mempoolv0.WithPreCheck(sm.TxPreCheck(state)), - mempoolv0.WithPostCheck(sm.TxPostCheck(state)), - ) - - mp.SetLogger(logger) - mp.SetLogger(logger) - - reactor := mempoolv0.NewReactor( - config.Mempool, - mp, - ) - if config.Consensus.WaitForTxs() { - mp.EnableTxsAvailable() - } - - return reactor, mp - - default: - return nil, nil - } -} - -func createEvidenceReactor(config *cfg.Config, dbProvider DBProvider, - stateDB dbm.DB, blockStore *store.BlockStore, logger log.Logger) (*evidence.Reactor, *evidence.Pool, error) { - - evidenceDB, err := dbProvider(&DBContext{"evidence", config}) - if err != nil { - return nil, nil, err - } - evidenceLogger := logger.With("module", "evidence") - evidencePool, err := evidence.NewPool(evidenceDB, sm.NewStore(stateDB), blockStore) - if err != nil { - return nil, nil, err - } - evidenceReactor := evidence.NewReactor(evidencePool) - evidenceReactor.SetLogger(evidenceLogger) - return evidenceReactor, evidencePool, nil -} - -func createBlockchainReactor(config *cfg.Config, - state sm.State, - blockExec *sm.BlockExecutor, - blockStore *store.BlockStore, - fastSync bool, - logger log.Logger) (bcReactor p2p.Reactor, err error) { - - switch config.FastSync.Version { - case "v0": - bcReactor = bcv0.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync) - case "v1": - bcReactor = bcv1.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync) - case "v2": - bcReactor = bcv2.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync) - default: - return nil, fmt.Errorf("unknown fastsync version %s", config.FastSync.Version) - } - - bcReactor.SetLogger(logger.With("module", "blockchain")) - return bcReactor, nil -} - -func createConsensusReactor(config *cfg.Config, - state sm.State, - blockExec *sm.BlockExecutor, - blockStore sm.BlockStore, - mempool mempl.Mempool, - evidencePool *evidence.Pool, - privValidator types.PrivValidator, - csMetrics *consensus.Metrics, - waitSync bool, - eventBus *types.EventBus, - consensusLogger log.Logger, - misbehaviors map[int64]cs.Misbehavior) (*cs.Reactor, *cs.State) { - - consensusState := cs.NewState( - config.Consensus, - state.Copy(), - blockExec, - blockStore, - mempool, - evidencePool, - misbehaviors, - cs.StateMetrics(csMetrics), - ) - consensusState.SetLogger(consensusLogger) - if privValidator != nil { - consensusState.SetPrivValidator(privValidator) - } - consensusReactor := cs.NewReactor(consensusState, waitSync, cs.ReactorMetrics(csMetrics)) - consensusReactor.SetLogger(consensusLogger) - // services which will be publishing and/or subscribing for messages (events) - // consensusReactor will set it on consensusState and blockExecutor - consensusReactor.SetEventBus(eventBus) - return consensusReactor, consensusState -} - -func createTransport( - config *cfg.Config, - nodeInfo p2p.NodeInfo, - nodeKey *p2p.NodeKey, - proxyApp proxy.AppConns, -) ( - *p2p.MultiplexTransport, - []p2p.PeerFilterFunc, -) { - var ( - mConnConfig = p2p.MConnConfig(config.P2P) - transport = p2p.NewMultiplexTransport(nodeInfo, *nodeKey, mConnConfig) - connFilters = []p2p.ConnFilterFunc{} - peerFilters = []p2p.PeerFilterFunc{} - ) - - if !config.P2P.AllowDuplicateIP { - connFilters = append(connFilters, p2p.ConnDuplicateIPFilter()) - } - - // Filter peers by addr or pubkey with an ABCI query. - // If the query return code is OK, add peer. - if config.FilterPeers { - connFilters = append( - connFilters, - // ABCI query for address filtering. - func(_ p2p.ConnSet, c net.Conn, _ []net.IP) error { - res, err := proxyApp.Query().QuerySync(abci.RequestQuery{ - Path: fmt.Sprintf("/p2p/filter/addr/%s", c.RemoteAddr().String()), - }) - if err != nil { - return err - } - if res.IsErr() { - return fmt.Errorf("error querying abci app: %v", res) - } - - return nil - }, - ) - - peerFilters = append( - peerFilters, - // ABCI query for ID filtering. - func(_ p2p.IPeerSet, p p2p.Peer) error { - res, err := proxyApp.Query().QuerySync(abci.RequestQuery{ - Path: fmt.Sprintf("/p2p/filter/id/%s", p.ID()), - }) - if err != nil { - return err - } - if res.IsErr() { - return fmt.Errorf("error querying abci app: %v", res) - } - - return nil - }, - ) - } - - p2p.MultiplexTransportConnFilters(connFilters...)(transport) - - // Limit the number of incoming connections. - max := config.P2P.MaxNumInboundPeers + len(splitAndTrimEmpty(config.P2P.UnconditionalPeerIDs, ",", " ")) - p2p.MultiplexTransportMaxIncomingConnections(max)(transport) - - return transport, peerFilters -} - -func createSwitch(config *cfg.Config, - transport p2p.Transport, - p2pMetrics *p2p.Metrics, - peerFilters []p2p.PeerFilterFunc, - mempoolReactor p2p.Reactor, - bcReactor p2p.Reactor, - stateSyncReactor *statesync.Reactor, - consensusReactor *cs.Reactor, - evidenceReactor *evidence.Reactor, - nodeInfo p2p.NodeInfo, - nodeKey *p2p.NodeKey, - p2pLogger log.Logger) *p2p.Switch { - - sw := p2p.NewSwitch( - config.P2P, - transport, - p2p.WithMetrics(p2pMetrics), - p2p.SwitchPeerFilters(peerFilters...), - ) - sw.SetLogger(p2pLogger) - sw.AddReactor("MEMPOOL", mempoolReactor) - sw.AddReactor("BLOCKCHAIN", bcReactor) - sw.AddReactor("CONSENSUS", consensusReactor) - sw.AddReactor("EVIDENCE", evidenceReactor) - sw.AddReactor("STATESYNC", stateSyncReactor) - - sw.SetNodeInfo(nodeInfo) - sw.SetNodeKey(nodeKey) - - p2pLogger.Info("P2P Node ID", "ID", nodeKey.ID(), "file", config.NodeKeyFile()) - return sw -} - -func createAddrBookAndSetOnSwitch(config *cfg.Config, sw *p2p.Switch, - p2pLogger log.Logger, nodeKey *p2p.NodeKey) (pex.AddrBook, error) { - - addrBook := pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict) - addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile())) - - // Add ourselves to addrbook to prevent dialing ourselves - if config.P2P.ExternalAddress != "" { - addr, err := p2p.NewNetAddressString(p2p.IDAddressString(nodeKey.ID(), config.P2P.ExternalAddress)) - if err != nil { - return nil, fmt.Errorf("p2p.external_address is incorrect: %w", err) - } - addrBook.AddOurAddress(addr) - } - if config.P2P.ListenAddress != "" { - addr, err := p2p.NewNetAddressString(p2p.IDAddressString(nodeKey.ID(), config.P2P.ListenAddress)) - if err != nil { - return nil, fmt.Errorf("p2p.laddr is incorrect: %w", err) - } - addrBook.AddOurAddress(addr) - } - - sw.SetAddrBook(addrBook) - - return addrBook, nil -} - -func createPEXReactorAndAddToSwitch(addrBook pex.AddrBook, config *cfg.Config, - sw *p2p.Switch, logger log.Logger) *pex.Reactor { - - // TODO persistent peers ? so we can have their DNS addrs saved - pexReactor := pex.NewReactor(addrBook, - &pex.ReactorConfig{ - Seeds: splitAndTrimEmpty(config.P2P.Seeds, ",", " "), - SeedMode: config.P2P.SeedMode, - // See consensus/reactor.go: blocksToContributeToBecomeGoodPeer 10000 - // blocks assuming 10s blocks ~ 28 hours. - // TODO (melekes): make it dynamic based on the actual block latencies - // from the live network. - // https://github.com/tendermint/tendermint/issues/3523 - SeedDisconnectWaitPeriod: 28 * time.Hour, - PersistentPeersMaxDialPeriod: config.P2P.PersistentPeersMaxDialPeriod, - }) - pexReactor.SetLogger(logger.With("module", "pex")) - sw.AddReactor("PEX", pexReactor) - return pexReactor -} - -// startStateSync starts an asynchronous state sync process, then switches to fast sync mode. -func startStateSync(ssR *statesync.Reactor, bcR fastSyncReactor, conR *cs.Reactor, - stateProvider statesync.StateProvider, config *cfg.StateSyncConfig, fastSync bool, - stateStore sm.Store, blockStore *store.BlockStore, state sm.State) error { - ssR.Logger.Info("Starting state sync") - - if stateProvider == nil { - var err error - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - stateProvider, err = statesync.NewLightClientStateProvider( - ctx, - state.ChainID, state.Version, state.InitialHeight, - config.RPCServers, light.TrustOptions{ - Period: config.TrustPeriod, - Height: config.TrustHeight, - Hash: config.TrustHashBytes(), - }, ssR.Logger.With("module", "light")) - if err != nil { - return fmt.Errorf("failed to set up light client state provider: %w", err) - } - } - - go func() { - state, commit, err := ssR.Sync(stateProvider, config.DiscoveryTime) - if err != nil { - ssR.Logger.Error("State sync failed", "err", err) - return - } - err = stateStore.Bootstrap(state) - if err != nil { - ssR.Logger.Error("Failed to bootstrap node with new state", "err", err) - return - } - err = blockStore.SaveSeenCommit(state.LastBlockHeight, commit) - if err != nil { - ssR.Logger.Error("Failed to store last seen commit", "err", err) - return - } - - if fastSync { - // FIXME Very ugly to have these metrics bleed through here. - conR.Metrics.StateSyncing.Set(0) - conR.Metrics.FastSyncing.Set(1) - err = bcR.SwitchToFastSync(state) - if err != nil { - ssR.Logger.Error("Failed to switch to fast sync", "err", err) - return - } - } else { - conR.SwitchToConsensus(state, true) - } - }() - return nil -} - -// NewNode returns a new, ready to go, Tendermint Node. -func NewNode(config *cfg.Config, - privValidator types.PrivValidator, - nodeKey *p2p.NodeKey, - clientCreator proxy.ClientCreator, - genesisDocProvider GenesisDocProvider, - dbProvider DBProvider, - metricsProvider MetricsProvider, - logger log.Logger, - misbehaviors map[int64]cs.Misbehavior, - options ...Option) (*Node, error) { - - blockStore, stateDB, err := initDBs(config, dbProvider) - if err != nil { - return nil, err - } - - stateStore := sm.NewStore(stateDB) - - state, genDoc, err := LoadStateFromDBOrGenesisDocProvider(stateDB, genesisDocProvider) - if err != nil { - return nil, err - } - - // Create the proxyApp and establish connections to the ABCI app (consensus, mempool, query). - proxyApp, err := createAndStartProxyAppConns(clientCreator, logger) - if err != nil { - return nil, err - } - - // EventBus and IndexerService must be started before the handshake because - // we might need to index the txs of the replayed block as this might not have happened - // when the node stopped last time (i.e. the node stopped after it saved the block - // but before it indexed the txs, or, endblocker panicked) - eventBus, err := createAndStartEventBus(logger) - if err != nil { - return nil, err - } - - indexerService, txIndexer, blockIndexer, err := createAndStartIndexerService(config, dbProvider, eventBus, logger) - if err != nil { - return nil, err - } - - // If an address is provided, listen on the socket for a connection from an - // external signing process. - if config.PrivValidatorListenAddr != "" { - // FIXME: we should start services inside OnStart - privValidator, err = createAndStartPrivValidatorSocketClient(config.PrivValidatorListenAddr, genDoc.ChainID, logger) - if err != nil { - return nil, fmt.Errorf("error with private validator socket client: %w", err) - } - } - - pubKey, err := privValidator.GetPubKey() - if err != nil { - return nil, fmt.Errorf("can't get pubkey: %w", err) - } - - // Determine whether we should do state and/or fast sync. - // We don't fast-sync when the only validator is us. - fastSync := config.FastSyncMode && !onlyValidatorIsUs(state, pubKey) - stateSync := config.StateSync.Enable && !onlyValidatorIsUs(state, pubKey) - if stateSync && state.LastBlockHeight > 0 { - logger.Info("Found local state with non-zero height, skipping state sync") - stateSync = false - } - - // Create the handshaker, which calls RequestInfo, sets the AppVersion on the state, - // and replays any blocks as necessary to sync tendermint with the app. - consensusLogger := logger.With("module", "consensus") - if !stateSync { - if err := doHandshake(stateStore, state, blockStore, genDoc, eventBus, proxyApp, consensusLogger); err != nil { - return nil, err - } - - // Reload the state. It will have the Version.Consensus.App set by the - // Handshake, and may have other modifications as well (ie. depending on - // what happened during block replay). - state, err = stateStore.Load() - if err != nil { - return nil, fmt.Errorf("cannot load state: %w", err) - } - } - - logNodeStartupInfo(state, pubKey, logger, consensusLogger) - - csMetrics, p2pMetrics, memplMetrics, smMetrics := metricsProvider(genDoc.ChainID) - - // Make MempoolReactor - mempoolReactor, mempool := createMempoolAndMempoolReactor(config, proxyApp, state, memplMetrics, logger) - - // Make Evidence Reactor - evidenceReactor, evidencePool, err := createEvidenceReactor(config, dbProvider, stateDB, blockStore, logger) - if err != nil { - return nil, err - } - - // make block executor for consensus and blockchain reactors to execute blocks - blockExec := sm.NewBlockExecutor( - stateStore, - logger.With("module", "state"), - proxyApp.Consensus(), - mempool, - evidencePool, - sm.BlockExecutorWithMetrics(smMetrics), - ) - - // Make BlockchainReactor. Don't start fast sync if we're doing a state sync first. - bcReactor, err := createBlockchainReactor(config, state, blockExec, blockStore, fastSync && !stateSync, logger) - if err != nil { - return nil, fmt.Errorf("could not create blockchain reactor: %w", err) - } - - // Make ConsensusReactor. Don't enable fully if doing a state sync and/or fast sync first. - // FIXME We need to update metrics here, since other reactors don't have access to them. - if stateSync { - csMetrics.StateSyncing.Set(1) - } else if fastSync { - csMetrics.FastSyncing.Set(1) - } - - logger.Info("Setting up maverick consensus reactor", "Misbehaviors", misbehaviors) - consensusReactor, consensusState := createConsensusReactor( - config, state, blockExec, blockStore, mempool, evidencePool, - privValidator, csMetrics, stateSync || fastSync, eventBus, consensusLogger, misbehaviors) - - // Set up state sync reactor, and schedule a sync if requested. - // FIXME The way we do phased startups (e.g. replay -> fast sync -> consensus) is very messy, - // we should clean this whole thing up. See: - // https://github.com/tendermint/tendermint/issues/4644 - stateSyncReactor := statesync.NewReactor( - *config.StateSync, - proxyApp.Snapshot(), - proxyApp.Query(), - config.StateSync.TempDir, - ) - stateSyncReactor.SetLogger(logger.With("module", "statesync")) - - nodeInfo, err := makeNodeInfo(config, nodeKey, txIndexer, genDoc, state) - if err != nil { - return nil, err - } - - // Setup Transport. - transport, peerFilters := createTransport(config, nodeInfo, nodeKey, proxyApp) - - // Setup Switch. - p2pLogger := logger.With("module", "p2p") - sw := createSwitch( - config, transport, p2pMetrics, peerFilters, mempoolReactor, bcReactor, - stateSyncReactor, consensusReactor, evidenceReactor, nodeInfo, nodeKey, p2pLogger, - ) - - err = sw.AddPersistentPeers(splitAndTrimEmpty(config.P2P.PersistentPeers, ",", " ")) - if err != nil { - return nil, fmt.Errorf("could not add peers from persistent_peers field: %w", err) - } - - err = sw.AddUnconditionalPeerIDs(splitAndTrimEmpty(config.P2P.UnconditionalPeerIDs, ",", " ")) - if err != nil { - return nil, fmt.Errorf("could not add peer ids from unconditional_peer_ids field: %w", err) - } - - addrBook, err := createAddrBookAndSetOnSwitch(config, sw, p2pLogger, nodeKey) - if err != nil { - return nil, fmt.Errorf("could not create addrbook: %w", err) - } - - // Optionally, start the pex reactor - // - // TODO: - // - // We need to set Seeds and PersistentPeers on the switch, - // since it needs to be able to use these (and their DNS names) - // even if the PEX is off. We can include the DNS name in the NetAddress, - // but it would still be nice to have a clear list of the current "PersistentPeers" - // somewhere that we can return with net_info. - // - // If PEX is on, it should handle dialing the seeds. Otherwise the switch does it. - // Note we currently use the addrBook regardless at least for AddOurAddress - var pexReactor *pex.Reactor - if config.P2P.PexReactor { - pexReactor = createPEXReactorAndAddToSwitch(addrBook, config, sw, logger) - } - - if config.RPC.PprofListenAddress != "" { - go func() { - logger.Info("Starting pprof server", "laddr", config.RPC.PprofListenAddress) - logger.Error("pprof server error", "err", http.ListenAndServe(config.RPC.PprofListenAddress, nil)) - }() - } - - node := &Node{ - config: config, - genesisDoc: genDoc, - privValidator: privValidator, - - transport: transport, - sw: sw, - addrBook: addrBook, - nodeInfo: nodeInfo, - nodeKey: nodeKey, - - stateStore: stateStore, - blockStore: blockStore, - bcReactor: bcReactor, - mempoolReactor: mempoolReactor, - mempool: mempool, - consensusState: consensusState, - consensusReactor: consensusReactor, - stateSyncReactor: stateSyncReactor, - stateSync: stateSync, - stateSyncGenesis: state, // Shouldn't be necessary, but need a way to pass the genesis state - pexReactor: pexReactor, - evidencePool: evidencePool, - proxyApp: proxyApp, - txIndexer: txIndexer, - indexerService: indexerService, - blockIndexer: blockIndexer, - eventBus: eventBus, - } - node.BaseService = *service.NewBaseService(logger, "Node", node) - - for _, option := range options { - option(node) - } - - return node, nil -} - -// OnStart starts the Node. It implements service.Service. -func (n *Node) OnStart() error { - now := tmtime.Now() - genTime := n.genesisDoc.GenesisTime - if genTime.After(now) { - n.Logger.Info("Genesis time is in the future. Sleeping until then...", "genTime", genTime) - time.Sleep(genTime.Sub(now)) - } - - // Add private IDs to addrbook to block those peers being added - n.addrBook.AddPrivateIDs(splitAndTrimEmpty(n.config.P2P.PrivatePeerIDs, ",", " ")) - - // Start the RPC server before the P2P server - // so we can eg. receive txs for the first block - if n.config.RPC.ListenAddress != "" { - listeners, err := n.startRPC() - if err != nil { - return err - } - n.rpcListeners = listeners - } - - if n.config.Instrumentation.Prometheus && - n.config.Instrumentation.PrometheusListenAddr != "" { - n.prometheusSrv = n.startPrometheusServer(n.config.Instrumentation.PrometheusListenAddr) - } - - // Start the transport. - addr, err := p2p.NewNetAddressString(p2p.IDAddressString(n.nodeKey.ID(), n.config.P2P.ListenAddress)) - if err != nil { - return err - } - if err := n.transport.Listen(*addr); err != nil { - return err - } - - n.isListening = true - - // Start the switch (the P2P server). - err = n.sw.Start() - if err != nil { - return err - } - - // Always connect to persistent peers - err = n.sw.DialPeersAsync(splitAndTrimEmpty(n.config.P2P.PersistentPeers, ",", " ")) - if err != nil { - return fmt.Errorf("could not dial peers from persistent_peers field: %w", err) - } - - // Run state sync - if n.stateSync { - bcR, ok := n.bcReactor.(fastSyncReactor) - if !ok { - return fmt.Errorf("this blockchain reactor does not support switching from state sync") - } - err := startStateSync(n.stateSyncReactor, bcR, n.consensusReactor, n.stateSyncProvider, - n.config.StateSync, n.config.FastSyncMode, n.stateStore, n.blockStore, n.stateSyncGenesis) - if err != nil { - return fmt.Errorf("failed to start state sync: %w", err) - } - } - - return nil -} - -// OnStop stops the Node. It implements service.Service. -func (n *Node) OnStop() { - n.BaseService.OnStop() - - n.Logger.Info("Stopping Node") - - // first stop the non-reactor services - if err := n.eventBus.Stop(); err != nil { - n.Logger.Error("Error closing eventBus", "err", err) - } - if err := n.indexerService.Stop(); err != nil { - n.Logger.Error("Error closing indexerService", "err", err) - } - - // now stop the reactors - if err := n.sw.Stop(); err != nil { - n.Logger.Error("Error closing switch", "err", err) - } - - if err := n.transport.Close(); err != nil { - n.Logger.Error("Error closing transport", "err", err) - } - - n.isListening = false - - // finally stop the listeners / external services - for _, l := range n.rpcListeners { - n.Logger.Info("Closing rpc listener", "listener", l) - if err := l.Close(); err != nil { - n.Logger.Error("Error closing listener", "listener", l, "err", err) - } - } - - if pvsc, ok := n.privValidator.(service.Service); ok { - if err := pvsc.Stop(); err != nil { - n.Logger.Error("Error closing private validator", "err", err) - } - } - - if n.prometheusSrv != nil { - if err := n.prometheusSrv.Shutdown(context.Background()); err != nil { - // Error from closing listeners, or context timeout: - n.Logger.Error("Prometheus HTTP server Shutdown", "err", err) - } - } -} - -// ConfigureRPC makes sure RPC has all the objects it needs to operate. -func (n *Node) ConfigureRPC() error { - pubKey, err := n.privValidator.GetPubKey() - if err != nil { - return fmt.Errorf("can't get pubkey: %w", err) - } - rpccore.SetEnvironment(&rpccore.Environment{ - ProxyAppQuery: n.proxyApp.Query(), - ProxyAppMempool: n.proxyApp.Mempool(), - - StateStore: n.stateStore, - BlockStore: n.blockStore, - EvidencePool: n.evidencePool, - ConsensusState: n.consensusState, - P2PPeers: n.sw, - P2PTransport: n, - - PubKey: pubKey, - GenDoc: n.genesisDoc, - TxIndexer: n.txIndexer, - BlockIndexer: n.blockIndexer, - ConsensusReactor: &consensus.Reactor{}, - EventBus: n.eventBus, - Mempool: n.mempool, - - Logger: n.Logger.With("module", "rpc"), - - Config: *n.config.RPC, - }) - return nil -} - -func (n *Node) startRPC() ([]net.Listener, error) { - err := n.ConfigureRPC() - if err != nil { - return nil, err - } - - listenAddrs := splitAndTrimEmpty(n.config.RPC.ListenAddress, ",", " ") - - if n.config.RPC.Unsafe { - rpccore.AddUnsafeRoutes() - } - - config := rpcserver.DefaultConfig() - config.MaxBodyBytes = n.config.RPC.MaxBodyBytes - config.MaxHeaderBytes = n.config.RPC.MaxHeaderBytes - config.MaxOpenConnections = n.config.RPC.MaxOpenConnections - // If necessary adjust global WriteTimeout to ensure it's greater than - // TimeoutBroadcastTxCommit. - // See https://github.com/tendermint/tendermint/issues/3435 - if config.WriteTimeout <= n.config.RPC.TimeoutBroadcastTxCommit { - config.WriteTimeout = n.config.RPC.TimeoutBroadcastTxCommit + 1*time.Second - } - - // we may expose the rpc over both a unix and tcp socket - listeners := make([]net.Listener, len(listenAddrs)) - for i, listenAddr := range listenAddrs { - mux := http.NewServeMux() - rpcLogger := n.Logger.With("module", "rpc-server") - wmLogger := rpcLogger.With("protocol", "websocket") - wm := rpcserver.NewWebsocketManager(rpccore.Routes, - rpcserver.OnDisconnect(func(remoteAddr string) { - err := n.eventBus.UnsubscribeAll(context.Background(), remoteAddr) - if err != nil && err != tmpubsub.ErrSubscriptionNotFound { - wmLogger.Error("Failed to unsubscribe addr from events", "addr", remoteAddr, "err", err) - } - }), - rpcserver.ReadLimit(config.MaxBodyBytes), - ) - wm.SetLogger(wmLogger) - mux.HandleFunc("/websocket", wm.WebsocketHandler) - rpcserver.RegisterRPCFuncs(mux, rpccore.Routes, rpcLogger) - listener, err := rpcserver.Listen( - listenAddr, - config, - ) - if err != nil { - return nil, err - } - - var rootHandler http.Handler = mux - if n.config.RPC.IsCorsEnabled() { - corsMiddleware := cors.New(cors.Options{ - AllowedOrigins: n.config.RPC.CORSAllowedOrigins, - AllowedMethods: n.config.RPC.CORSAllowedMethods, - AllowedHeaders: n.config.RPC.CORSAllowedHeaders, - }) - rootHandler = corsMiddleware.Handler(mux) - } - if n.config.RPC.IsTLSEnabled() { - go func() { - if err := rpcserver.ServeTLS( - listener, - rootHandler, - n.config.RPC.CertFile(), - n.config.RPC.KeyFile(), - rpcLogger, - config, - ); err != nil { - n.Logger.Error("Error serving server with TLS", "err", err) - } - }() - } else { - go func() { - if err := rpcserver.Serve( - listener, - rootHandler, - rpcLogger, - config, - ); err != nil { - n.Logger.Error("Error serving server", "err", err) - } - }() - } - - listeners[i] = listener - } - - // we expose a simplified api over grpc for convenience to app devs - grpcListenAddr := n.config.RPC.GRPCListenAddress - if grpcListenAddr != "" { - config := rpcserver.DefaultConfig() - config.MaxBodyBytes = n.config.RPC.MaxBodyBytes - config.MaxHeaderBytes = n.config.RPC.MaxHeaderBytes - // NOTE: GRPCMaxOpenConnections is used, not MaxOpenConnections - config.MaxOpenConnections = n.config.RPC.GRPCMaxOpenConnections - // If necessary adjust global WriteTimeout to ensure it's greater than - // TimeoutBroadcastTxCommit. - // See https://github.com/tendermint/tendermint/issues/3435 - if config.WriteTimeout <= n.config.RPC.TimeoutBroadcastTxCommit { - config.WriteTimeout = n.config.RPC.TimeoutBroadcastTxCommit + 1*time.Second - } - listener, err := rpcserver.Listen(grpcListenAddr, config) - if err != nil { - return nil, err - } - go func() { - if err := grpccore.StartGRPCServer(listener); err != nil { - n.Logger.Error("Error starting gRPC server", "err", err) - } - }() - listeners = append(listeners, listener) - } - - return listeners, nil -} - -// startPrometheusServer starts a Prometheus HTTP server, listening for metrics -// collectors on addr. -func (n *Node) startPrometheusServer(addr string) *http.Server { - srv := &http.Server{ - Addr: addr, - Handler: promhttp.InstrumentMetricHandler( - prometheus.DefaultRegisterer, promhttp.HandlerFor( - prometheus.DefaultGatherer, - promhttp.HandlerOpts{MaxRequestsInFlight: n.config.Instrumentation.MaxOpenConnections}, - ), - ), - } - go func() { - if err := srv.ListenAndServe(); err != http.ErrServerClosed { - // Error starting or closing listener: - n.Logger.Error("Prometheus HTTP server ListenAndServe", "err", err) - } - }() - return srv -} - -// Switch returns the Node's Switch. -func (n *Node) Switch() *p2p.Switch { - return n.sw -} - -// BlockStore returns the Node's BlockStore. -func (n *Node) BlockStore() *store.BlockStore { - return n.blockStore -} - -// ConsensusState returns the Node's ConsensusState. -func (n *Node) ConsensusState() *cs.State { - return n.consensusState -} - -// ConsensusReactor returns the Node's ConsensusReactor. -func (n *Node) ConsensusReactor() *cs.Reactor { - return n.consensusReactor -} - -// MempoolReactor returns the Node's mempool reactor. -func (n *Node) MempoolReactor() p2p.Reactor { - return n.mempoolReactor -} - -// Mempool returns the Node's mempool. -func (n *Node) Mempool() mempl.Mempool { - return n.mempool -} - -// PEXReactor returns the Node's PEXReactor. It returns nil if PEX is disabled. -func (n *Node) PEXReactor() *pex.Reactor { - return n.pexReactor -} - -// EvidencePool returns the Node's EvidencePool. -func (n *Node) EvidencePool() *evidence.Pool { - return n.evidencePool -} - -// EventBus returns the Node's EventBus. -func (n *Node) EventBus() *types.EventBus { - return n.eventBus -} - -// PrivValidator returns the Node's PrivValidator. -// XXX: for convenience only! -func (n *Node) PrivValidator() types.PrivValidator { - return n.privValidator -} - -// GenesisDoc returns the Node's GenesisDoc. -func (n *Node) GenesisDoc() *types.GenesisDoc { - return n.genesisDoc -} - -// ProxyApp returns the Node's AppConns, representing its connections to the ABCI application. -func (n *Node) ProxyApp() proxy.AppConns { - return n.proxyApp -} - -// Config returns the Node's config. -func (n *Node) Config() *cfg.Config { - return n.config -} - -//------------------------------------------------------------------------------ - -func (n *Node) Listeners() []string { - return []string{ - fmt.Sprintf("Listener(@%v)", n.config.P2P.ExternalAddress), - } -} - -func (n *Node) IsListening() bool { - return n.isListening -} - -// NodeInfo returns the Node's Info from the Switch. -func (n *Node) NodeInfo() p2p.NodeInfo { - return n.nodeInfo -} - -func makeNodeInfo( - config *cfg.Config, - nodeKey *p2p.NodeKey, - txIndexer txindex.TxIndexer, - genDoc *types.GenesisDoc, - state sm.State, -) (p2p.NodeInfo, error) { - txIndexerStatus := "on" - if _, ok := txIndexer.(*null.TxIndex); ok { - txIndexerStatus = "off" - } - - var bcChannel byte - switch config.FastSync.Version { - case "v0": - bcChannel = bcv0.BlockchainChannel - case "v1": - bcChannel = bcv1.BlockchainChannel - case "v2": - bcChannel = bcv2.BlockchainChannel - default: - return nil, fmt.Errorf("unknown fastsync version %s", config.FastSync.Version) - } - - nodeInfo := p2p.DefaultNodeInfo{ - ProtocolVersion: p2p.NewProtocolVersion( - version.P2PProtocol, // global - state.Version.Consensus.Block, - state.Version.Consensus.App, - ), - DefaultNodeID: nodeKey.ID(), - Network: genDoc.ChainID, - Version: version.TMCoreSemVer, - Channels: []byte{ - bcChannel, - cs.StateChannel, cs.DataChannel, cs.VoteChannel, cs.VoteSetBitsChannel, - mempl.MempoolChannel, - evidence.EvidenceChannel, - statesync.SnapshotChannel, statesync.ChunkChannel, - }, - Moniker: config.Moniker, - Other: p2p.DefaultNodeInfoOther{ - TxIndex: txIndexerStatus, - RPCAddress: config.RPC.ListenAddress, - }, - } - - if config.P2P.PexReactor { - nodeInfo.Channels = append(nodeInfo.Channels, pex.PexChannel) - } - - lAddr := config.P2P.ExternalAddress - - if lAddr == "" { - lAddr = config.P2P.ListenAddress - } - - nodeInfo.ListenAddr = lAddr - - err := nodeInfo.Validate() - return nodeInfo, err -} - -//------------------------------------------------------------------------------ - -var ( - genesisDocKey = []byte("genesisDoc") -) - -// LoadStateFromDBOrGenesisDocProvider attempts to load the state from the -// database, or creates one using the given genesisDocProvider and persists the -// result to the database. On success this also returns the genesis doc loaded -// through the given provider. -func LoadStateFromDBOrGenesisDocProvider( - stateDB dbm.DB, - genesisDocProvider GenesisDocProvider, -) (sm.State, *types.GenesisDoc, error) { - // Get genesis doc - genDoc, err := loadGenesisDoc(stateDB) - if err != nil { - genDoc, err = genesisDocProvider() - if err != nil { - return sm.State{}, nil, err - } - // save genesis doc to prevent a certain class of user errors (e.g. when it - // was changed, accidentally or not). Also good for audit trail. - saveGenesisDoc(stateDB, genDoc) - } - stateStore := sm.NewStore(stateDB) - state, err := stateStore.LoadFromDBOrGenesisDoc(genDoc) - if err != nil { - return sm.State{}, nil, err - } - return state, genDoc, nil -} - -// panics if failed to unmarshal bytes -func loadGenesisDoc(db dbm.DB) (*types.GenesisDoc, error) { - b, err := db.Get(genesisDocKey) - if err != nil { - panic(err) - } - if len(b) == 0 { - return nil, errors.New("genesis doc not found") - } - var genDoc *types.GenesisDoc - err = tmjson.Unmarshal(b, &genDoc) - if err != nil { - panic(fmt.Sprintf("Failed to load genesis doc due to unmarshaling error: %v (bytes: %X)", err, b)) - } - return genDoc, nil -} - -// panics if failed to marshal the given genesis document -func saveGenesisDoc(db dbm.DB, genDoc *types.GenesisDoc) { - b, err := tmjson.Marshal(genDoc) - if err != nil { - panic(fmt.Sprintf("Failed to save genesis doc due to marshaling error: %v", err)) - } - if err := db.SetSync(genesisDocKey, b); err != nil { - panic(fmt.Sprintf("Failed to save genesis doc: %v", err)) - } -} - -func createAndStartPrivValidatorSocketClient( - listenAddr, - chainID string, - logger log.Logger, -) (types.PrivValidator, error) { - pve, err := privval.NewSignerListener(listenAddr, logger) - if err != nil { - return nil, fmt.Errorf("failed to start private validator: %w", err) - } - - pvsc, err := privval.NewSignerClient(pve, chainID) - if err != nil { - return nil, fmt.Errorf("failed to start private validator: %w", err) - } - - // try to get a pubkey from private validate first time - _, err = pvsc.GetPubKey() - if err != nil { - return nil, fmt.Errorf("can't get pubkey: %w", err) - } - - const ( - retries = 50 // 50 * 100ms = 5s total - timeout = 100 * time.Millisecond - ) - pvscWithRetries := privval.NewRetrySignerClient(pvsc, retries, timeout) - - return pvscWithRetries, nil -} - -// splitAndTrimEmpty slices s into all subslices separated by sep and returns a -// slice of the string s with all leading and trailing Unicode code points -// contained in cutset removed. If sep is empty, SplitAndTrim splits after each -// UTF-8 sequence. First part is equivalent to strings.SplitN with a count of -// -1. also filter out empty strings, only return non-empty strings. -func splitAndTrimEmpty(s, sep, cutset string) []string { - if s == "" { - return []string{} - } - - spl := strings.Split(s, sep) - nonEmptyStrings := make([]string, 0, len(spl)) - for i := 0; i < len(spl); i++ { - element := strings.Trim(spl[i], cutset) - if element != "" { - nonEmptyStrings = append(nonEmptyStrings, element) - } - } - return nonEmptyStrings -} diff --git a/test/maverick/node/privval.go b/test/maverick/node/privval.go deleted file mode 100644 index 441b6ca9d..000000000 --- a/test/maverick/node/privval.go +++ /dev/null @@ -1,358 +0,0 @@ -package node - -import ( - "errors" - "fmt" - "io/ioutil" - - "github.com/tendermint/tendermint/crypto" - "github.com/tendermint/tendermint/crypto/ed25519" - tmbytes "github.com/tendermint/tendermint/libs/bytes" - tmjson "github.com/tendermint/tendermint/libs/json" - tmos "github.com/tendermint/tendermint/libs/os" - "github.com/tendermint/tendermint/libs/tempfile" - tmproto "github.com/tendermint/tendermint/proto/tendermint/types" - "github.com/tendermint/tendermint/types" -) - -// ******************************************************************************************************************* -// -// WARNING: FOR TESTING ONLY. DO NOT USE THIS FILE OUTSIDE MAVERICK -// -// ******************************************************************************************************************* - -const ( - stepNone int8 = 0 // Used to distinguish the initial state - stepPropose int8 = 1 - stepPrevote int8 = 2 - stepPrecommit int8 = 3 -) - -// A vote is either stepPrevote or stepPrecommit. -func voteToStep(vote *tmproto.Vote) int8 { - switch vote.Type { - case tmproto.PrevoteType: - return stepPrevote - case tmproto.PrecommitType: - return stepPrecommit - default: - panic(fmt.Sprintf("Unknown vote type: %v", vote.Type)) - } -} - -//------------------------------------------------------------------------------- - -// FilePVKey stores the immutable part of PrivValidator. -type FilePVKey struct { - Address types.Address `json:"address"` - PubKey crypto.PubKey `json:"pub_key"` - PrivKey crypto.PrivKey `json:"priv_key"` - - filePath string -} - -// Save persists the FilePVKey to its filePath. -func (pvKey FilePVKey) Save() { - outFile := pvKey.filePath - if outFile == "" { - panic("cannot save PrivValidator key: filePath not set") - } - - jsonBytes, err := tmjson.MarshalIndent(pvKey, "", " ") - if err != nil { - panic(err) - } - err = tempfile.WriteFileAtomic(outFile, jsonBytes, 0600) - if err != nil { - panic(err) - } - -} - -//------------------------------------------------------------------------------- - -// FilePVLastSignState stores the mutable part of PrivValidator. -type FilePVLastSignState struct { - Height int64 `json:"height"` - Round int32 `json:"round"` - Step int8 `json:"step"` - Signature []byte `json:"signature,omitempty"` - SignBytes tmbytes.HexBytes `json:"signbytes,omitempty"` - - filePath string -} - -// CheckHRS checks the given height, round, step (HRS) against that of the -// FilePVLastSignState. It returns an error if the arguments constitute a regression, -// or if they match but the SignBytes are empty. -// The returned boolean indicates whether the last Signature should be reused - -// it returns true if the HRS matches the arguments and the SignBytes are not empty (indicating -// we have already signed for this HRS, and can reuse the existing signature). -// It panics if the HRS matches the arguments, there's a SignBytes, but no Signature. -func (lss *FilePVLastSignState) CheckHRS(height int64, round int32, step int8) (bool, error) { - - if lss.Height > height { - return false, fmt.Errorf("height regression. Got %v, last height %v", height, lss.Height) - } - - if lss.Height == height { - if lss.Round > round { - return false, fmt.Errorf("round regression at height %v. Got %v, last round %v", height, round, lss.Round) - } - - if lss.Round == round { - if lss.Step > step { - return false, fmt.Errorf( - "step regression at height %v round %v. Got %v, last step %v", - height, - round, - step, - lss.Step, - ) - } else if lss.Step == step { - if lss.SignBytes != nil { - if lss.Signature == nil { - panic("pv: Signature is nil but SignBytes is not!") - } - return true, nil - } - return false, errors.New("no SignBytes found") - } - } - } - return false, nil -} - -// Save persists the FilePvLastSignState to its filePath. -func (lss *FilePVLastSignState) Save() { - outFile := lss.filePath - if outFile == "" { - panic("cannot save FilePVLastSignState: filePath not set") - } - jsonBytes, err := tmjson.MarshalIndent(lss, "", " ") - if err != nil { - panic(err) - } - err = tempfile.WriteFileAtomic(outFile, jsonBytes, 0600) - if err != nil { - panic(err) - } -} - -//------------------------------------------------------------------------------- - -// FilePV implements PrivValidator using data persisted to disk -// to prevent double signing. -// NOTE: the directories containing pv.Key.filePath and pv.LastSignState.filePath must already exist. -// It includes the LastSignature and LastSignBytes so we don't lose the signature -// if the process crashes after signing but before the resulting consensus message is processed. -type FilePV struct { - Key FilePVKey - LastSignState FilePVLastSignState -} - -// GenFilePV generates a new validator with randomly generated private key -// and sets the filePaths, but does not call Save(). -func GenFilePV(keyFilePath, stateFilePath string) *FilePV { - privKey := ed25519.GenPrivKey() - - return &FilePV{ - Key: FilePVKey{ - Address: privKey.PubKey().Address(), - PubKey: privKey.PubKey(), - PrivKey: privKey, - filePath: keyFilePath, - }, - LastSignState: FilePVLastSignState{ - Step: stepNone, - filePath: stateFilePath, - }, - } -} - -// LoadFilePV loads a FilePV from the filePaths. The FilePV handles double -// signing prevention by persisting data to the stateFilePath. If either file path -// does not exist, the program will exit. -func LoadFilePV(keyFilePath, stateFilePath string) *FilePV { - return loadFilePV(keyFilePath, stateFilePath, true) -} - -// LoadFilePVEmptyState loads a FilePV from the given keyFilePath, with an empty LastSignState. -// If the keyFilePath does not exist, the program will exit. -func LoadFilePVEmptyState(keyFilePath, stateFilePath string) *FilePV { - return loadFilePV(keyFilePath, stateFilePath, false) -} - -// If loadState is true, we load from the stateFilePath. Otherwise, we use an empty LastSignState. -func loadFilePV(keyFilePath, stateFilePath string, loadState bool) *FilePV { - keyJSONBytes, err := ioutil.ReadFile(keyFilePath) - if err != nil { - tmos.Exit(err.Error()) - } - pvKey := FilePVKey{} - err = tmjson.Unmarshal(keyJSONBytes, &pvKey) - if err != nil { - tmos.Exit(fmt.Sprintf("Error reading PrivValidator key from %v: %v\n", keyFilePath, err)) - } - - // overwrite pubkey and address for convenience - pvKey.PubKey = pvKey.PrivKey.PubKey() - pvKey.Address = pvKey.PubKey.Address() - pvKey.filePath = keyFilePath - - pvState := FilePVLastSignState{} - - if loadState { - stateJSONBytes, err := ioutil.ReadFile(stateFilePath) - if err != nil { - tmos.Exit(err.Error()) - } - err = tmjson.Unmarshal(stateJSONBytes, &pvState) - if err != nil { - tmos.Exit(fmt.Sprintf("Error reading PrivValidator state from %v: %v\n", stateFilePath, err)) - } - } - - pvState.filePath = stateFilePath - - return &FilePV{ - Key: pvKey, - LastSignState: pvState, - } -} - -// LoadOrGenFilePV loads a FilePV from the given filePaths -// or else generates a new one and saves it to the filePaths. -func LoadOrGenFilePV(keyFilePath, stateFilePath string) *FilePV { - var pv *FilePV - if tmos.FileExists(keyFilePath) { - pv = LoadFilePV(keyFilePath, stateFilePath) - } else { - pv = GenFilePV(keyFilePath, stateFilePath) - pv.Save() - } - return pv -} - -// GetAddress returns the address of the validator. -// Implements PrivValidator. -func (pv *FilePV) GetAddress() types.Address { - return pv.Key.Address -} - -// GetPubKey returns the public key of the validator. -// Implements PrivValidator. -func (pv *FilePV) GetPubKey() (crypto.PubKey, error) { - return pv.Key.PubKey, nil -} - -// SignVote signs a canonical representation of the vote, along with the -// chainID. Implements PrivValidator. -func (pv *FilePV) SignVote(chainID string, vote *tmproto.Vote) error { - if err := pv.signVote(chainID, vote); err != nil { - return fmt.Errorf("error signing vote: %v", err) - } - return nil -} - -// SignProposal signs a canonical representation of the proposal, along with -// the chainID. Implements PrivValidator. -func (pv *FilePV) SignProposal(chainID string, proposal *tmproto.Proposal) error { - if err := pv.signProposal(chainID, proposal); err != nil { - return fmt.Errorf("error signing proposal: %v", err) - } - return nil -} - -// Save persists the FilePV to disk. -func (pv *FilePV) Save() { - pv.Key.Save() - pv.LastSignState.Save() -} - -// Reset resets all fields in the FilePV. -// NOTE: Unsafe! -func (pv *FilePV) Reset() { - var sig []byte - pv.LastSignState.Height = 0 - pv.LastSignState.Round = 0 - pv.LastSignState.Step = 0 - pv.LastSignState.Signature = sig - pv.LastSignState.SignBytes = nil - pv.Save() -} - -// String returns a string representation of the FilePV. -func (pv *FilePV) String() string { - return fmt.Sprintf( - "PrivValidator{%v LH:%v, LR:%v, LS:%v}", - pv.GetAddress(), - pv.LastSignState.Height, - pv.LastSignState.Round, - pv.LastSignState.Step, - ) -} - -//------------------------------------------------------------------------------------ - -// signVote checks if the vote is good to sign and sets the vote signature. -// It may need to set the timestamp as well if the vote is otherwise the same as -// a previously signed vote (ie. we crashed after signing but before the vote hit the WAL). -func (pv *FilePV) signVote(chainID string, vote *tmproto.Vote) error { - height, round, step := vote.Height, vote.Round, voteToStep(vote) - - lss := pv.LastSignState - - _, err := lss.CheckHRS(height, round, step) - if err != nil { - return err - } - - signBytes := types.VoteSignBytes(chainID, vote) - - // It passed the checks. Sign the vote - sig, err := pv.Key.PrivKey.Sign(signBytes) - if err != nil { - return err - } - pv.saveSigned(height, round, step, signBytes, sig) - vote.Signature = sig - return nil -} - -// signProposal checks if the proposal is good to sign and sets the proposal signature. -// It may need to set the timestamp as well if the proposal is otherwise the same as -// a previously signed proposal ie. we crashed after signing but before the proposal hit the WAL). -func (pv *FilePV) signProposal(chainID string, proposal *tmproto.Proposal) error { - height, round, step := proposal.Height, proposal.Round, stepPropose - - lss := pv.LastSignState - - _, err := lss.CheckHRS(height, round, step) - if err != nil { - return err - } - - signBytes := types.ProposalSignBytes(chainID, proposal) - - // It passed the checks. Sign the proposal - sig, err := pv.Key.PrivKey.Sign(signBytes) - if err != nil { - return err - } - pv.saveSigned(height, round, step, signBytes, sig) - proposal.Signature = sig - return nil -} - -// Persist height/round/step and signature -func (pv *FilePV) saveSigned(height int64, round int32, step int8, - signBytes []byte, sig []byte) { - - pv.LastSignState.Height = height - pv.LastSignState.Round = round - pv.LastSignState.Step = step - pv.LastSignState.Signature = sig - pv.LastSignState.SignBytes = signBytes - pv.LastSignState.Save() -} diff --git a/tools/tm-signer-harness/internal/test_harness_test.go b/tools/tm-signer-harness/internal/test_harness_test.go index cf22bc836..85a589185 100644 --- a/tools/tm-signer-harness/internal/test_harness_test.go +++ b/tools/tm-signer-harness/internal/test_harness_test.go @@ -2,7 +2,6 @@ package internal import ( "fmt" - "io/ioutil" "os" "testing" "time" @@ -187,7 +186,7 @@ func cleanup(cfg TestHarnessConfig) { } func makeTempFile(name, content string) string { - tempFile, err := ioutil.TempFile("", fmt.Sprintf("%s-*", name)) + tempFile, err := os.CreateTemp("", fmt.Sprintf("%s-*", name)) if err != nil { panic(err) } diff --git a/tools/tm-signer-harness/main.go b/tools/tm-signer-harness/main.go index d624234ae..6d75abe7e 100644 --- a/tools/tm-signer-harness/main.go +++ b/tools/tm-signer-harness/main.go @@ -3,7 +3,6 @@ package main import ( "flag" "fmt" - "io/ioutil" "os" "path/filepath" "time" @@ -136,7 +135,7 @@ func extractKey(tmhome, outputPath string) { stateFile := filepath.Join(internal.ExpandPath(tmhome), "data", "priv_validator_state.json") fpv := privval.LoadFilePV(keyFile, stateFile) pkb := []byte(fpv.Key.PrivKey.(ed25519.PrivKey)) - if err := ioutil.WriteFile(internal.ExpandPath(outputPath), pkb[:32], 0600); err != nil { + if err := os.WriteFile(internal.ExpandPath(outputPath), pkb[:32], 0600); err != nil { logger.Info("Failed to write private key", "output", outputPath, "err", err) os.Exit(1) } diff --git a/tools/tools.go b/tools/tools.go index adfaa7f14..23d2366bb 100644 --- a/tools/tools.go +++ b/tools/tools.go @@ -9,5 +9,6 @@ package tools import ( _ "github.com/bufbuild/buf/cmd/buf" _ "github.com/golangci/golangci-lint/cmd/golangci-lint" + _ "github.com/pointlander/peg" _ "github.com/vektra/mockery/v2" ) diff --git a/types/genesis.go b/types/genesis.go index 20fc79721..974bb8112 100644 --- a/types/genesis.go +++ b/types/genesis.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" "fmt" - "io/ioutil" + "os" "time" "github.com/tendermint/tendermint/crypto" @@ -126,7 +126,7 @@ func GenesisDocFromJSON(jsonBlob []byte) (*GenesisDoc, error) { // GenesisDocFromFile reads JSON data from a file and unmarshalls it into a GenesisDoc. func GenesisDocFromFile(genDocFile string) (*GenesisDoc, error) { - jsonBlob, err := ioutil.ReadFile(genDocFile) + jsonBlob, err := os.ReadFile(genDocFile) if err != nil { return nil, fmt.Errorf("couldn't read GenesisDoc file: %w", err) } diff --git a/types/genesis_test.go b/types/genesis_test.go index bfb122e64..213b1330f 100644 --- a/types/genesis_test.go +++ b/types/genesis_test.go @@ -1,7 +1,6 @@ package types import ( - "io/ioutil" "os" "testing" @@ -122,7 +121,7 @@ func TestGenesisGood(t *testing.T) { } func TestGenesisSaveAs(t *testing.T) { - tmpfile, err := ioutil.TempFile("", "genesis") + tmpfile, err := os.CreateTemp("", "genesis") require.NoError(t, err) defer os.Remove(tmpfile.Name()) diff --git a/types/part_set_test.go b/types/part_set_test.go index c6ea0f452..2dfe12263 100644 --- a/types/part_set_test.go +++ b/types/part_set_test.go @@ -1,7 +1,7 @@ package types import ( - "io/ioutil" + "io" "testing" "github.com/stretchr/testify/assert" @@ -57,7 +57,7 @@ func TestBasicPartSet(t *testing.T) { // Reconstruct data, assert that they are equal. data2Reader := partSet2.GetReader() - data2, err := ioutil.ReadAll(data2Reader) + data2, err := io.ReadAll(data2Reader) require.NoError(t, err) assert.Equal(t, data, data2) diff --git a/types/results.go b/types/results.go index 9181450bc..d7168bc75 100644 --- a/types/results.go +++ b/types/results.go @@ -46,9 +46,7 @@ func (a ABCIResults) toByteSlices() [][]byte { // ResponseDeliverTx and returns another ResponseDeliverTx. func deterministicResponseDeliverTx(response *abci.ResponseDeliverTx) *abci.ResponseDeliverTx { return &abci.ResponseDeliverTx{ - Code: response.Code, - Data: response.Data, - GasWanted: response.GasWanted, - GasUsed: response.GasUsed, + Code: response.Code, + Data: response.Data, } } diff --git a/version/version.go b/version/version.go index a5227c12d..d14f5cc31 100644 --- a/version/version.go +++ b/version/version.go @@ -15,7 +15,7 @@ const ( ) var ( - // P2PProtocol versions all p2p behaviur and msgs. + // P2PProtocol versions all p2p behavior and msgs. // This includes proposer selection. P2PProtocol uint64 = 8