Compare commits: wb/impleme… ... wb/issue-9…
35 Commits

| Author | SHA1 | Date |
|---|---|---|
| | 06050e4324 | |
| | 4fbfea79ad | |
| | 241be64da5 | |
| | 2c40ca52c1 | |
| | 3136b7a084 | |
| | c755d03611 | |
| | 7a2e44184b | |
| | 77e7318613 | |
| | caa75ae791 | |
| | af2981a2f7 | |
| | f9307cac51 | |
| | 3bd2153136 | |
| | 301211c2cb | |
| | 58ee42ca52 | |
| | 6e38fff9ed | |
| | 93ab364abc | |
| | 1c60efc0bc | |
| | 6768b98568 | |
| | 3cdfbda2eb | |
| | 4552cfc271 | |
| | 91fba07e49 | |
| | 38d1b2f873 | |
| | e4fb662c8d | |
| | 810b9c613b | |
| | 341cabec0e | |
| | 5b98095ac3 | |
| | 59b28e71a0 | |
| | 071d787a45 | |
| | a54a424478 | |
| | c961fb58eb | |
| | c7140bf817 | |
| | 5df9c410ff | |
| | c8f203293d | |
| | b06e1cea54 | |
| | 6ea968d576 | |
.github/workflows/docker.yml (6 changes)

@@ -41,17 +41,17 @@ jobs:
          platforms: all

      - name: Set up Docker Build
        uses: docker/setup-buildx-action@v2.0.0
        uses: docker/setup-buildx-action@v2.1.0

      - name: Login to DockerHub
        if: ${{ github.event_name != 'pull_request' }}
        uses: docker/login-action@v2.0.0
        uses: docker/login-action@v2.1.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Publish to Docker Hub
        uses: docker/build-push-action@v3.1.1
        uses: docker/build-push-action@v3.2.0
        with:
          context: .
          file: ./DOCKER/Dockerfile
.github/workflows/e2e-nightly-34x.yml (4 changes)

@@ -57,7 +57,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Notify Slack on failure
        uses: slackapi/slack-github-action@v1.22.0
        uses: slackapi/slack-github-action@v1.23.0
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
@@ -72,7 +72,7 @@ jobs:
              "type": "section",
              "text": {
                "type": "mrkdwn",
                "text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
                "text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> related to the failure."
              }
            }
          ]
.github/workflows/e2e-nightly-37x.yml (4 changes)

@@ -57,7 +57,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Notify Slack on failure
        uses: slackapi/slack-github-action@v1.22.0
        uses: slackapi/slack-github-action@v1.23.0
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
@@ -72,7 +72,7 @@ jobs:
              "type": "section",
              "text": {
                "type": "mrkdwn",
                "text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
                "text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> related to the failure."
              }
            }
          ]
.github/workflows/e2e-nightly-main.yml (4 changes)

@@ -46,7 +46,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Notify Slack on failure
        uses: slackapi/slack-github-action@v1.22.0
        uses: slackapi/slack-github-action@v1.23.0
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
@@ -61,7 +61,7 @@ jobs:
              "type": "section",
              "text": {
                "type": "mrkdwn",
                "text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> that caused the failure."
                "text": ":skull: Nightly E2E tests for `${{ env.BRANCH }}` failed. See the <${{ env.RUN_URL }}|run details> and the <${{ env.COMMIT_URL }}|commit> related to the failure."
              }
            }
          ]
.github/workflows/fuzz-nightly.yml (2 changes)

@@ -76,7 +76,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Notify Slack on failure
        uses: slackapi/slack-github-action@v1.22.0
        uses: slackapi/slack-github-action@v1.23.0
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
.github/workflows/janitor.yml (2 changes)

@@ -10,7 +10,7 @@ jobs:
    runs-on: ubuntu-latest
    timeout-minutes: 3
    steps:
      - uses: styfle/cancel-workflow-action@0.10.1
      - uses: styfle/cancel-workflow-action@0.11.0
        with:
          workflow_id: 1041851,1401230,2837803
          access_token: ${{ github.token }}
.gitignore (2 changes)

@@ -55,3 +55,5 @@ proto/spec/**/*.pb.go
*.pdf
*.gz
*.dvi
# Python virtual environments
.venv
CHANGELOG.md (30 changes)

@@ -2,6 +2,36 @@

Friendly reminder, we have a [bug bounty program](https://hackerone.com/cosmos).

## v0.34.22

This release includes several bug fixes, [one of
which](https://github.com/tendermint/tendermint/pull/9518) we discovered while
building up a baseline for v0.34 against which to compare our upcoming v0.37
release during our [QA process](./docs/qa/).

Special thanks to external contributors on this release: @RiccardoM

### FEATURES

- [rpc] [\#9423](https://github.com/tendermint/tendermint/pull/9423) Support
  HTTPS URLs from the WebSocket client (@RiccardoM, @cmwaters)

### BUG FIXES

- [config] [\#9483](https://github.com/tendermint/tendermint/issues/9483)
  Calling `tendermint init` would incorrectly leave out the new `[storage]`
  section delimiter in the generated configuration file - this has now been
  fixed
- [p2p] [\#9500](https://github.com/tendermint/tendermint/issues/9500) Prevent
  peers who have errored being added to the peer set (@jmalicevic)
- [indexer] [\#9473](https://github.com/tendermint/tendermint/issues/9473) Fix
  bug that caused the psql indexer to index empty blocks whenever one of the
  transactions returned a non zero code. The relevant deduplication logic has
  been moved within the kv indexer only (@cmwaters)
- [blocksync] [\#9518](https://github.com/tendermint/tendermint/issues/9518) A
  block sync stall was observed during our QA process whereby the node was
  unable to make progress. Retrying block requests after a timeout fixes this.

## v0.34.21

Release highlights include:
Makefile (2 changes)

@@ -271,7 +271,7 @@ format:

lint:
    @echo "--> Running linter"
    @golangci-lint run
    @go run github.com/golangci/golangci-lint/cmd/golangci-lint run
.PHONY: lint

DESTINATION = ./index.html.md
@@ -19,6 +19,58 @@ const (
    BlockResponseMessageFieldKeySize
)

// EncodeMsg encodes a Protobuf message
func EncodeMsg(pb proto.Message) ([]byte, error) {
    msg := bcproto.Message{}

    switch pb := pb.(type) {
    case *bcproto.BlockRequest:
        msg.Sum = &bcproto.Message_BlockRequest{BlockRequest: pb}
    case *bcproto.BlockResponse:
        msg.Sum = &bcproto.Message_BlockResponse{BlockResponse: pb}
    case *bcproto.NoBlockResponse:
        msg.Sum = &bcproto.Message_NoBlockResponse{NoBlockResponse: pb}
    case *bcproto.StatusRequest:
        msg.Sum = &bcproto.Message_StatusRequest{StatusRequest: pb}
    case *bcproto.StatusResponse:
        msg.Sum = &bcproto.Message_StatusResponse{StatusResponse: pb}
    default:
        return nil, fmt.Errorf("unknown message type %T", pb)
    }

    bz, err := proto.Marshal(&msg)
    if err != nil {
        return nil, fmt.Errorf("unable to marshal %T: %w", pb, err)
    }

    return bz, nil
}

// DecodeMsg decodes a Protobuf message.
func DecodeMsg(bz []byte) (proto.Message, error) {
    pb := &bcproto.Message{}

    err := proto.Unmarshal(bz, pb)
    if err != nil {
        return nil, err
    }

    switch msg := pb.Sum.(type) {
    case *bcproto.Message_BlockRequest:
        return msg.BlockRequest, nil
    case *bcproto.Message_BlockResponse:
        return msg.BlockResponse, nil
    case *bcproto.Message_NoBlockResponse:
        return msg.NoBlockResponse, nil
    case *bcproto.Message_StatusRequest:
        return msg.StatusRequest, nil
    case *bcproto.Message_StatusResponse:
        return msg.StatusResponse, nil
    default:
        return nil, fmt.Errorf("unknown message type %T", msg)
    }
}

// ValidateMsg validates a message.
func ValidateMsg(pb proto.Message) error {
    if pb == nil {
@@ -143,20 +143,21 @@ func (bcR *Reactor) GetChannels() []*p2p.ChannelDescriptor {
SendQueueCapacity: 1000,
RecvBufferCapacity: 50 * 4096,
RecvMessageCapacity: MaxMsgSize,
MessageType: &bcproto.Message{},
},
}
}

// AddPeer implements Reactor by sending our state to peer.
func (bcR *Reactor) AddPeer(peer p2p.Peer) {
peer.Send(p2p.Envelope{
ChannelID: BlocksyncChannel,
Message: &bcproto.StatusResponse{
Base: bcR.store.Base(),
Height: bcR.store.Height(),
},
})
msgBytes, err := EncodeMsg(&bcproto.StatusResponse{
Base: bcR.store.Base(),
Height: bcR.store.Height()})
if err != nil {
bcR.Logger.Error("could not convert msg to protobuf", "err", err)
return
}

peer.Send(BlocksyncChannel, msgBytes)
// it's OK if send fails. will try later in poolRoutine

// peer is added to the pool once we receive the first
@@ -181,53 +182,69 @@ func (bcR *Reactor) respondToPeer(msg *bcproto.BlockRequest,
return false
}

return src.TrySend(p2p.Envelope{
ChannelID: BlocksyncChannel,
Message: &bcproto.BlockResponse{Block: bl},
})
msgBytes, err := EncodeMsg(&bcproto.BlockResponse{Block: bl})
if err != nil {
bcR.Logger.Error("could not marshal msg", "err", err)
return false
}

return src.TrySend(BlocksyncChannel, msgBytes)
}

bcR.Logger.Info("Peer asking for a block we don't have", "src", src, "height", msg.Height)
return src.TrySend(p2p.Envelope{
ChannelID: BlocksyncChannel,
Message: &bcproto.NoBlockResponse{Height: msg.Height},
})

msgBytes, err := EncodeMsg(&bcproto.NoBlockResponse{Height: msg.Height})
if err != nil {
bcR.Logger.Error("could not convert msg to protobuf", "err", err)
return false
}

return src.TrySend(BlocksyncChannel, msgBytes)
}

// Receive implements Reactor by handling 4 types of messages (look below).
func (bcR *Reactor) Receive(e p2p.Envelope) {
if err := ValidateMsg(e.Message); err != nil {
bcR.Logger.Error("Peer sent us invalid msg", "peer", e.Src, "msg", e.Message, "err", err)
bcR.Switch.StopPeerForError(e.Src, err)
func (bcR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
msg, err := DecodeMsg(msgBytes)
if err != nil {
bcR.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err)
bcR.Switch.StopPeerForError(src, err)
return
}

bcR.Logger.Debug("Receive", "e.Src", e.Src, "chID", e.ChannelID, "msg", e.Message)
if err = ValidateMsg(msg); err != nil {
bcR.Logger.Error("Peer sent us invalid msg", "peer", src, "msg", msg, "err", err)
bcR.Switch.StopPeerForError(src, err)
return
}

switch msg := e.Message.(type) {
bcR.Logger.Debug("Receive", "src", src, "chID", chID, "msg", msg)

switch msg := msg.(type) {
case *bcproto.BlockRequest:
bcR.respondToPeer(msg, e.Src)
bcR.respondToPeer(msg, src)
case *bcproto.BlockResponse:
bi, err := types.BlockFromProto(msg.Block)
if err != nil {
bcR.Logger.Error("Block content is invalid", "err", err)
return
}
bcR.pool.AddBlock(e.Src.ID(), bi, msg.Block.Size())
bcR.pool.AddBlock(src.ID(), bi, len(msgBytes))
case *bcproto.StatusRequest:
// Send peer our state.
e.Src.TrySend(p2p.Envelope{
ChannelID: BlocksyncChannel,
Message: &bcproto.StatusResponse{
Height: bcR.store.Height(),
Base: bcR.store.Base(),
},
msgBytes, err := EncodeMsg(&bcproto.StatusResponse{
Height: bcR.store.Height(),
Base: bcR.store.Base(),
})
if err != nil {
bcR.Logger.Error("could not convert msg to protobut", "err", err)
return
}
src.TrySend(BlocksyncChannel, msgBytes)
case *bcproto.StatusResponse:
// Got a peer status. Unverified.
bcR.pool.SetPeerRange(e.Src.ID(), msg.Base, msg.Height)
bcR.pool.SetPeerRange(src.ID(), msg.Base, msg.Height)
case *bcproto.NoBlockResponse:
bcR.Logger.Debug("Peer does not have requested block", "peer", e.Src, "height", msg.Height)
bcR.Logger.Debug("Peer does not have requested block", "peer", src, "height", msg.Height)
default:
bcR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
}
@@ -268,10 +285,13 @@ func (bcR *Reactor) poolRoutine(stateSynced bool) {
if peer == nil {
continue
}
queued := peer.TrySend(p2p.Envelope{
ChannelID: BlocksyncChannel,
Message: &bcproto.BlockRequest{Height: request.Height},
})
msgBytes, err := EncodeMsg(&bcproto.BlockRequest{Height: request.Height})
if err != nil {
bcR.Logger.Error("could not convert msg to proto", "err", err)
continue
}

queued := peer.TrySend(BlocksyncChannel, msgBytes)
if !queued {
bcR.Logger.Debug("Send queue is full, drop block request", "peer", peer.ID(), "height", request.Height)
}
@@ -410,9 +430,13 @@ FOR_LOOP:

// BroadcastStatusRequest broadcasts `BlockStore` base and height.
func (bcR *Reactor) BroadcastStatusRequest() error {
bcR.Switch.NewBroadcast(p2p.Envelope{
ChannelID: BlocksyncChannel,
Message: &bcproto.StatusRequest{},
})
bm, err := EncodeMsg(&bcproto.StatusRequest{})
if err != nil {
bcR.Logger.Error("could not convert msg to proto", "err", err)
return fmt.Errorf("could not convert msg to proto: %w", err)
}

bcR.Switch.Broadcast(BlocksyncChannel, bm)

return nil
}
@@ -703,9 +703,6 @@ type MempoolConfig struct {
// Mempool version to use:
// 1) "v0" - (default) FIFO mempool.
// 2) "v1" - prioritized mempool.
// WARNING: There's a known memory leak with the prioritized mempool
// that the team are working on. Read more here:
// https://github.com/tendermint/tendermint/issues/8775
Version string `mapstructure:"version"`
RootDir string `mapstructure:"home"`
Recheck bool `mapstructure:"recheck"`
@@ -26,7 +26,6 @@ import (
mempoolv0 "github.com/tendermint/tendermint/mempool/v0"
mempoolv1 "github.com/tendermint/tendermint/mempool/v1"
"github.com/tendermint/tendermint/p2p"
tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus"
tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
sm "github.com/tendermint/tendermint/state"
"github.com/tendermint/tendermint/store"
@@ -166,16 +165,10 @@ func TestByzantinePrevoteEquivocation(t *testing.T) {
for i, peer := range peerList {
if i < len(peerList)/2 {
bcs.Logger.Info("Signed and pushed vote", "vote", prevote1, "peer", peer)
peer.Send(p2p.Envelope{
Message: &tmcons.Vote{prevote1.ToProto()},
ChannelID: VoteChannel,
})
peer.Send(VoteChannel, MustEncode(&VoteMessage{prevote1}))
} else {
bcs.Logger.Info("Signed and pushed vote", "vote", prevote2, "peer", peer)
peer.Send(p2p.Envelope{
Message: &tmcons.Vote{prevote2.ToProto()},
ChannelID: VoteChannel,
})
peer.Send(VoteChannel, MustEncode(&VoteMessage{prevote2}))
}
}
} else {
@@ -527,26 +520,18 @@ func sendProposalAndParts(
parts *types.PartSet,
) {
// proposal
peer.Send(p2p.Envelope{
ChannelID: DataChannel,
Message: &tmcons.Proposal{Proposal: *proposal.ToProto()},
})
msg := &ProposalMessage{Proposal: proposal}
peer.Send(DataChannel, MustEncode(msg))

// parts
for i := 0; i < int(parts.Total()); i++ {
part := parts.GetPart(i)
pp, err := part.ToProto()
if err != nil {
panic(err) // TODO: wbanfield better error handling
msg := &BlockPartMessage{
Height: height, // This tells peer that this part applies to us.
Round: round, // This tells peer that this part applies to us.
Part: part,
}
peer.Send(p2p.Envelope{
ChannelID: DataChannel,
Message: &tmcons.BlockPart{
Height: height, // This tells peer that this part applies to us.
Round: round, // This tells peer that this part applies to us.
Part: *pp,
},
})
peer.Send(DataChannel, MustEncode(msg))
}

// votes
@@ -554,14 +539,9 @@ func sendProposalAndParts(
prevote, _ := cs.signVote(tmproto.PrevoteType, blockHash, parts.Header())
precommit, _ := cs.signVote(tmproto.PrecommitType, blockHash, parts.Header())
cs.mtx.Unlock()
peer.Send(p2p.Envelope{
ChannelID: VoteChannel,
Message: &tmcons.Vote{prevote.ToProto()},
})
peer.Send(p2p.Envelope{
ChannelID: VoteChannel,
Message: &tmcons.Vote{precommit.ToProto()},
})

peer.Send(VoteChannel, MustEncode(&VoteMessage{prevote}))
peer.Send(VoteChannel, MustEncode(&VoteMessage{precommit}))
}

//----------------------------------------
@@ -599,7 +579,7 @@ func (br *ByzantineReactor) AddPeer(peer p2p.Peer) {
func (br *ByzantineReactor) RemovePeer(peer p2p.Peer, reason interface{}) {
br.reactor.RemovePeer(peer, reason)
}
func (br *ByzantineReactor) Receive(e p2p.Envelope) {
br.reactor.Receive(e)
func (br *ByzantineReactor) Receive(chID byte, peer p2p.Peer, msgBytes []byte) {
br.reactor.Receive(chID, peer, msgBytes)
}
func (br *ByzantineReactor) InitPeer(peer p2p.Peer) p2p.Peer { return peer }
@@ -7,7 +7,6 @@ import (
"github.com/tendermint/tendermint/libs/log"
tmrand "github.com/tendermint/tendermint/libs/rand"
"github.com/tendermint/tendermint/p2p"
tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus"
tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
"github.com/tendermint/tendermint/types"
)
@@ -95,10 +94,7 @@ func invalidDoPrevoteFunc(t *testing.T, height int64, round int32, cs *State, sw
peers := sw.Peers().List()
for _, peer := range peers {
cs.Logger.Info("Sending bad vote", "block", blockHash, "peer", peer)
peer.Send(p2p.Envelope{
Message: &tmcons.Vote{precommit.ToProto()},
ChannelID: VoteChannel,
})
peer.Send(VoteChannel, MustEncode(&VoteMessage{precommit}))
}
}()
}
@@ -4,6 +4,8 @@ import (
"errors"
"fmt"

"github.com/cosmos/gogoproto/proto"

cstypes "github.com/tendermint/tendermint/consensus/types"
"github.com/tendermint/tendermint/libs/bits"
tmmath "github.com/tendermint/tendermint/libs/math"
@@ -13,9 +15,7 @@ import (
"github.com/tendermint/tendermint/types"
)

// MsgToProto takes a consensus message type and returns the proto defined consensus message.
//
// TODO: This needs to be removed, but WALToProto depends on this.
// MsgToProto takes a consensus message type and returns the proto defined consensus message
func MsgToProto(msg Message) (*tmcons.Message, error) {
if msg == nil {
return nil, errors.New("consensus: message is nil")
@@ -260,6 +260,20 @@ func MsgFromProto(msg *tmcons.Message) (Message, error) {
return pb, nil
}

// MustEncode takes the reactors msg, makes it proto and marshals it
// this mimics `MustMarshalBinaryBare` in that is panics on error
func MustEncode(msg Message) []byte {
pb, err := MsgToProto(msg)
if err != nil {
panic(err)
}
enc, err := proto.Marshal(pb)
if err != nil {
panic(err)
}
return enc
}

// WALToProto takes a WAL message and return a proto walMessage and error
func WALToProto(msg WALMessage) (*tmcons.WALMessage, error) {
var pb tmcons.WALMessage
@@ -7,6 +7,8 @@ import (
"sync"
"time"

"github.com/cosmos/gogoproto/proto"

cstypes "github.com/tendermint/tendermint/consensus/types"
"github.com/tendermint/tendermint/libs/bits"
tmevents "github.com/tendermint/tendermint/libs/events"
@@ -146,7 +148,6 @@ func (conR *Reactor) GetChannels() []*p2p.ChannelDescriptor {
Priority: 6,
SendQueueCapacity: 100,
RecvMessageCapacity: maxMsgSize,
MessageType: &tmcons.Message{},
},
{
ID: DataChannel, // maybe split between gossiping current block and catchup stuff
@@ -155,7 +156,6 @@ func (conR *Reactor) GetChannels() []*p2p.ChannelDescriptor {
SendQueueCapacity: 100,
RecvBufferCapacity: 50 * 4096,
RecvMessageCapacity: maxMsgSize,
MessageType: &tmcons.Message{},
},
{
ID: VoteChannel,
@@ -163,7 +163,6 @@ func (conR *Reactor) GetChannels() []*p2p.ChannelDescriptor {
SendQueueCapacity: 100,
RecvBufferCapacity: 100 * 100,
RecvMessageCapacity: maxMsgSize,
MessageType: &tmcons.Message{},
},
{
ID: VoteSetBitsChannel,
@@ -171,7 +170,6 @@ func (conR *Reactor) GetChannels() []*p2p.ChannelDescriptor {
SendQueueCapacity: 2,
RecvBufferCapacity: 1024,
RecvMessageCapacity: maxMsgSize,
MessageType: &tmcons.Message{},
},
}
}
@@ -225,44 +223,34 @@ func (conR *Reactor) RemovePeer(peer p2p.Peer, reason interface{}) {
// Peer state updates can happen in parallel, but processing of
// proposals, block parts, and votes are ordered by the receiveRoutine
// NOTE: blocks on consensus state for proposals, block parts, and votes
func (conR *Reactor) Receive(e p2p.Envelope) {
func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
if !conR.IsRunning() {
conR.Logger.Debug("Receive", "src", e.Src, "chId", e.ChannelID)
conR.Logger.Debug("Receive", "src", src, "chId", chID, "bytes", msgBytes)
return
}

if w, ok := e.Message.(p2p.Wrapper); ok {
var err error
e.Message, err = w.Wrap()
if err != nil {
conR.Logger.Error("Error wrapping message", "src", e.Src, "chId", e.ChannelID, "err", err)
conR.Switch.StopPeerForError(e.Src, err)
return
}
}

msg, err := MsgFromProto(e.Message.(*tmcons.Message))
msg, err := decodeMsg(msgBytes)
if err != nil {
conR.Logger.Error("Error decoding message", "src", e.Src, "chId", e.ChannelID, "err", err)
conR.Switch.StopPeerForError(e.Src, err)
conR.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err)
conR.Switch.StopPeerForError(src, err)
return
}

if err = msg.ValidateBasic(); err != nil {
conR.Logger.Error("Peer sent us invalid msg", "peer", e.Src, "msg", e.Message, "err", err)
conR.Switch.StopPeerForError(e.Src, err)
conR.Logger.Error("Peer sent us invalid msg", "peer", src, "msg", msg, "err", err)
conR.Switch.StopPeerForError(src, err)
return
}

conR.Logger.Debug("Receive", "src", e.Src, "chId", e.ChannelID, "msg", msg)
conR.Logger.Debug("Receive", "src", src, "chId", chID, "msg", msg)

// Get peer states
ps, ok := e.Src.Get(types.PeerStateKey).(*PeerState)
ps, ok := src.Get(types.PeerStateKey).(*PeerState)
if !ok {
panic(fmt.Sprintf("Peer %v has no state", e.Src))
panic(fmt.Sprintf("Peer %v has no state", src))
}

switch e.ChannelID {
switch chID {
case StateChannel:
switch msg := msg.(type) {
case *NewRoundStepMessage:
@@ -270,8 +258,8 @@ func (conR *Reactor) Receive(e p2p.Envelope) {
initialHeight := conR.conS.state.InitialHeight
conR.conS.mtx.Unlock()
if err = msg.ValidateHeight(initialHeight); err != nil {
conR.Logger.Error("Peer sent us invalid msg", "peer", e.Src, "msg", msg, "err", err)
conR.Switch.StopPeerForError(e.Src, err)
conR.Logger.Error("Peer sent us invalid msg", "peer", src, "msg", msg, "err", err)
conR.Switch.StopPeerForError(src, err)
return
}
ps.ApplyNewRoundStepMessage(msg)
@@ -290,7 +278,7 @@ func (conR *Reactor) Receive(e p2p.Envelope) {
// Peer claims to have a maj23 for some BlockID at H,R,S,
err := votes.SetPeerMaj23(msg.Round, msg.Type, ps.peer.ID(), msg.BlockID)
if err != nil {
conR.Switch.StopPeerForError(e.Src, err)
conR.Switch.StopPeerForError(src, err)
return
}
// Respond with a VoteSetBitsMessage showing which votes we have.
@@ -304,19 +292,13 @@ func (conR *Reactor) Receive(e p2p.Envelope) {
default:
panic("Bad VoteSetBitsMessage field Type. Forgot to add a check in ValidateBasic?")
}
eMsg := &tmcons.VoteSetBits{
src.TrySend(VoteSetBitsChannel, MustEncode(&VoteSetBitsMessage{
Height: msg.Height,
Round: msg.Round,
Type: msg.Type,
BlockID: msg.BlockID.ToProto(),
}
if votes := ourVotes.ToProto(); votes != nil {
eMsg.Votes = *votes
}
e.Src.TrySend(p2p.Envelope{
ChannelID: VoteSetBitsChannel,
Message: eMsg,
})
BlockID: msg.BlockID,
Votes: ourVotes,
}))
default:
conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
}
@@ -329,13 +311,13 @@ func (conR *Reactor) Receive(e p2p.Envelope) {
switch msg := msg.(type) {
case *ProposalMessage:
ps.SetHasProposal(msg.Proposal)
conR.conS.peerMsgQueue <- msgInfo{msg, e.Src.ID()}
conR.conS.peerMsgQueue <- msgInfo{msg, src.ID()}
case *ProposalPOLMessage:
ps.ApplyProposalPOLMessage(msg)
case *BlockPartMessage:
ps.SetHasProposalBlockPart(msg.Height, msg.Round, int(msg.Part.Index))
conR.Metrics.BlockParts.With("peer_id", string(e.Src.ID())).Add(1)
conR.conS.peerMsgQueue <- msgInfo{msg, e.Src.ID()}
conR.Metrics.BlockParts.With("peer_id", string(src.ID())).Add(1)
conR.conS.peerMsgQueue <- msgInfo{msg, src.ID()}
default:
conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
}
@@ -355,7 +337,7 @@ func (conR *Reactor) Receive(e p2p.Envelope) {
ps.EnsureVoteBitArrays(height-1, lastCommitSize)
ps.SetHasVote(msg.Vote)

cs.peerMsgQueue <- msgInfo{msg, e.Src.ID()}
cs.peerMsgQueue <- msgInfo{msg, src.ID()}

default:
// don't punish (leave room for soft upgrades)
@@ -394,7 +376,7 @@ func (conR *Reactor) Receive(e p2p.Envelope) {
}

default:
conR.Logger.Error(fmt.Sprintf("Unknown chId %X", e.ChannelID))
conR.Logger.Error(fmt.Sprintf("Unknown chId %X", chID))
}
}
@@ -448,39 +430,29 @@ func (conR *Reactor) unsubscribeFromBroadcastEvents() {

func (conR *Reactor) broadcastNewRoundStepMessage(rs *cstypes.RoundState) {
nrsMsg := makeRoundStepMessage(rs)
conR.Switch.NewBroadcast(p2p.Envelope{
ChannelID: StateChannel,
Message: nrsMsg,
})
conR.Switch.Broadcast(StateChannel, MustEncode(nrsMsg))
}

func (conR *Reactor) broadcastNewValidBlockMessage(rs *cstypes.RoundState) {
psh := rs.ProposalBlockParts.Header()
csMsg := &tmcons.NewValidBlock{
csMsg := &NewValidBlockMessage{
Height: rs.Height,
Round: rs.Round,
BlockPartSetHeader: psh.ToProto(),
BlockParts: rs.ProposalBlockParts.BitArray().ToProto(),
BlockPartSetHeader: rs.ProposalBlockParts.Header(),
BlockParts: rs.ProposalBlockParts.BitArray(),
IsCommit: rs.Step == cstypes.RoundStepCommit,
}
conR.Switch.NewBroadcast(p2p.Envelope{
ChannelID: StateChannel,
Message: csMsg,
})
conR.Switch.Broadcast(StateChannel, MustEncode(csMsg))
}

// Broadcasts HasVoteMessage to peers that care.
func (conR *Reactor) broadcastHasVoteMessage(vote *types.Vote) {
msg := &tmcons.HasVote{
msg := &HasVoteMessage{
Height: vote.Height,
Round: vote.Round,
Type: vote.Type,
Index: vote.ValidatorIndex,
}
conR.Switch.NewBroadcast(p2p.Envelope{
ChannelID: StateChannel,
Message: msg,
})
conR.Switch.Broadcast(StateChannel, MustEncode(msg))
/*
// TODO: Make this broadcast more selective.
for _, peer := range conR.Switch.Peers().List() {
@@ -491,11 +463,7 @@ func (conR *Reactor) broadcastHasVoteMessage(vote *types.Vote) {
prs := ps.GetRoundState()
if prs.Height == vote.Height {
// TODO: Also filter on round?
e := p2p.Envelope{
ChannelID: StateChannel, struct{ ConsensusMessage }{msg},
Message: p,
}
peer.TrySend(e)
peer.TrySend(StateChannel, struct{ ConsensusMessage }{msg})
} else {
// Height doesn't match
// TODO: check a field, maybe CatchupCommitRound?
@@ -505,11 +473,11 @@ func (conR *Reactor) broadcastHasVoteMessage(vote *types.Vote) {
*/
}

func makeRoundStepMessage(rs *cstypes.RoundState) (nrsMsg *tmcons.NewRoundStep) {
nrsMsg = &tmcons.NewRoundStep{
func makeRoundStepMessage(rs *cstypes.RoundState) (nrsMsg *NewRoundStepMessage) {
nrsMsg = &NewRoundStepMessage{
Height: rs.Height,
Round: rs.Round,
Step: uint32(rs.Step),
Step: rs.Step,
SecondsSinceStartTime: int64(time.Since(rs.StartTime).Seconds()),
LastCommitRound: rs.LastCommit.GetRound(),
}
@@ -519,10 +487,7 @@ func makeRoundStepMessage(rs *cstypes.RoundState) (nrsMsg *tmcons.NewRoundStep)
func (conR *Reactor) sendNewRoundStepMessage(peer p2p.Peer) {
rs := conR.getRoundState()
nrsMsg := makeRoundStepMessage(rs)
peer.Send(p2p.Envelope{
ChannelID: StateChannel,
Message: nrsMsg,
})
peer.Send(StateChannel, MustEncode(nrsMsg))
}

func (conR *Reactor) updateRoundStateRoutine() {
@@ -561,19 +526,13 @@ OUTER_LOOP:
if rs.ProposalBlockParts.HasHeader(prs.ProposalBlockPartSetHeader) {
if index, ok := rs.ProposalBlockParts.BitArray().Sub(prs.ProposalBlockParts.Copy()).PickRandom(); ok {
part := rs.ProposalBlockParts.GetPart(index)
parts, err := part.ToProto()
if err != nil {
panic(err)
msg := &BlockPartMessage{
Height: rs.Height, // This tells peer that this part applies to us.
Round: rs.Round, // This tells peer that this part applies to us.
Part: part,
}
logger.Debug("Sending block part", "height", prs.Height, "round", prs.Round)
if peer.Send(p2p.Envelope{
ChannelID: DataChannel,
Message: &tmcons.BlockPart{
Height: rs.Height, // This tells peer that this part applies to us.
Round: rs.Round, // This tells peer that this part applies to us.
Part: *parts,
},
}) {
if peer.Send(DataChannel, MustEncode(msg)) {
ps.SetHasProposalBlockPart(prs.Height, prs.Round, index)
}
continue OUTER_LOOP
@@ -619,11 +578,9 @@ OUTER_LOOP:
if rs.Proposal != nil && !prs.Proposal {
// Proposal: share the proposal metadata with peer.
{
msg := &ProposalMessage{Proposal: rs.Proposal}
logger.Debug("Sending proposal", "height", prs.Height, "round", prs.Round)
if peer.Send(p2p.Envelope{
ChannelID: DataChannel,
Message: &tmcons.Proposal{Proposal: *rs.Proposal.ToProto()},
}) {
if peer.Send(DataChannel, MustEncode(msg)) {
// NOTE[ZM]: A peer might have received different proposal msg so this Proposal msg will be rejected!
ps.SetHasProposal(rs.Proposal)
}
@@ -633,15 +590,13 @@ OUTER_LOOP:
// rs.Proposal was validated, so rs.Proposal.POLRound <= rs.Round,
// so we definitely have rs.Votes.Prevotes(rs.Proposal.POLRound).
if 0 <= rs.Proposal.POLRound {
msg := &ProposalPOLMessage{
Height: rs.Height,
ProposalPOLRound: rs.Proposal.POLRound,
ProposalPOL: rs.Votes.Prevotes(rs.Proposal.POLRound).BitArray(),
}
logger.Debug("Sending POL", "height", prs.Height, "round", prs.Round)
peer.Send(p2p.Envelope{
ChannelID: DataChannel,
Message: &tmcons.ProposalPOL{
Height: rs.Height,
ProposalPolRound: rs.Proposal.POLRound,
ProposalPol: *rs.Votes.Prevotes(rs.Proposal.POLRound).BitArray().ToProto(),
},
})
peer.Send(DataChannel, MustEncode(msg))
}
continue OUTER_LOOP
}
@@ -678,20 +633,13 @@ func (conR *Reactor) gossipDataForCatchup(logger log.Logger, rs *cstypes.RoundSt
return
}
// Send the part
logger.Debug("Sending block part for catchup", "round", prs.Round, "index", index)
pp, err := part.ToProto()
if err != nil {
logger.Error("Could not convert part to proto", "index", index, "error", err)
return
msg := &BlockPartMessage{
Height: prs.Height, // Not our height, so it doesn't matter.
Round: prs.Round, // Not our height, so it doesn't matter.
Part: part,
}
if peer.Send(p2p.Envelope{
ChannelID: DataChannel,
Message: &tmcons.BlockPart{
Height: prs.Height, // Not our height, so it doesn't matter.
Round: prs.Round, // Not our height, so it doesn't matter.
Part: *pp,
},
}) {
logger.Debug("Sending block part for catchup", "round", prs.Round, "index", index)
if peer.Send(DataChannel, MustEncode(msg)) {
ps.SetHasProposalBlockPart(prs.Height, prs.Round, index)
} else {
logger.Debug("Sending block part for catchup failed")
@@ -850,16 +798,12 @@ OUTER_LOOP:
prs := ps.GetRoundState()
if rs.Height == prs.Height {
if maj23, ok := rs.Votes.Prevotes(prs.Round).TwoThirdsMajority(); ok {

peer.TrySend(p2p.Envelope{
ChannelID: StateChannel,
Message: &tmcons.VoteSetMaj23{
Height: prs.Height,
Round: prs.Round,
Type: tmproto.PrevoteType,
BlockID: maj23.ToProto(),
},
})
peer.TrySend(StateChannel, MustEncode(&VoteSetMaj23Message{
Height: prs.Height,
Round: prs.Round,
Type: tmproto.PrevoteType,
BlockID: maj23,
}))
time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration)
}
}
@@ -871,15 +815,12 @@ OUTER_LOOP:
prs := ps.GetRoundState()
if rs.Height == prs.Height {
if maj23, ok := rs.Votes.Precommits(prs.Round).TwoThirdsMajority(); ok {
peer.TrySend(p2p.Envelope{
ChannelID: StateChannel,
Message: &tmcons.VoteSetMaj23{
Height: prs.Height,
Round: prs.Round,
Type: tmproto.PrecommitType,
BlockID: maj23.ToProto(),
},
})
peer.TrySend(StateChannel, MustEncode(&VoteSetMaj23Message{
Height: prs.Height,
Round: prs.Round,
Type: tmproto.PrecommitType,
BlockID: maj23,
}))
time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration)
}
}
@@ -891,16 +832,12 @@ OUTER_LOOP:
prs := ps.GetRoundState()
if rs.Height == prs.Height && prs.ProposalPOLRound >= 0 {
if maj23, ok := rs.Votes.Prevotes(prs.ProposalPOLRound).TwoThirdsMajority(); ok {

peer.TrySend(p2p.Envelope{
ChannelID: StateChannel,
Message: &tmcons.VoteSetMaj23{
Height: prs.Height,
Round: prs.ProposalPOLRound,
Type: tmproto.PrevoteType,
BlockID: maj23.ToProto(),
},
})
peer.TrySend(StateChannel, MustEncode(&VoteSetMaj23Message{
Height: prs.Height,
Round: prs.ProposalPOLRound,
Type: tmproto.PrevoteType,
BlockID: maj23,
}))
time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration)
}
}
@@ -915,15 +852,12 @@ OUTER_LOOP:
if prs.CatchupCommitRound != -1 && prs.Height > 0 && prs.Height <= conR.conS.blockStore.Height() &&
prs.Height >= conR.conS.blockStore.Base() {
if commit := conR.conS.LoadCommit(prs.Height); commit != nil {
peer.TrySend(p2p.Envelope{
ChannelID: StateChannel,
Message: &tmcons.VoteSetMaj23{
Height: prs.Height,
Round: commit.Round,
Type: tmproto.PrecommitType,
BlockID: commit.BlockID.ToProto(),
},
})
peer.TrySend(StateChannel, MustEncode(&VoteSetMaj23Message{
Height: prs.Height,
Round: commit.Round,
Type: tmproto.PrecommitType,
BlockID: commit.BlockID,
}))
time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration)
}
}
@@ -1137,13 +1071,9 @@ func (ps *PeerState) SetHasProposalBlockPart(height int64, round int32, index in
// Returns true if vote was sent.
func (ps *PeerState) PickSendVote(votes types.VoteSetReader) bool {
if vote, ok := ps.PickVoteToSend(votes); ok {
msg := &VoteMessage{vote}
ps.logger.Debug("Sending vote message", "ps", ps, "vote", vote)
if ps.peer.Send(p2p.Envelope{
ChannelID: VoteChannel,
Message: &tmcons.Vote{
Vote: vote.ToProto(),
},
}) {
if ps.peer.Send(VoteChannel, MustEncode(msg)) {
ps.SetHasVote(vote)
return true
}
@@ -1509,6 +1439,15 @@ func init() {
tmjson.RegisterType(&VoteSetBitsMessage{}, "tendermint/VoteSetBits")
}

func decodeMsg(bz []byte) (msg Message, err error) {
pb := &tmcons.Message{}
if err = proto.Unmarshal(bz, pb); err != nil {
return msg, err
}

return MsgFromProto(pb)
}

//-------------------------------------

// NewRoundStepMessage is sent for every step taken in the ConsensusState.
@@ -33,7 +33,6 @@ import (
mempoolv1 "github.com/tendermint/tendermint/mempool/v1"
"github.com/tendermint/tendermint/p2p"
p2pmock "github.com/tendermint/tendermint/p2p/mock"
tmcons "github.com/tendermint/tendermint/proto/tendermint/consensus"
tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
sm "github.com/tendermint/tendermint/state"
statemocks "github.com/tendermint/tendermint/state/mocks"
@@ -266,18 +265,15 @@ func TestReactorReceiveDoesNotPanicIfAddPeerHasntBeenCalledYet(t *testing.T) {
var (
reactor = reactors[0]
peer = p2pmock.NewPeer(nil)
msg = MustEncode(&HasVoteMessage{Height: 1,
Round: 1, Index: 1, Type: tmproto.PrevoteType})
)

reactor.InitPeer(peer)

// simulate switch calling Receive before AddPeer
assert.NotPanics(t, func() {
reactor.Receive(p2p.Envelope{
ChannelID: StateChannel,
Src: peer,
Message: &tmcons.HasVote{Height: 1,
Round: 1, Index: 1, Type: tmproto.PrevoteType},
})
reactor.Receive(StateChannel, peer, msg)
reactor.AddPeer(peer)
})
}
@@ -292,18 +288,15 @@ func TestReactorReceivePanicsIfInitPeerHasntBeenCalledYet(t *testing.T) {
var (
reactor = reactors[0]
peer = p2pmock.NewPeer(nil)
msg = MustEncode(&HasVoteMessage{Height: 1,
Round: 1, Index: 1, Type: tmproto.PrevoteType})
)

// we should call InitPeer here

// simulate switch calling Receive before AddPeer
assert.Panics(t, func() {
reactor.Receive(p2p.Envelope{
ChannelID: StateChannel,
Src: peer,
Message: &tmcons.HasVote{Height: 1,
Round: 1, Index: 1, Type: tmproto.PrevoteType},
})
reactor.Receive(StateChannel, peer, msg)
})
}
@@ -99,4 +99,4 @@ configuration file that we can update with PRs.

Because the build processes are identical (as is the information contained
herein), this file should be kept in sync as much as possible with its
[counterpart in the Cosmos SDK
repo](https://github.com/cosmos/cosmos-sdk/blob/master/docs/DOCS_README.md).
repo](https://github.com/cosmos/cosmos-sdk/blob/main/docs/README.md).
@@ -61,7 +61,7 @@ The following protocols and application features require a reliable source of time

* Tendermint Light Clients [rely on correspondence between their known time](https://github.com/tendermint/tendermint/blob/main/spec/light-client/verification/README.md#definitions-1) and the block time for block verification.
* Tendermint Evidence validity is determined [either in terms of heights or in terms of time](https://github.com/tendermint/tendermint/blob/8029cf7a0fcc89a5004e173ec065aa48ad5ba3c8/spec/consensus/evidence.md#verification).
* Unbonding of staked assets in the Cosmos Hub [occurs after a period of 21 days](https://github.com/cosmos/governance/blob/ce75de4019b0129f6efcbb0e752cd2cc9e6136d3/params-change/Staking.md#unbondingtime).
* IBC packets can use either a [timestamp or a height to timeout packet delivery](https://docs.cosmos.network/v0.44/ibc/overview.html#acknowledgements)
* IBC packets can use either a [timestamp or a height to timeout packet delivery](https://docs.cosmos.network/v0.45/ibc/overview.html#acknowledgements)

Finally, inflation distribution in the Cosmos Hub uses an approximation of time to calculate an annual percentage rate.
This approximation of time is calculated using [block heights with an estimated number of blocks produced in a year](https://github.com/cosmos/governance/blob/master/params-change/Mint.md#blocksperyear).
docs/qa/README.md (new file, 23 additions)

@@ -0,0 +1,23 @@
---
order: 1
parent:
  title: Tendermint Quality Assurance
  description: This is a report on the process followed and results obtained when running v0.34.x on testnets
  order: 2
---

# Tendermint Quality Assurance

This directory keeps track of the process followed by the Tendermint Core team
for Quality Assurance before cutting a release.
This directory is to live in multiple branches. On each release branch,
the contents of this directory reflect the status of the process
at the time the Quality Assurance process was applied for that release.

File [method](./method.md) keeps track of the process followed to obtain the results
used to decide if a release is passing the Quality Assurance process.
The results obtained in each release are stored in their own directory.
The following releases have undergone the Quality Assurance process:

* [v0.34.x](./v034/), which was tested just before releasing v0.34.22
* [v0.37.x](./v037/), with v0.34.x acting as a baseline
docs/qa/method.md (new file, 214 additions)

@@ -0,0 +1,214 @@
---
order: 1
title: Method
---

# Method

This document provides a detailed description of the QA process.
It is intended to be used by engineers reproducing the experimental setup for future tests of Tendermint.

The (first iteration of the) QA process as described [in the RELEASES.md document][releases]
was applied to version v0.34.x in order to have a set of results acting as benchmarking baseline.
This baseline is then compared with results obtained in later versions.

Out of the testnet-based test cases described in [the releases document][releases] we focused on two of them:
_200 Node Test_, and _Rotating Nodes Test_.

[releases]: https://github.com/tendermint/tendermint/blob/v0.37.x/RELEASES.md#large-scale-testnets

## Software Dependencies

### Infrastructure Requirements to Run the Tests

* An account at Digital Ocean (DO), with a high droplet limit (>202)
* The machine to orchestrate the tests should have the following installed:
    * A clone of the [testnet repository][testnet-repo]
        * This repository contains all the scripts mentioned in the remainder of this section
    * [Digital Ocean CLI][doctl]
    * [Terraform CLI][Terraform]
    * [Ansible CLI][Ansible]

[testnet-repo]: https://github.com/interchainio/tendermint-testnet
[Ansible]: https://docs.ansible.com/ansible/latest/index.html
[Terraform]: https://www.terraform.io/docs
[doctl]: https://docs.digitalocean.com/reference/doctl/how-to/install/

### Requirements for Result Extraction

* Matlab or Octave
* [Prometheus][prometheus] server installed
* blockstore DB of one of the full nodes in the testnet
* Prometheus DB

[prometheus]: https://prometheus.io/

## 200 Node Testnet

### Running the test

This section explains how the tests were carried out for reproducibility purposes.

1. [If you haven't done it before]
   Follow steps 1-4 of the `README.md` at the top of the testnet repository to configure Terraform, and `doctl`.
2. Copy file `testnets/testnet200.toml` onto `testnet.toml` (do NOT commit this change)
3. Set the variable `VERSION_TAG` in the `Makefile` to the git hash that is to be tested (see the sketch after this list).
4. Follow steps 5-10 of the `README.md` to configure and start the 200 node testnet
    * WARNING: Do NOT forget to run `make terraform-destroy` as soon as you are done with the tests (see step 9)
5. As a sanity check, connect to the Prometheus node's web interface and check the graph for the `tendermint_consensus_height` metric.
   All nodes should be increasing their heights.
6. `ssh` into the `testnet-load-runner`, then copy script `script/200-node-loadscript.sh` and run it from the load runner node.
    * Before running it, you need to edit the script to provide the IP address of a full node.
      This node will receive all transactions from the load runner node.
    * This script will take about 40 mins to run
    * It is running 90-seconds-long experiments in a loop with different loads
7. Run `make retrieve-data` to gather all relevant data from the testnet into the orchestrating machine
8. Verify that the data was collected without errors
    * at least one blockstore DB for a Tendermint validator
    * the Prometheus database from the Prometheus node
    * for extra care, you can run `zip -T` on the `prometheus.zip` file and (one of) the `blockstore.db.zip` file(s)
9. **Run `make terraform-destroy`**
    * Don't forget to type `yes`! Otherwise you're in trouble.
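As a quick reference, steps 2 and 3 above amount to roughly the following sketch, run from the root of the testnet repository (paths as given above; the editor invocation and the exact `Makefile` layout are assumptions, adapt to your checkout):

```bash
# Step 2: select the 200-node manifest (do NOT commit this change)
cp testnets/testnet200.toml testnet.toml

# Step 3: edit the Makefile and set VERSION_TAG to the commit hash under test
"${EDITOR:-vi}" Makefile
```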
### Result Extraction

The method for extracting the results described here is highly manual (and exploratory) at this stage.
The Core team should improve it at every iteration to increase the amount of automation.

#### Steps

1. Unzip the blockstore into a directory
2. Extract the latency report and the raw latencies for all the experiments. Run these commands from the directory containing the blockstore
    * `go run github.com/tendermint/tendermint/test/loadtime/cmd/report@3ec6e424d --database-type goleveldb --data-dir ./ > results/report.txt`
    * `go run github.com/tendermint/tendermint/test/loadtime/cmd/report@3ec6e424d --database-type goleveldb --data-dir ./ --csv results/raw.csv`
3. File `report.txt` contains an unordered list of experiments with varying concurrent connections and transaction rate
    * Create files `report01.txt`, `report02.txt`, `report04.txt` and, for each experiment in file `report.txt`,
      copy its related lines to the filename that matches the number of connections.
    * Sort the experiments in `report01.txt` in ascending tx rate order. Likewise for `report02.txt` and `report04.txt`.
4. Generate file `report_tabbed.txt` by showing the contents of `report01.txt`, `report02.txt`, `report04.txt` side by side
    * This effectively creates a table where rows are a particular tx rate and columns are a particular number of websocket connections.
5. Extract the raw latencies from file `raw.csv` using the following bash loop. This creates a `.csv` file and a `.dat` file per experiment.
   The format of the `.dat` files is amenable to loading them as matrices in Octave

    ```bash
    uuids=($(cat report01.txt report02.txt report04.txt | grep '^Experiment ID: ' | awk '{ print $3 }'))
    c=1
    for i in 01 02 04; do
      for j in 0025 0050 0100 0200; do
        echo $i $j $c "${uuids[$c]}"
        filename=c${i}_r${j}
        grep ${uuids[$c]} raw.csv > ${filename}.csv
        cat ${filename}.csv | tr , ' ' | awk '{ print $2, $3 }' > ${filename}.dat
        c=$(expr $c + 1)
      done
    done
    ```

6. Enter Octave
7. Load all `.dat` files generated in step 5 into matrices using this Octave code snippet

    ```octave
    conns = { "01"; "02"; "04" };
    rates = { "0025"; "0050"; "0100"; "0200" };
    for i = 1:length(conns)
      for j = 1:length(rates)
        filename = strcat("c", conns{i}, "_r", rates{j}, ".dat");
        load("-ascii", filename);
      endfor
    endfor
    ```

8. Set variable release to the current release undergoing QA

    ```octave
    release = "v0.34.x";
    ```

9. Generate a plot with all (or some) experiments, where the X axis is the experiment time,
   and the y axis is the latency of transactions.
   The following snippet plots all experiments.

    ```octave
    legends = {};
    hold off;
    for i = 1:length(conns)
      for j = 1:length(rates)
        data_name = strcat("c", conns{i}, "_r", rates{j});
        l = strcat("c=", conns{i}, " r=", rates{j});
        m = eval(data_name); plot((m(:,1) - min(m(:,1))) / 1e+9, m(:,2) / 1e+9, ".");
        hold on;
        legends(1, end+1) = l;
      endfor
    endfor
    legend(legends, "location", "northeastoutside");
    xlabel("experiment time (s)");
    ylabel("latency (s)");
    t = sprintf("200-node testnet - %s", release);
    title(t);
    ```

10. Consider adjusting the axis, in case you want to compare your results to the baseline, for instance

    ```octave
    axis([0, 100, 0, 30], "tic");
    ```

11. Use Octave's GUI menu to save the plot (e.g. as `.png`)

12. Repeat steps 9 and 10 to obtain as many plots as deemed necessary.

13. To generate a latency vs throughput plot, using the raw CSV file generated
    in step 2, follow the instructions for the [`latency_throughput.py`] script.

[`latency_throughput.py`]: ../../scripts/qa/reporting/README.md

#### Extracting Prometheus Metrics

1. Stop the prometheus server if it is running as a service (e.g. a `systemd` unit).
2. Unzip the prometheus database retrieved from the testnet, and move it to replace the
   local prometheus database (see the sketch below).
3. Start the prometheus server and make sure no error logs appear at start up.
4. Introduce the metrics you want to gather or plot.
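A minimal sketch of steps 1-3, assuming Prometheus is managed by `systemd` and stores its data under `/var/lib/prometheus` (both are installation-specific assumptions):

```bash
# Assumptions: prometheus runs as a systemd service and its data directory is
# /var/lib/prometheus; adjust both to your installation.
sudo systemctl stop prometheus
unzip prometheus.zip -d /tmp/testnet-prometheus
sudo rsync -a --delete /tmp/testnet-prometheus/ /var/lib/prometheus/
sudo systemctl start prometheus
journalctl -u prometheus --no-pager -n 50   # check that start-up produced no errors
```

Once the server is back up, metrics such as `tendermint_consensus_height` (used for the sanity check above) can be queried or plotted from the Prometheus web UI.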
## Rotating Node Testnet

### Running the test

This section explains how the tests were carried out for reproducibility purposes.

1. [If you haven't done it before]
   Follow steps 1-4 of the `README.md` at the top of the testnet repository to configure Terraform, and `doctl`.
2. Copy file `testnet_rotating.toml` onto `testnet.toml` (do NOT commit this change)
3. Set variable `VERSION_TAG` to the git hash that is to be tested.
4. Run `make terraform-apply EPHEMERAL_SIZE=25`
    * WARNING: Do NOT forget to run `make terraform-destroy` as soon as you are done with the tests
5. Follow steps 6-10 of the `README.md` to configure and start the "stable" part of the rotating node testnet
6. As a sanity check, connect to the Prometheus node's web interface and check the graph for the `tendermint_consensus_height` metric.
   All nodes should be increasing their heights.
7. On a different shell,
    * run `make runload ROTATE_CONNECTIONS=X ROTATE_TX_RATE=Y`
    * `X` and `Y` should reflect a load below the saturation point (see, e.g.,
      [this paragraph](./v034/README.md#finding-the-saturation-point) for further info)
8. Run `make rotate` to start the script that creates the ephemeral nodes, and kills them when they are caught up.
    * WARNING: If you run this command from your laptop, the laptop needs to be up and connected for the full length
      of the experiment.
9. When the height of the chain reaches 3000, stop the `make rotate` script
10. When the rotate script has made two iterations (i.e., all ephemeral nodes have caught up twice)
    after height 3000 was reached, stop `make rotate`
11. Run `make retrieve-data` to gather all relevant data from the testnet into the orchestrating machine
12. Verify that the data was collected without errors
    * at least one blockstore DB for a Tendermint validator
    * the Prometheus database from the Prometheus node
    * for extra care, you can run `zip -T` on the `prometheus.zip` file and (one of) the `blockstore.db.zip` file(s)
13. **Run `make terraform-destroy`**

Steps 8 to 10 are highly manual at the moment and will be improved in next iterations.

### Result Extraction

In order to obtain a latency plot, follow the instructions above for the 200 node experiment, but:

* The `results.txt` file contains only one experiment
* Therefore, no need for any `for` loops
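Concretely, the per-experiment extraction reduces to a single pass, along these lines (the report file name follows the bullet above; `rotating.dat` is just an illustrative output name):

```bash
# Single experiment: grab its ID from the report and extract its raw latencies.
uuid=$(grep '^Experiment ID: ' results.txt | awk '{ print $3 }')
grep "$uuid" raw.csv | tr , ' ' | awk '{ print $2, $3 }' > rotating.dat
```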
As for prometheus, the same method as for the 200 node experiment can be applied.
278
docs/qa/v034/README.md
Normal file
@@ -0,0 +1,278 @@
|
||||
---
|
||||
order: 1
|
||||
parent:
|
||||
title: Tendermint Quality Assurance Results for v0.34.x
|
||||
description: This is a report on the results obtained when running v0.34.x on testnets
|
||||
order: 2
|
||||
---
|
||||
|
||||
# v0.34.x
|
||||
|
||||
## 200 Node Testnet
|
||||
|
||||
### Finding the Saturation Point
|
||||
|
||||
The first goal when examining the results of the tests is identifying the saturation point.
|
||||
The saturation point is a setup with a transaction load big enough to prevent the testnet
|
||||
from being stable: the load runner tries to produce slightly more transactions than can
|
||||
be processed by the testnet.
|
||||
|
||||
The following table summarizes the results for v0.34.x, for the different experiments
|
||||
(extracted from file [`v034_report_tabbed.txt`](./img/v034_report_tabbed.txt)).
|
||||
|
||||
The X axis of this table is `c`, the number of connections created by the load runner process to the target node.
|
||||
The Y axis of this table is `r`, the rate or number of transactions issued per second.
|
||||
|
||||
| | c=1 | c=2 | c=4 |
|
||||
| :--- | ----: | ----: | ----: |
|
||||
| r=25 | 2225 | 4450 | 8900 |
|
||||
| r=50 | 4450 | 8900 | 17800 |
|
||||
| r=100 | 8900 | 17800 | 35600 |
|
||||
| r=200 | 17800 | 35600 | 38660 |
|
||||
|
||||
The table shows the number of 1024-byte-long transactions that were produced by the load runner,
|
||||
and processed by Tendermint, during the 90 seconds of the experiment's duration.
|
||||
Each cell in the table refers to an experiment with a particular number of websocket connections (`c`)
|
||||
to a chosen validator, and the number of transactions per second that the load runner
|
||||
tries to produce (`r`). Note that the overall load that the tool attempts to generate is $c \cdot r$.
|
||||
|
||||
We can see that the saturation point is beyond the diagonal that spans cells
|
||||
|
||||
* `r=200,c=2`
|
||||
* `r=100,c=4`
|
||||
|
||||
given that the total transactions should be close to the product of the rate, the number of connections,
|
||||
and the experiment time (89 seconds, since the last batch never gets sent).
|
||||
|
||||
All experiments beyond the saturation diagonal (here, only `r=200,c=4`) have in common that the total number of transactions processed is noticeably less than the product $c \cdot r \cdot 89$, which is the expected number of transactions when the system is able to deal well with the load.
|
||||
With `r=200,c=4`, we obtained 38660 whereas the theoretical number of transactions should
|
||||
have been $200 \cdot 4 \cdot 89 = 71200$.
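As a sanity check of the reasoning above, the following short Go sketch (not part of the report's tooling) recomputes the expected total $c \cdot r \cdot 89$ for every cell of the table and flags the cells that fall noticeably short of it; only `r=200,c=4` does.

```go
package main

import "fmt"

func main() {
	// Observed totals from the table above, indexed as observed[r][c].
	observed := map[int]map[int]int{
		25:  {1: 2225, 2: 4450, 4: 8900},
		50:  {1: 4450, 2: 8900, 4: 17800},
		100: {1: 8900, 2: 17800, 4: 35600},
		200: {1: 17800, 2: 35600, 4: 38660},
	}
	const duration = 89 // seconds of effective load (the last batch never gets sent)

	for _, r := range []int{25, 50, 100, 200} {
		for _, c := range []int{1, 2, 4} {
			expected := r * c * duration
			if got := observed[r][c]; got < expected {
				fmt.Printf("r=%d,c=%d: got %d, expected %d -> saturated\n", r, c, got, expected)
			}
		}
	}
}
```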
|
||||
|
||||
At this point, we chose an experiment at the limit of the saturation diagonal,
|
||||
in order to further study the performance of this release.
|
||||
**The chosen experiment is `r=200,c=2`**.
|
||||
|
||||
This is a plot of the CPU load (average over 1 minute, as output by `top`) of the load runner for `r=200,c=2`,
|
||||
where we can see that the load stays close to 0 most of the time.
|
||||
|
||||

|
||||
|
||||
### Examining latencies
|
||||
|
||||
The method described [here](../method.md) allows us to plot the latencies of transactions
|
||||
for all experiments.
|
||||
|
||||

|
||||
|
||||
As we can see, even the experiments beyond the saturation diagonal managed to keep
|
||||
transaction latency stable (i.e. not constantly increasing).
|
||||
Our interpretation is that contention within Tendermint was propagated, via the websockets, back to the load runner, so the load runner could not produce the target load, but only a fraction of it.
|
||||
|
||||
Further examination of the Prometheus data (see below) showed that the mempool contained many transactions at steady state, and that any temporary growth quickly returned to that steady state. This demonstrates that the Tendermint network was able to process transactions at least as quickly as they were submitted to the mempool. Finally, the test script verified that, at the end of each experiment, the mempool was empty, so all transactions submitted to the chain were processed.
|
||||
|
||||
Finally, the number of points in the plot appears to be far smaller than expected given the number of transactions in each experiment, particularly close to or above the saturation diagonal. This is a visual effect: what appear to be single points are actually potentially huge clusters of points. To corroborate this, we zoomed in on the plot above using (carefully chosen) tiny axis intervals. The cluster shown below looks like a single point in the plot above.
|
||||
|
||||

|
||||
|
||||
The plot of latencies can be used as a baseline for comparison with other releases.
|
||||
|
||||
The following plot summarizes average latencies versus overall throughputs
|
||||
across different numbers of WebSocket connections to the node into which
|
||||
transactions are being loaded.
|
||||
|
||||

|
||||
|
||||
### Prometheus Metrics on the Chosen Experiment
|
||||
|
||||
As mentioned [above](#finding-the-saturation-point), the chosen experiment is `r=200,c=2`.
|
||||
This section further examines key metrics for this experiment extracted from Prometheus data.
|
||||
|
||||
#### Mempool Size
|
||||
|
||||
The mempool size, a count of the number of transactions in the mempool, was shown to be stable and homogeneous
|
||||
at all full nodes. It did not exhibit any unconstrained growth.
|
||||
The plot below shows the evolution over time of the cumulative number of transactions inside all full nodes' mempools
|
||||
at a given time.
|
||||
The two spikes that can be observed correspond to a period where consensus instances proceeded beyond the initial round
|
||||
at some nodes.
|
||||
|
||||

|
||||
|
||||
The plot below shows the evolution of the average over all full nodes, which oscillates between 1500 and 2000 outstanding transactions.
|
||||
|
||||

|
||||
|
||||
The peaks observed coincide with the moments when some nodes proceeded beyond the initial round of consensus (see below).
|
||||
|
||||
#### Peers
|
||||
|
||||
The number of peers was stable at all nodes.
|
||||
It was higher for the seed nodes (around 140) than for the rest (between 21 and 74).
|
||||
The fact that non-seed nodes reach more than 50 peers is due to #9548.
|
||||
|
||||

|
||||
|
||||
#### Consensus Rounds per Height
|
||||
|
||||
Most heights took just one round, but some nodes needed to advance to round 1 at some point.
|
||||
|
||||

|
||||
|
||||
#### Blocks Produced per Minute, Transactions Processed per Minute
|
||||
|
||||
The blocks produced per minute are the slope of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the height goes from 530 to 569.
|
||||
This results in an average of 19.5 blocks produced per minute.
|
||||
|
||||
The transactions processed per minute are the slope of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the total goes from 64525 to 100125 transactions,
|
||||
resulting in 17800 transactions per minute. However, we can see in the plot that
|
||||
all transactions in the load are processed long before the two minutes.
|
||||
If we adjust the time window when transactions are processed (approx. 105 seconds),
|
||||
we obtain 20343 transactions per minute.
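The arithmetic behind these figures can be reproduced with a short Go sketch (not part of the report's tooling); the heights and transaction totals below are read from the plots above, and everything else is unit conversion.

```go
package main

import "fmt"

func main() {
	// Heights read off the first plot over a 2-minute window.
	h0, h1 := 530.0, 569.0
	fmt.Printf("blocks/min: %.1f\n", (h1-h0)/2) // 19.5

	// Transaction totals read off the second plot over the same window.
	tx0, tx1 := 64525.0, 100125.0
	fmt.Printf("txs/min over 120 s: %.0f\n", (tx1-tx0)/2) // 17800

	// Adjusted to the ~105 seconds during which transactions were actually processed.
	fmt.Printf("txs/min over 105 s: %.0f\n", (tx1-tx0)/105*60) // 20343
}
```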
|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
Resident Set Size of all monitored processes is plotted below.
|
||||
|
||||

|
||||
|
||||
The average over all processes oscillates around 1.2 GiB and does not demonstrate unconstrained growth.
|
||||
|
||||

|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The best Prometheus metric for gauging CPU utilization on a Unix machine is `load1`, the same load average that appears in the [output of `top`](https://www.digitalocean.com/community/tutorials/load-average-in-linux).
|
||||
|
||||

|
||||
|
||||
It stays below 5 in most cases, which is generally considered an acceptable load.
|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: N/A** (v0.34.x is the baseline)
|
||||
|
||||
Date: 2022-10-14
|
||||
|
||||
Version: 3ec6e424d6ae4c96867c2dcf8310572156068bb6
|
||||
|
||||
## Rotating Node Testnet
|
||||
|
||||
For this testnet, we will use a load that can safely be considered below the saturation
|
||||
point for the size of this testnet (between 13 and 38 full nodes): `c=4,r=800`.
|
||||
|
||||
N.B.: The version of Tendermint used for these tests is affected by #9539. However, the reduced load that reaches the mempools is orthogonal to the functionality we are focusing on here.
|
||||
|
||||
### Latencies
|
||||
|
||||
All latencies can be seen in the following plot.
|
||||
|
||||

|
||||
|
||||
We can observe some very high latencies towards the end of the test. Suspecting that they were duplicate transactions, we examined the raw latencies file and discovered more than 100K duplicate transactions.
|
||||
|
||||
The following plot shows the latencies once all duplicate transactions have been removed, i.e., only the first occurrence of each duplicated transaction is kept.
|
||||
|
||||

|
||||
|
||||
This problem, which exists in `v0.34.x`, will need to be addressed, perhaps in the same way we addressed it when running the 200 node test with high loads: by increasing the `cache_size` configuration parameter.
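For reference, `cache_size` lives in the `[mempool]` section of `config.toml`. A minimal sketch of setting the equivalent field through Tendermint's `config` package is shown below; the value 200000 is illustrative (it is the value later used for the 200 node testnets, the default being 10000).

```go
package main

import (
	"fmt"

	"github.com/tendermint/tendermint/config"
)

func main() {
	// Start from Tendermint's defaults and enlarge the mempool cache, which is
	// what `cache_size` in the [mempool] section of config.toml controls.
	cfg := config.DefaultConfig()
	cfg.Mempool.CacheSize = 200000 // illustrative value; the default is 10000

	fmt.Println("mempool cache_size:", cfg.Mempool.CacheSize)
}
```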
|
||||
|
||||
### Prometheus Metrics
|
||||
|
||||
The set of metrics shown here is smaller than for the 200 node experiment. We are only interested in those on which the catch-up process (blocksync) may have an impact.
|
||||
|
||||
#### Blocks and Transactions per minute
|
||||
|
||||
Just as shown for the 200 node test, the blocks produced per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 5229 seconds, the height goes from 2 to 3638.
|
||||
This results in an average of 41 blocks produced per minute.
|
||||
|
||||
The following plot shows only the heights reported by ephemeral nodes (which are also included in the plot above). Note that the _height_ metric is only reported _once the node has switched to consensus_, hence the gaps when nodes are killed, wiped out, started from scratch, and catching up.
|
||||
|
||||

|
||||
|
||||
The transactions processed per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
The small lines we see periodically close to `y=0` are the transactions that
|
||||
ephemeral nodes start processing when they are caught up.
|
||||
|
||||
Over a period of 5229 seconds, the total goes from 0 to 387697 transactions, resulting in 4449 transactions per minute. We can see some abrupt changes in the plot's gradient; this will need to be investigated.
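Both averages quoted in this section follow from the same kind of unit conversion as in the 200 node experiment. A small Go sketch with the raw numbers read from the plots is shown below; the report quotes the results as 41 and 4449.

```go
package main

import "fmt"

func main() {
	// Blocks produced per minute over the whole run (heights from the first plot).
	seconds := 5229.0
	fmt.Printf("blocks/min: %.1f\n", (3638.0-2.0)/seconds*60) // ~41.7

	// Transactions processed per minute over the same period (totals from the last plot).
	fmt.Printf("txs/min: %.1f\n", (387697.0-0.0)/seconds*60) // ~4448.6
}
```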
|
||||
|
||||
#### Peers
|
||||
|
||||
The plot below shows the evolution in peers throughout the experiment.
|
||||
The periodic changes observed are due to the ephemeral nodes being stopped,
|
||||
wiped out, and recreated.
|
||||
|
||||

|
||||
|
||||
The validators' plots are concentrated at the higher part of the graph, whereas the ephemeral nodes
|
||||
are mostly at the lower part.
|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
The average Resident Set Size (RSS) over all processes seems stable, growing slightly toward the end. This might be related to the increase in transaction load observed above.
|
||||
|
||||

|
||||
|
||||
The memory taken by the validators and the ephemeral nodes (when they are up) is comparable.
|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The plot shows metric `load1` for all nodes.
|
||||
|
||||

|
||||
|
||||
It stays under 5 most of the time, which is considered normal load. The purple line, which follows a different pattern, is the validator that receives all transactions, via RPC, from the load runner process.
|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: N/A**
|
||||
|
||||
Date: 2022-10-10
|
||||
|
||||
Version: a28c987f5a604ff66b515dd415270063e6fb069d
|
||||
BIN
docs/qa/v034/img/v034_200node_latencies.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
docs/qa/v034/img/v034_200node_latencies_zoomed.png
Normal file
|
After Width: | Height: | Size: 34 KiB |
BIN
docs/qa/v034/img/v034_latency_throughput.png
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
docs/qa/v034/img/v034_r200c2_heights.png
Normal file
|
After Width: | Height: | Size: 378 KiB |
BIN
docs/qa/v034/img/v034_r200c2_load-runner.png
Normal file
|
After Width: | Height: | Size: 150 KiB |
BIN
docs/qa/v034/img/v034_r200c2_load1.png
Normal file
|
After Width: | Height: | Size: 759 KiB |
BIN
docs/qa/v034/img/v034_r200c2_mempool_size.png
Normal file
|
After Width: | Height: | Size: 2.4 MiB |
BIN
docs/qa/v034/img/v034_r200c2_mempool_size_avg.png
Normal file
|
After Width: | Height: | Size: 192 KiB |
BIN
docs/qa/v034/img/v034_r200c2_peers.png
Normal file
|
After Width: | Height: | Size: 130 KiB |
BIN
docs/qa/v034/img/v034_r200c2_rounds.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
docs/qa/v034/img/v034_r200c2_rss.png
Normal file
|
After Width: | Height: | Size: 926 KiB |
BIN
docs/qa/v034/img/v034_r200c2_rss_avg.png
Normal file
|
After Width: | Height: | Size: 157 KiB |
BIN
docs/qa/v034/img/v034_r200c2_total-txs.png
Normal file
|
After Width: | Height: | Size: 534 KiB |
52
docs/qa/v034/img/v034_report_tabbed.txt
Normal file
@@ -0,0 +1,52 @@
|
||||
Experiment ID: 3d5cf4ef-1a1a-4b46-aa2d-da5643d2e81e │Experiment ID: 80e472ec-13a1-4772-a827-3b0c907fb51d │Experiment ID: 07aca6cf-c5a4-4696-988f-e3270fc6333b
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 25 │ Rate: 25 │ Rate: 25
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 2225 │ Total Valid Tx: 4450 │ Total Valid Tx: 8900
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 599.404362ms │ Minimum Latency: 448.145181ms │ Minimum Latency: 412.485729ms
|
||||
Maximum Latency: 3.539686885s │ Maximum Latency: 3.237392049s │ Maximum Latency: 12.026665368s
|
||||
Average Latency: 1.441485349s │ Average Latency: 1.441267946s │ Average Latency: 2.150192457s
|
||||
Standard Deviation: 541.049869ms │ Standard Deviation: 525.040007ms │ Standard Deviation: 2.233852478s
|
||||
│ │
|
||||
Experiment ID: 953dc544-dd40-40e8-8712-20c34c3ce45e │Experiment ID: d31fc258-16e7-45cd-9dc8-13ab87bc0b0a │Experiment ID: 15d90a7e-b941-42f4-b411-2f15f857739e
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 50 │ Rate: 50 │ Rate: 50
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 4450 │ Total Valid Tx: 8900 │ Total Valid Tx: 17800
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 482.046942ms │ Minimum Latency: 435.458913ms │ Minimum Latency: 510.746448ms
|
||||
Maximum Latency: 3.761483455s │ Maximum Latency: 7.175583584s │ Maximum Latency: 6.551497882s
|
||||
Average Latency: 1.450408183s │ Average Latency: 1.681673116s │ Average Latency: 1.738083875s
|
||||
Standard Deviation: 587.560056ms │ Standard Deviation: 1.147902047s │ Standard Deviation: 943.46522ms
|
||||
│ │
|
||||
Experiment ID: 9a0b9980-9ce6-4db5-a80a-65ca70294b87 │Experiment ID: df8fa4f4-80af-4ded-8a28-356d15018b43 │Experiment ID: d0e41c2c-89c0-4f38-8e34-ca07adae593a
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 100 │ Rate: 100 │ Rate: 100
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 8900 │ Total Valid Tx: 17800 │ Total Valid Tx: 35600
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 477.417219ms │ Minimum Latency: 564.29247ms │ Minimum Latency: 840.71089ms
|
||||
Maximum Latency: 6.63744785s │ Maximum Latency: 6.988553219s │ Maximum Latency: 9.555312398s
|
||||
Average Latency: 1.561216103s │ Average Latency: 1.76419063s │ Average Latency: 3.200941683s
|
||||
Standard Deviation: 1.011333552s │ Standard Deviation: 1.068459423s │ Standard Deviation: 1.732346601s
|
||||
│ │
|
||||
Experiment ID: 493df3ee-4a36-4bce-80f8-6d65da66beda │Experiment ID: 13060525-f04f-46f6-8ade-286684b2fe50 │Experiment ID: 1777cbd2-8c96-42e4-9ec7-9b21f2225e4d
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 200 │ Rate: 200 │ Rate: 200
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 17800 │ Total Valid Tx: 35600 │ Total Valid Tx: 38660
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 493.705261ms │ Minimum Latency: 955.090573ms │ Minimum Latency: 1.9485821s
|
||||
Maximum Latency: 7.440921872s │ Maximum Latency: 10.086673491s │ Maximum Latency: 17.73103976s
|
||||
Average Latency: 1.875510582s │ Average Latency: 3.438130099s │ Average Latency: 8.143862237s
|
||||
Standard Deviation: 1.304336995s │ Standard Deviation: 1.966391574s │ Standard Deviation: 3.943140002s
|
||||
|
||||
BIN
docs/qa/v034/img/v034_rotating_heights.png
Normal file
|
After Width: | Height: | Size: 157 KiB |
BIN
docs/qa/v034/img/v034_rotating_heights_ephe.png
Normal file
|
After Width: | Height: | Size: 140 KiB |
BIN
docs/qa/v034/img/v034_rotating_latencies.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
docs/qa/v034/img/v034_rotating_latencies_uniq.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
docs/qa/v034/img/v034_rotating_load1.png
Normal file
|
After Width: | Height: | Size: 1.5 MiB |
BIN
docs/qa/v034/img/v034_rotating_peers.png
Normal file
|
After Width: | Height: | Size: 486 KiB |
BIN
docs/qa/v034/img/v034_rotating_rss_avg.png
Normal file
|
After Width: | Height: | Size: 193 KiB |
BIN
docs/qa/v034/img/v034_rotating_total-txs.png
Normal file
|
After Width: | Height: | Size: 197 KiB |
326
docs/qa/v037/README.md
Normal file
@@ -0,0 +1,326 @@
|
||||
---
|
||||
order: 1
|
||||
parent:
|
||||
title: Tendermint Quality Assurance Results for v0.37.x
|
||||
description: This is a report on the results obtained when running v0.37.x on testnets
|
||||
order: 2
|
||||
---
|
||||
|
||||
# v0.37.x
|
||||
|
||||
## Issues discovered
|
||||
|
||||
During this iteration of the QA process, the following issues were found:
|
||||
|
||||
* (critical, fixed) [\#9533] - This bug caused full nodes to sometimes get stuck
|
||||
when blocksyncing, requiring a manual restart to unblock them. Importantly,
|
||||
this bug was also present in v0.34.x and the fix was also backported in
|
||||
[\#9534].
|
||||
* (critical, fixed) [\#9539] - `loadtime` is very likely to include more than one "=" character in transactions, which is rejected by the e2e application.
|
||||
* (critical, fixed) [\#9581] - An absent Prometheus label makes Tendermint crash when enabling Prometheus metric collection.
|
||||
* (non-critical, not fixed) [\#9548] - Full nodes can go over 50 connected
|
||||
peers, which is not intended by the default configuration.
|
||||
* (non-critical, not fixed) [\#9537] - With the default mempool cache setting, duplicated transactions are not rejected when gossiped and eventually flood all mempools. The 200 node testnets were thus run with a value of 200000 (as opposed to the default 10000).
|
||||
|
||||
## 200 Node Testnet
|
||||
|
||||
### Finding the Saturation Point
|
||||
|
||||
The first goal is to identify the saturation point and compare it with the baseline (v0.34.x).
|
||||
For further details, see [this paragraph](../v034/README.md#finding-the-saturation-point)
|
||||
in the baseline version.
|
||||
|
||||
The following table summarizes the results for v0.37.x, for the different experiments
|
||||
(extracted from file [`v037_report_tabbed.txt`](./img/v037_report_tabbed.txt)).
|
||||
|
||||
The X axis of this table is `c`, the number of connections created by the load runner process to the target node.
|
||||
The Y axis of this table is `r`, the rate or number of transactions issued per second.
|
||||
|
||||
| | c=1 | c=2 | c=4 |
|
||||
| :--- | ----: | ----: | ----: |
|
||||
| r=25 | 2225 | 4450 | 8900 |
|
||||
| r=50 | 4450 | 8900 | 17800 |
|
||||
| r=100 | 8900 | 17800 | 35600 |
|
||||
| r=200 | 17800 | 35600 | 38660 |
|
||||
|
||||
For comparison, this is the table for the baseline version.
|
||||
|
||||
| | c=1 | c=2 | c=4 |
|
||||
| :--- | ----: | ----: | ----: |
|
||||
| r=25 | 2225 | 4450 | 8900 |
|
||||
| r=50 | 4450 | 8900 | 17800 |
|
||||
| r=100 | 8900 | 17800 | 35400 |
|
||||
| r=200 | 17800 | 35600 | 37358 |
|
||||
|
||||
The saturation point is beyond the diagonal:
|
||||
|
||||
* `r=200,c=2`
|
||||
* `r=100,c=4`
|
||||
|
||||
which is at the same place as the baseline. For more details on the saturation point, see
|
||||
[this paragraph](../v034/README.md#finding-the-saturation-point) in the baseline version.
|
||||
|
||||
The experiment chosen to examine Prometheus metrics is the same as in the baseline:
|
||||
**`r=200,c=2`**.
|
||||
|
||||
The load runner's CPU load was negligible (near 0) when running `r=200,c=2`.
|
||||
|
||||
### Examining latencies
|
||||
|
||||
The method described [here](../method.md) allows us to plot the latencies of transactions
|
||||
for all experiments.
|
||||
|
||||

|
||||
|
||||
The data seen in the plot is similar to that of the baseline.
|
||||
|
||||

|
||||
|
||||
Therefore, for further details on these plots,
|
||||
see [this paragraph](../v034/README.md#examining-latencies) in the baseline version.
|
||||
|
||||
The following plot summarizes average latencies versus overall throughputs
|
||||
across different numbers of WebSocket connections to the node into which
|
||||
transactions are being loaded.
|
||||
|
||||

|
||||
|
||||
This is similar to the baseline plot:
|
||||
|
||||

|
||||
|
||||
### Prometheus Metrics on the Chosen Experiment
|
||||
|
||||
As mentioned [above](#finding-the-saturation-point), the chosen experiment is `r=200,c=2`.
|
||||
This section further examines key metrics for this experiment extracted from Prometheus data.
|
||||
|
||||
#### Mempool Size
|
||||
|
||||
The mempool size, a count of the number of transactions in the mempool, was shown to be stable and homogeneous
|
||||
at all full nodes. It did not exhibit any unconstrained growth.
|
||||
The plot below shows the evolution over time of the cumulative number of transactions inside all full nodes' mempools
|
||||
at a given time.
|
||||
|
||||

|
||||
|
||||
The plot below shows the evolution of the average over all full nodes, which oscillates between 1500 and 2000 outstanding transactions.
|
||||
|
||||

|
||||
|
||||
The peaks observed coincide with the moments when some nodes reached round 1 of consensus (see below).
|
||||
|
||||
**These plots yield similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### Peers
|
||||
|
||||
The number of peers was stable at all nodes.
|
||||
It was higher for the seed nodes (around 140) than for the rest (between 16 and 78).
|
||||
|
||||

|
||||
|
||||
Just as in the baseline, the fact that non-seed nodes reach more than 50 peers is due to #9548.
|
||||
|
||||
**This plot yields similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||
#### Consensus Rounds per Height
|
||||
|
||||
Most heights took just one round, but some nodes needed to advance to round 1 at some point.
|
||||
|
||||

|
||||
|
||||
**This plot yields slightly better results than the baseline**:
|
||||
|
||||

|
||||
|
||||
#### Blocks Produced per Minute, Transactions Processed per Minute
|
||||
|
||||
The blocks produced per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the height goes from 477 to 524.
|
||||
This results in an average of 23.5 blocks produced per minute.
|
||||
|
||||
The transactions processed per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 2 minutes, the total goes from 64525 to 100125 transactions,
|
||||
resulting in 17800 transactions per minute. However, we can see in the plot that
|
||||
all transactions in the load are processed long before the two minutes.
|
||||
If we adjust the time window when transactions are processed (approx. 90 seconds),
|
||||
we obtain 23733 transactions per minute.
|
||||
|
||||
**These plots yield similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
Resident Set Size of all monitored processes is plotted below.
|
||||
|
||||

|
||||
|
||||
The average over all processes oscillates around 380 MiB and does not demonstrate unconstrained growth.
|
||||
|
||||

|
||||
|
||||
**These plots yield similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The best Prometheus metric for gauging CPU utilization on a Unix machine is `load1`, the same load average that appears in the [output of `top`](https://www.digitalocean.com/community/tutorials/load-average-in-linux).
|
||||
|
||||

|
||||
|
||||
It stays below 5 on most nodes.
|
||||
|
||||
**This plot yields similar results to the baseline**:
|
||||
|
||||

|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: PASS**
|
||||
|
||||
Date: 2022-10-14
|
||||
|
||||
Version: 1cf9d8e276afe8595cba960b51cd056514965fd1
|
||||
|
||||
## Rotating Node Testnet
|
||||
|
||||
We use the same load as in the baseline: `c=4,r=800`.
|
||||
|
||||
Just as in the baseline tests, the version of Tendermint used for these tests is affected by #9539.
|
||||
See this paragraph in the [baseline report](../v034/README.md#rotating-node-testnet) for further details.
|
||||
Finally, note that this setup allows for a fairer comparison between this version and the baseline.
|
||||
|
||||
### Latencies
|
||||
|
||||
The plot of all latencies can be seen here.
|
||||
|
||||

|
||||
|
||||
This is similar to the baseline.
|
||||
|
||||

|
||||
|
||||
Note that we are comparing against the baseline plot with _unique_ transactions. This is because the problem with duplicate transactions detected during the baseline experiment did not show up for `v0.37`, although this is _not_ proof that the problem is absent in `v0.37`.
|
||||
|
||||
### Prometheus Metrics
|
||||
|
||||
The set of metrics shown here matches those shown for the baseline (`v0.34`) in the same experiment.
|
||||
We also show the baseline results for comparison.
|
||||
|
||||
#### Blocks and Transactions per minute
|
||||
|
||||
The blocks produced per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 4446 seconds, the height goes from 5 to 3323.
|
||||
This results in an average of 45 blocks produced per minute,
|
||||
which is similar to the baseline, shown below.
|
||||
|
||||

|
||||
|
||||
The following two plots show only the heights reported by ephemeral nodes.
|
||||
The second plot is the baseline plot for comparison.
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
By the length of the segments, we can see that ephemeral nodes in `v0.37`
|
||||
catch up slightly faster.
|
||||
|
||||
The transactions processed per minute are the gradient of this plot.
|
||||
|
||||

|
||||
|
||||
Over a period of 3852 seconds, the total goes from 597 to 267298 transactions in one of the validators,
|
||||
resulting in 4154 transactions per minute, which is slightly lower than the baseline,
|
||||
although the baseline had to deal with duplicate transactions.
|
||||
|
||||
For comparison, this is the baseline plot.
|
||||
|
||||

|
||||
|
||||
#### Peers
|
||||
|
||||
The plot below shows the evolution of the number of peers throughout the experiment.
|
||||
|
||||

|
||||
|
||||
This is the baseline plot, for comparison.
|
||||
|
||||

|
||||
|
||||
The plotted values and their evolution are comparable in both plots.
|
||||
|
||||
For further details on these plots, see the baseline report.
|
||||
|
||||
#### Memory Resident Set Size
|
||||
|
||||
The average Resident Set Size (RSS) over all processes looks slightly more stable
|
||||
on `v0.37` (first plot) than on the baseline (second plot).
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
The memory taken by the validators and the ephemeral nodes when they are up is comparable (not shown in the plots),
|
||||
just as observed in the baseline.
|
||||
|
||||
#### CPU utilization
|
||||
|
||||
The plot shows metric `load1` for all nodes.
|
||||
|
||||

|
||||
|
||||
This is the baseline plot.
|
||||
|
||||

|
||||
|
||||
In both cases, the load stays under 5 most of the time, which is considered normal. The green line in the `v0.37` plot and the purple line in the baseline (`v0.34`) plot correspond to the validators that receive all transactions, via RPC, from the load runner process; in both cases they oscillate around 5 (normal load). The main difference is that the other nodes are generally less loaded in `v0.37`.
|
||||
|
||||
### Test Result
|
||||
|
||||
**Result: PASS**
|
||||
|
||||
Date: 2022-10-10
|
||||
|
||||
Version: 155110007b9d8b83997a799016c1d0844c8efbaf
|
||||
|
||||
[\#9533]: https://github.com/tendermint/tendermint/pull/9533
|
||||
[\#9534]: https://github.com/tendermint/tendermint/pull/9534
|
||||
[\#9539]: https://github.com/tendermint/tendermint/issues/9539
|
||||
[\#9548]: https://github.com/tendermint/tendermint/issues/9548
|
||||
[\#9537]: https://github.com/tendermint/tendermint/issues/9537
|
||||
[\#9581]: https://github.com/tendermint/tendermint/issues/9581
|
||||
BIN
docs/qa/v037/img/v037_200node_latencies.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
docs/qa/v037/img/v037_latency_throughput.png
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
docs/qa/v037/img/v037_r200c2_heights.png
Normal file
|
After Width: | Height: | Size: 411 KiB |
BIN
docs/qa/v037/img/v037_r200c2_load1.png
Normal file
|
After Width: | Height: | Size: 887 KiB |
BIN
docs/qa/v037/img/v037_r200c2_mempool_size.png
Normal file
|
After Width: | Height: | Size: 2.3 MiB |
BIN
docs/qa/v037/img/v037_r200c2_mempool_size_avg.png
Normal file
|
After Width: | Height: | Size: 183 KiB |
BIN
docs/qa/v037/img/v037_r200c2_peers.png
Normal file
|
After Width: | Height: | Size: 133 KiB |
BIN
docs/qa/v037/img/v037_r200c2_rounds.png
Normal file
|
After Width: | Height: | Size: 589 KiB |
BIN
docs/qa/v037/img/v037_r200c2_rss.png
Normal file
|
After Width: | Height: | Size: 816 KiB |
BIN
docs/qa/v037/img/v037_r200c2_rss_avg.png
Normal file
|
After Width: | Height: | Size: 154 KiB |
BIN
docs/qa/v037/img/v037_r200c2_total-txs.png
Normal file
|
After Width: | Height: | Size: 538 KiB |
52
docs/qa/v037/img/v037_report_tabbed.txt
Normal file
@@ -0,0 +1,52 @@
|
||||
Experiment ID: af129eae-7039-4c76-8c37-cff9ac636a84 │Experiment ID: 0f88bd33-9bf0-4197-8d1d-9a737c301ec6 │Experiment ID: 88227cad-2ba8-4eb6-b493-041d8120b46f
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 25 │ Rate: 25 │ Rate: 25
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 2225 │ Total Valid Tx: 4450 │ Total Valid Tx: 8900
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 506.248587ms │ Minimum Latency: 469.53452ms │ Minimum Latency: 588.900721ms
|
||||
Maximum Latency: 3.032125789s │ Maximum Latency: 6.548830955s │ Maximum Latency: 6.533739843s
|
||||
Average Latency: 1.427767726s │ Average Latency: 1.448582257s │ Average Latency: 1.717432341s
|
||||
Standard Deviation: 524.11782ms │ Standard Deviation: 768.684133ms │ Standard Deviation: 1.000015768s
|
||||
│ │
|
||||
Experiment ID: f03d39bd-0233-4b3c-b461-543445ae1d4b │Experiment ID: 46674f1c-e591-4e36-bb9b-f375c19fc475 │Experiment ID: 5385c159-8d4d-455b-bced-dcd4a3209988
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 50 │ Rate: 50 │ Rate: 50
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 4450 │ Total Valid Tx: 8900 │ Total Valid Tx: 17800
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 477.46027ms │ Minimum Latency: 455.757111ms │ Minimum Latency: 594.749081ms
|
||||
Maximum Latency: 2.483895394s │ Maximum Latency: 2.904715695s │ Maximum Latency: 9.294950389s
|
||||
Average Latency: 1.407374662s │ Average Latency: 1.397385779s │ Average Latency: 2.621122536s
|
||||
Standard Deviation: 505.150067ms │ Standard Deviation: 551.67603ms │ Standard Deviation: 1.772725794s
|
||||
│ │
|
||||
Experiment ID: 9161b4a7-d75c-455f-b82d-2b5235d533cf │Experiment ID: 993a13a8-9db1-4b2b-9c20-71a5b85e4bbf │Experiment ID: ad1eb9e1-f4d6-41fd-9ba7-0f1f7dde1e3e
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 100 │ Rate: 100 │ Rate: 100
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 8900 │ Total Valid Tx: 17800 │ Total Valid Tx: 35400
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 448.050467ms │ Minimum Latency: 605.436195ms │ Minimum Latency: 1.16816912s
|
||||
Maximum Latency: 3.789711139s │ Maximum Latency: 7.292770222s │ Maximum Latency: 11.378681842s
|
||||
Average Latency: 1.451342158s │ Average Latency: 2.07457999s │ Average Latency: 3.918384209s
|
||||
Standard Deviation: 644.075973ms │ Standard Deviation: 1.230204022s │ Standard Deviation: 2.172400458s
|
||||
│ │
|
||||
Experiment ID: 3cbe9c3d-9c43-4c9f-b5ca-b567d20bbd57 │Experiment ID: af836c5e-d9b6-4d5d-971c-2fc7f07aa2a0 │Experiment ID: 77606397-4989-41d4-b13b-f1f4d1af063f
|
||||
│ │
|
||||
Connections: 1 │ Connections: 2 │ Connections: 4
|
||||
Rate: 200 │ Rate: 200 │ Rate: 200
|
||||
Size: 1024 │ Size: 1024 │ Size: 1024
|
||||
│ │
|
||||
Total Valid Tx: 17800 │ Total Valid Tx: 35600 │ Total Valid Tx: 37358
|
||||
Total Negative Latencies: 0 │ Total Negative Latencies: 0 │ Total Negative Latencies: 0
|
||||
Minimum Latency: 519.984701ms │ Minimum Latency: 820.755087ms │ Minimum Latency: 1.712574804s
|
||||
Maximum Latency: 12.609056712s │ Maximum Latency: 9.260798095s │ Maximum Latency: 25.739223696s
|
||||
Average Latency: 2.717853101s │ Average Latency: 3.477731881s │ Average Latency: 8.547725264s
|
||||
Standard Deviation: 2.390778155s │ Standard Deviation: 1.675000913s │ Standard Deviation: 4.76961569s
|
||||
|
||||
BIN
docs/qa/v037/img/v037_rotating_heights.png
Normal file
|
After Width: | Height: | Size: 167 KiB |
BIN
docs/qa/v037/img/v037_rotating_heights_ephe.png
Normal file
|
After Width: | Height: | Size: 138 KiB |
BIN
docs/qa/v037/img/v037_rotating_latencies.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
docs/qa/v037/img/v037_rotating_load1.png
Normal file
|
After Width: | Height: | Size: 1.3 MiB |
BIN
docs/qa/v037/img/v037_rotating_peers.png
Normal file
|
After Width: | Height: | Size: 577 KiB |
BIN
docs/qa/v037/img/v037_rotating_rss_avg.png
Normal file
|
After Width: | Height: | Size: 217 KiB |
BIN
docs/qa/v037/img/v037_rotating_total-txs.png
Normal file
|
After Width: | Height: | Size: 181 KiB |
@@ -4,7 +4,6 @@ import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/cosmos/gogoproto/proto"
|
||||
clist "github.com/tendermint/tendermint/libs/clist"
|
||||
"github.com/tendermint/tendermint/libs/log"
|
||||
"github.com/tendermint/tendermint/p2p"
|
||||
@@ -56,7 +55,6 @@ func (evR *Reactor) GetChannels() []*p2p.ChannelDescriptor {
|
||||
ID: EvidenceChannel,
|
||||
Priority: 6,
|
||||
RecvMessageCapacity: maxMsgSize,
|
||||
MessageType: &tmproto.EvidenceList{},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -68,11 +66,11 @@ func (evR *Reactor) AddPeer(peer p2p.Peer) {
|
||||
|
||||
// Receive implements Reactor.
|
||||
// It adds any received evidence to the evpool.
|
||||
func (evR *Reactor) Receive(e p2p.Envelope) {
|
||||
evis, err := evidenceListFromProto(e.Message)
|
||||
func (evR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
|
||||
evis, err := decodeMsg(msgBytes)
|
||||
if err != nil {
|
||||
evR.Logger.Error("Error decoding message", "src", e.Src, "chId", e.ChannelID, "err", err)
|
||||
evR.Switch.StopPeerForError(e.Src, err)
|
||||
evR.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err)
|
||||
evR.Switch.StopPeerForError(src, err)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -82,7 +80,7 @@ func (evR *Reactor) Receive(e p2p.Envelope) {
|
||||
case *types.ErrInvalidEvidence:
|
||||
evR.Logger.Error(err.Error())
|
||||
// punish peer
|
||||
evR.Switch.StopPeerForError(e.Src, err)
|
||||
evR.Switch.StopPeerForError(src, err)
|
||||
return
|
||||
case nil:
|
||||
default:
|
||||
@@ -128,15 +126,11 @@ func (evR *Reactor) broadcastEvidenceRoutine(peer p2p.Peer) {
|
||||
evis := evR.prepareEvidenceMessage(peer, ev)
|
||||
if len(evis) > 0 {
|
||||
evR.Logger.Debug("Gossiping evidence to peer", "ev", ev, "peer", peer)
|
||||
evp, err := evidenceListToProto(evis)
|
||||
msgBytes, err := encodeMsg(evis)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
success := peer.Send(p2p.Envelope{
|
||||
ChannelID: EvidenceChannel,
|
||||
Message: evp,
|
||||
})
|
||||
success := peer.Send(EvidenceChannel, msgBytes)
|
||||
if !success {
|
||||
time.Sleep(peerRetryMessageIntervalMS * time.Millisecond)
|
||||
continue
|
||||
@@ -216,7 +210,7 @@ type PeerState interface {
|
||||
|
||||
// encodemsg takes a array of evidence
|
||||
// returns the byte encoding of the List Message
|
||||
func evidenceListToProto(evis []types.Evidence) (*tmproto.EvidenceList, error) {
|
||||
func encodeMsg(evis []types.Evidence) ([]byte, error) {
|
||||
evi := make([]tmproto.Evidence, len(evis))
|
||||
for i := 0; i < len(evis); i++ {
|
||||
ev, err := types.EvidenceToProto(evis[i])
|
||||
@@ -228,13 +222,19 @@ func evidenceListToProto(evis []types.Evidence) (*tmproto.EvidenceList, error) {
|
||||
epl := tmproto.EvidenceList{
|
||||
Evidence: evi,
|
||||
}
|
||||
return &epl, nil
|
||||
|
||||
return epl.Marshal()
|
||||
}
|
||||
|
||||
func evidenceListFromProto(m proto.Message) ([]types.Evidence, error) {
|
||||
lm := m.(*tmproto.EvidenceList)
|
||||
// decodemsg takes an array of bytes
|
||||
// returns an array of evidence
|
||||
func decodeMsg(bz []byte) (evis []types.Evidence, err error) {
|
||||
lm := tmproto.EvidenceList{}
|
||||
if err := lm.Unmarshal(bz); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
evis := make([]types.Evidence, len(lm.Evidence))
|
||||
evis = make([]types.Evidence, len(lm.Evidence))
|
||||
for i := 0; i < len(lm.Evidence); i++ {
|
||||
ev, err := types.EvidenceFromProto(&lm.Evidence[i])
|
||||
if err != nil {
|
||||
|
||||
@@ -208,10 +208,7 @@ func TestReactorBroadcastEvidenceMemoryLeak(t *testing.T) {
|
||||
// i.e. broadcastEvidenceRoutine finishes when peer is stopped
|
||||
defer leaktest.CheckTimeout(t, 10*time.Second)()
|
||||
|
||||
p.On("Send", mock.MatchedBy(func(i interface{}) bool {
|
||||
e, ok := i.(p2p.Envelope)
|
||||
return ok && e.ChannelID == evidence.EvidenceChannel
|
||||
})).Return(false)
|
||||
p.On("Send", evidence.EvidenceChannel, mock.AnythingOfType("[]uint8")).Return(false)
|
||||
quitChan := make(<-chan struct{})
|
||||
p.On("Quit").Return(quitChan)
|
||||
ps := peerState{2}
|
||||
|
||||
6
go.mod
@@ -3,7 +3,7 @@ module github.com/tendermint/tendermint
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.2.0
|
||||
github.com/BurntSushi/toml v1.2.1
|
||||
github.com/adlio/schema v1.3.3
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible // indirect
|
||||
github.com/fortytw2/leaktest v1.3.0
|
||||
@@ -28,13 +28,13 @@ require (
|
||||
github.com/rs/cors v1.8.2
|
||||
github.com/sasha-s/go-deadlock v0.3.1
|
||||
github.com/snikch/goodman v0.0.0-20171125024755-10e37e294daa
|
||||
github.com/spf13/cobra v1.5.0
|
||||
github.com/spf13/cobra v1.6.0
|
||||
github.com/spf13/viper v1.13.0
|
||||
github.com/stretchr/testify v1.8.0
|
||||
github.com/tendermint/tm-db v0.6.6
|
||||
golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa
|
||||
golang.org/x/net v0.0.0-20220812174116-3211cb980234
|
||||
google.golang.org/grpc v1.50.0
|
||||
google.golang.org/grpc v1.50.1
|
||||
)
|
||||
|
||||
require (
|
||||
|
||||
12
go.sum
@@ -53,8 +53,8 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/BurntSushi/toml v1.2.0 h1:Rt8g24XnyGTyglgET/PRUNlrUeu9F5L+7FilkXfZgs0=
|
||||
github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak=
|
||||
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/ChainSafe/go-schnorrkel v0.0.0-20200405005733-88cbf1b4c40d/go.mod h1:URdX5+vg25ts3aCh8H5IFZybJYKWhJHYMTnf+ULtoC4=
|
||||
github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60=
|
||||
@@ -1042,8 +1042,8 @@ github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tL
|
||||
github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE=
|
||||
github.com/spf13/cobra v1.1.1/go.mod h1:WnodtKOvamDL/PwE2M4iKs8aMDBZ5Q5klgD3qfVJQMI=
|
||||
github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk=
|
||||
github.com/spf13/cobra v1.5.0 h1:X+jTBEBqF0bHN+9cSMgmfuvv2VHJ9ezmFNf9Y/XstYU=
|
||||
github.com/spf13/cobra v1.5.0/go.mod h1:dWXEIy2H428czQCjInthrTRUg7yKbok+2Qi/yBIJoUM=
|
||||
github.com/spf13/cobra v1.6.0 h1:42a0n6jwCot1pUmomAp4T7DeMD+20LFv4Q54pxLf2LI=
|
||||
github.com/spf13/cobra v1.6.0/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY=
|
||||
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
|
||||
github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
|
||||
github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
|
||||
@@ -1696,8 +1696,8 @@ google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQ
|
||||
google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34=
|
||||
google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzIUK6k=
|
||||
google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU=
|
||||
google.golang.org/grpc v1.50.0 h1:fPVVDxY9w++VjTZsYvXWqEf9Rqar/e+9zYfxKK+W+YU=
|
||||
google.golang.org/grpc v1.50.0/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI=
|
||||
google.golang.org/grpc v1.50.1 h1:DS/BukOZWp8s6p4Dt/tOaJaTQyPyOoCcrjroHuCeLzY=
|
||||
google.golang.org/grpc v1.50.1/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI=
|
||||
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
||||
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
||||
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
||||
|
||||
@@ -134,7 +134,6 @@ func (memR *Reactor) GetChannels() []*p2p.ChannelDescriptor {
|
||||
ID: mempool.MempoolChannel,
|
||||
Priority: 5,
|
||||
RecvMessageCapacity: batchMsg.Size(),
|
||||
MessageType: &protomem.Message{},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -155,34 +154,28 @@ func (memR *Reactor) RemovePeer(peer p2p.Peer, reason interface{}) {
|
||||
|
||||
// Receive implements Reactor.
|
||||
// It adds any received transactions to the mempool.
|
||||
func (memR *Reactor) Receive(e p2p.Envelope) {
|
||||
memR.Logger.Debug("Receive", "src", e.Src, "chId", e.ChannelID, "msg", e.Message)
|
||||
switch msg := e.Message.(type) {
|
||||
case *protomem.Txs:
|
||||
protoTxs := msg.GetTxs()
|
||||
if len(protoTxs) == 0 {
|
||||
memR.Logger.Error("received tmpty txs from peer", "src", e.Src)
|
||||
return
|
||||
}
|
||||
txInfo := mempool.TxInfo{SenderID: memR.ids.GetForPeer(e.Src)}
|
||||
if e.Src != nil {
|
||||
txInfo.SenderP2PID = e.Src.ID()
|
||||
}
|
||||
|
||||
var err error
|
||||
for _, tx := range protoTxs {
|
||||
ntx := types.Tx(tx)
|
||||
err = memR.mempool.CheckTx(ntx, nil, txInfo)
|
||||
if errors.Is(err, mempool.ErrTxInCache) {
|
||||
memR.Logger.Debug("Tx already exists in cache", "tx", ntx.String())
|
||||
} else if err != nil {
|
||||
memR.Logger.Info("Could not check tx", "tx", ntx.String(), "err", err)
|
||||
}
|
||||
}
|
||||
default:
|
||||
memR.Logger.Error("unknown message type", "src", e.Src, "chId", e.ChannelID, "msg", e.Message)
|
||||
func (memR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
|
||||
msg, err := memR.decodeMsg(msgBytes)
|
||||
if err != nil {
|
||||
memR.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err)
|
||||
memR.Switch.StopPeerForError(src, err)
|
||||
return
|
||||
}
|
||||
memR.Logger.Debug("Receive", "src", src, "chId", chID, "msg", msg)
|
||||
|
||||
txInfo := mempool.TxInfo{SenderID: memR.ids.GetForPeer(src)}
|
||||
if src != nil {
|
||||
txInfo.SenderP2PID = src.ID()
|
||||
}
|
||||
|
||||
for _, tx := range msg.Txs {
|
||||
err = memR.mempool.CheckTx(tx, nil, txInfo)
|
||||
if errors.Is(err, mempool.ErrTxInCache) {
|
||||
memR.Logger.Debug("Tx already exists in cache", "tx", tx.String())
|
||||
} else if err != nil {
|
||||
memR.Logger.Info("Could not check tx", "tx", tx.String(), "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// broadcasting happens from go routines per peer
|
||||
}
|
||||
@@ -241,14 +234,18 @@ func (memR *Reactor) broadcastTxRoutine(peer p2p.Peer) {
|
||||
// https://github.com/tendermint/tendermint/issues/5796
|
||||
|
||||
if _, ok := memTx.senders.Load(peerID); !ok {
|
||||
success := peer.Send(p2p.Envelope{
|
||||
ChannelID: mempool.MempoolChannel,
|
||||
Message: &protomem.Message{
|
||||
Sum: &protomem.Message_Txs{
|
||||
Txs: &protomem.Txs{Txs: [][]byte{memTx.tx}},
|
||||
},
|
||||
msg := protomem.Message{
|
||||
Sum: &protomem.Message_Txs{
|
||||
Txs: &protomem.Txs{Txs: [][]byte{memTx.tx}},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
bz, err := msg.Marshal()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
success := peer.Send(mempool.MempoolChannel, bz)
|
||||
if !success {
|
||||
time.Sleep(mempool.PeerCatchupSleepIntervalMS * time.Millisecond)
|
||||
continue
|
||||
@@ -267,6 +264,35 @@ func (memR *Reactor) broadcastTxRoutine(peer p2p.Peer) {
|
||||
}
|
||||
}
|
||||
|
||||
func (memR *Reactor) decodeMsg(bz []byte) (TxsMessage, error) {
|
||||
msg := protomem.Message{}
|
||||
err := msg.Unmarshal(bz)
|
||||
if err != nil {
|
||||
return TxsMessage{}, err
|
||||
}
|
||||
|
||||
var message TxsMessage
|
||||
|
||||
if i, ok := msg.Sum.(*protomem.Message_Txs); ok {
|
||||
txs := i.Txs.GetTxs()
|
||||
|
||||
if len(txs) == 0 {
|
||||
return message, errors.New("empty TxsMessage")
|
||||
}
|
||||
|
||||
decoded := make([]types.Tx, len(txs))
|
||||
for j, tx := range txs {
|
||||
decoded[j] = types.Tx(tx)
|
||||
}
|
||||
|
||||
message = TxsMessage{
|
||||
Txs: decoded,
|
||||
}
|
||||
return message, nil
|
||||
}
|
||||
return message, fmt.Errorf("msg type: %T is not supported", msg)
|
||||
}
|
||||
|
||||
// TxsMessage is a Message containing transactions.
|
||||
type TxsMessage struct {
|
||||
Txs []types.Tx
|
||||
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
"github.com/tendermint/tendermint/p2p"
|
||||
"github.com/tendermint/tendermint/p2p/mock"
|
||||
memproto "github.com/tendermint/tendermint/proto/tendermint/mempool"
|
||||
protomem "github.com/tendermint/tendermint/proto/tendermint/mempool"
|
||||
"github.com/tendermint/tendermint/proxy"
|
||||
"github.com/tendermint/tendermint/types"
|
||||
)
|
||||
@@ -280,12 +279,7 @@ func TestDontExhaustMaxActiveIDs(t *testing.T) {
|
||||
|
||||
for i := 0; i < mempool.MaxActiveIDs+1; i++ {
|
||||
peer := mock.NewPeer(nil)
|
||||
reactor.Receive(p2p.Envelope{
|
||||
ChannelID: mempool.MempoolChannel,
|
||||
Src: peer,
|
||||
Message: &protomem.Txs{
|
||||
Txs: [][]byte{{0x01, 0x02, 0x03}},
|
||||
}})
|
||||
reactor.Receive(mempool.MempoolChannel, peer, []byte{0x1, 0x2, 0x3})
|
||||
reactor.AddPeer(peer)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -133,7 +133,6 @@ func (memR *Reactor) GetChannels() []*p2p.ChannelDescriptor {
|
||||
ID: mempool.MempoolChannel,
|
||||
Priority: 5,
|
||||
RecvMessageCapacity: batchMsg.Size(),
|
||||
MessageType: &protomem.Message{},
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -154,35 +153,27 @@ func (memR *Reactor) RemovePeer(peer p2p.Peer, reason interface{}) {
|
||||
|
||||
// Receive implements Reactor.
|
||||
// It adds any received transactions to the mempool.
|
||||
func (memR *Reactor) Receive(e p2p.Envelope) {
|
||||
memR.Logger.Debug("Receive", "src", e.Src, "chId", e.ChannelID, "msg", e.Message)
|
||||
switch msg := e.Message.(type) {
|
||||
case *protomem.Txs:
|
||||
protoTxs := msg.GetTxs()
|
||||
if len(protoTxs) == 0 {
|
||||
memR.Logger.Error("received tmpty txs from peer", "src", e.Src)
|
||||
return
|
||||
}
|
||||
txInfo := mempool.TxInfo{SenderID: memR.ids.GetForPeer(e.Src)}
|
||||
if e.Src != nil {
|
||||
txInfo.SenderP2PID = e.Src.ID()
|
||||
}
|
||||
|
||||
var err error
|
||||
for _, tx := range protoTxs {
|
||||
ntx := types.Tx(tx)
|
||||
err = memR.mempool.CheckTx(ntx, nil, txInfo)
|
||||
if errors.Is(err, mempool.ErrTxInCache) {
|
||||
memR.Logger.Debug("Tx already exists in cache", "tx", ntx.String())
|
||||
} else if err != nil {
|
||||
memR.Logger.Info("Could not check tx", "tx", ntx.String(), "err", err)
|
||||
}
|
||||
}
|
||||
default:
|
||||
memR.Logger.Error("unknown message type", "src", e.Src, "chId", e.ChannelID, "msg", e.Message)
|
||||
func (memR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
|
||||
msg, err := memR.decodeMsg(msgBytes)
|
||||
if err != nil {
|
||||
memR.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err)
|
||||
memR.Switch.StopPeerForError(src, err)
|
||||
return
|
||||
}
|
||||
memR.Logger.Debug("Receive", "src", src, "chId", chID, "msg", msg)
|
||||
|
||||
txInfo := mempool.TxInfo{SenderID: memR.ids.GetForPeer(src)}
|
||||
if src != nil {
|
||||
txInfo.SenderP2PID = src.ID()
|
||||
}
|
||||
for _, tx := range msg.Txs {
|
||||
err = memR.mempool.CheckTx(tx, nil, txInfo)
|
||||
if err == mempool.ErrTxInCache {
|
||||
memR.Logger.Debug("Tx already exists in cache", "tx", tx.String())
|
||||
} else if err != nil {
|
||||
memR.Logger.Info("Could not check tx", "tx", tx.String(), "err", err)
|
||||
}
|
||||
}
|
||||
// broadcasting happens from go routines per peer
|
||||
}
|
||||
|
||||
@@ -242,14 +233,18 @@ func (memR *Reactor) broadcastTxRoutine(peer p2p.Peer) {
|
||||
// NOTE: Transaction batching was disabled due to
|
||||
// https://github.com/tendermint/tendermint/issues/5796
|
||||
if !memTx.HasPeer(peerID) {
|
||||
success := peer.Send(p2p.Envelope{
|
||||
ChannelID: mempool.MempoolChannel,
|
||||
Message: &protomem.Message{
|
||||
Sum: &protomem.Message_Txs{
|
||||
Txs: &protomem.Txs{Txs: [][]byte{memTx.tx}},
|
||||
},
|
||||
msg := protomem.Message{
|
||||
Sum: &protomem.Message_Txs{
|
||||
Txs: &protomem.Txs{Txs: [][]byte{memTx.tx}},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
bz, err := msg.Marshal()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
success := peer.Send(mempool.MempoolChannel, bz)
|
||||
if !success {
|
||||
time.Sleep(mempool.PeerCatchupSleepIntervalMS * time.Millisecond)
|
||||
continue
|
||||
@@ -273,6 +268,37 @@ func (memR *Reactor) broadcastTxRoutine(peer p2p.Peer) {
|
||||
//-----------------------------------------------------------------------------
|
||||
// Messages
|
||||
|
||||
func (memR *Reactor) decodeMsg(bz []byte) (TxsMessage, error) {
|
||||
msg := protomem.Message{}
|
||||
err := msg.Unmarshal(bz)
|
||||
if err != nil {
|
||||
return TxsMessage{}, err
|
||||
}
|
||||
|
||||
var message TxsMessage
|
||||
|
||||
if i, ok := msg.Sum.(*protomem.Message_Txs); ok {
|
||||
txs := i.Txs.GetTxs()
|
||||
|
||||
if len(txs) == 0 {
|
||||
return message, errors.New("empty TxsMessage")
|
||||
}
|
||||
|
||||
decoded := make([]types.Tx, len(txs))
|
||||
for j, tx := range txs {
|
||||
decoded[j] = types.Tx(tx)
|
||||
}
|
||||
|
||||
message = TxsMessage{
|
||||
Txs: decoded,
|
||||
}
|
||||
return message, nil
|
||||
}
|
||||
return message, fmt.Errorf("msg type: %T is not supported", msg)
|
||||
}
|
||||
|
||||
//-------------------------------------
|
||||
|
||||
// TxsMessage is a Message containing transactions.
|
||||
type TxsMessage struct {
|
||||
Txs []types.Tx
|
||||
|
||||
@@ -38,13 +38,13 @@ type Reactor interface {
|
||||
// or other reason).
|
||||
RemovePeer(peer Peer, reason interface{})
|
||||
|
||||
// Receive is called by the switch when a message is received from the peer.
|
||||
// Receive is called by the switch when msgBytes is received from the peer.
|
||||
//
|
||||
// NOTE reactor can not keep msgBytes around after Receive completes without
|
||||
// copying.
|
||||
//
|
||||
// CONTRACT: msgBytes are not nil.
|
||||
Receive(Envelope)
|
||||
Receive(chID byte, peer Peer, msgBytes []byte)
|
||||
}
|
||||
|
||||
//--------------------------------------
|
||||
@@ -64,8 +64,8 @@ func NewBaseReactor(name string, impl Reactor) *BaseReactor {
|
||||
func (br *BaseReactor) SetSwitch(sw *Switch) {
|
||||
br.Switch = sw
|
||||
}
|
||||
func (*BaseReactor) GetChannels() []*conn.ChannelDescriptor { return nil }
|
||||
func (*BaseReactor) AddPeer(peer Peer) {}
|
||||
func (*BaseReactor) RemovePeer(peer Peer, reason interface{}) {}
|
||||
func (*BaseReactor) Receive(e Envelope) {}
|
||||
func (*BaseReactor) InitPeer(peer Peer) Peer { return peer }
|
||||
func (*BaseReactor) GetChannels() []*conn.ChannelDescriptor { return nil }
|
||||
func (*BaseReactor) AddPeer(peer Peer) {}
|
||||
func (*BaseReactor) RemovePeer(peer Peer, reason interface{}) {}
|
||||
func (*BaseReactor) Receive(chID byte, peer Peer, msgBytes []byte) {}
|
||||
func (*BaseReactor) InitPeer(peer Peer) Peer { return peer }
|
||||
|
||||
@@ -724,7 +724,6 @@ type ChannelDescriptor struct {
|
||||
SendQueueCapacity int
|
||||
RecvBufferCapacity int
|
||||
RecvMessageCapacity int
|
||||
MessageType proto.Message
|
||||
}
|
||||
|
||||
func (chDesc ChannelDescriptor) FillDefaults() (filled ChannelDescriptor) {
|
||||
|
||||
@@ -44,29 +44,15 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
|
||||
Name: "num_txs",
|
||||
Help: "Number of transactions submitted by each peer.",
|
||||
}, append(labels, "peer_id")).With(labelsAndValues...),
|
||||
MessageReceiveBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "message_receive_bytes_total",
|
||||
Help: "Number of bytes of each message type received.",
|
||||
}, append(labels, "message_type")).With(labelsAndValues...),
|
||||
MessageSendBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: MetricsSubsystem,
|
||||
Name: "message_send_bytes_total",
|
||||
Help: "Number of bytes of each message type sent.",
|
||||
}, append(labels, "message_type")).With(labelsAndValues...),
|
||||
}
|
||||
}
|
||||
|
||||
func NopMetrics() *Metrics {
|
||||
return &Metrics{
|
||||
Peers: discard.NewGauge(),
|
||||
PeerReceiveBytesTotal: discard.NewCounter(),
|
||||
PeerSendBytesTotal: discard.NewCounter(),
|
||||
PeerPendingSendBytes: discard.NewGauge(),
|
||||
NumTxs: discard.NewGauge(),
|
||||
MessageReceiveBytesTotal: discard.NewCounter(),
|
||||
MessageSendBytesTotal: discard.NewCounter(),
|
||||
Peers: discard.NewGauge(),
|
||||
PeerReceiveBytesTotal: discard.NewCounter(),
|
||||
PeerSendBytesTotal: discard.NewCounter(),
|
||||
PeerPendingSendBytes: discard.NewGauge(),
|
||||
NumTxs: discard.NewGauge(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,8 +24,4 @@ type Metrics struct {
|
||||
PeerPendingSendBytes metrics.Gauge `metrics_labels:"peer_id"`
|
||||
// Number of transactions submitted by each peer.
|
||||
NumTxs metrics.Gauge `metrics_labels:"peer_id"`
|
||||
// Number of bytes of each message type received.
|
||||
MessageReceiveBytesTotal metrics.Counter `metrics_labels:"message_type"`
|
||||
// Number of bytes of each message type sent.
|
||||
MessageSendBytesTotal metrics.Counter `metrics_labels:"message_type"`
|
||||
}
|
||||
|
||||
@@ -42,9 +42,9 @@ func NewPeer(ip net.IP) *Peer {
|
||||
return mp
|
||||
}
|
||||
|
||||
func (mp *Peer) FlushStop() { mp.Stop() } //nolint:errcheck //ignore error
|
||||
func (mp *Peer) TrySend(e p2p.Envelope) bool { return true }
|
||||
func (mp *Peer) Send(e p2p.Envelope) bool { return true }
|
||||
func (mp *Peer) FlushStop() { mp.Stop() } //nolint:errcheck //ignore error
|
||||
func (mp *Peer) TrySend(chID byte, msgBytes []byte) bool { return true }
|
||||
func (mp *Peer) Send(chID byte, msgBytes []byte) bool { return true }
|
||||
func (mp *Peer) NodeInfo() p2p.NodeInfo {
|
||||
return p2p.DefaultNodeInfo{
|
||||
DefaultNodeID: mp.addr.ID,
|
||||
|
||||
@@ -19,7 +19,7 @@ func NewReactor() *Reactor {
return r
}

func (r *Reactor) GetChannels() []*conn.ChannelDescriptor { return r.Channels }
func (r *Reactor) AddPeer(peer p2p.Peer) {}
func (r *Reactor) RemovePeer(peer p2p.Peer, reason interface{}) {}
func (r *Reactor) Receive(e p2p.Envelope) {}
func (r *Reactor) GetChannels() []*conn.ChannelDescriptor { return r.Channels }
func (r *Reactor) AddPeer(peer p2p.Peer) {}
func (r *Reactor) RemovePeer(peer p2p.Peer, reason interface{}) {}
func (r *Reactor) Receive(chID byte, peer p2p.Peer, msgBytes []byte) {}
@@ -234,13 +234,13 @@ func (_m *Peer) Reset() error {
return r0
}

// Send provides a mock function with given fields: _a0
func (_m *Peer) Send(_a0 p2p.Envelope) bool {
ret := _m.Called(_a0)
// Send provides a mock function with given fields: _a0, _a1
func (_m *Peer) Send(_a0 byte, _a1 []byte) bool {
ret := _m.Called(_a0, _a1)

var r0 bool
if rf, ok := ret.Get(0).(func(p2p.Envelope) bool); ok {
r0 = rf(_a0)
if rf, ok := ret.Get(0).(func(byte, []byte) bool); ok {
r0 = rf(_a0, _a1)
} else {
r0 = ret.Get(0).(bool)
}
@@ -335,13 +335,13 @@ func (_m *Peer) String() string {
return r0
}

// TrySend provides a mock function with given fields: _a0
func (_m *Peer) TrySend(_a0 p2p.Envelope) bool {
ret := _m.Called(_a0)
// TrySend provides a mock function with given fields: _a0, _a1
func (_m *Peer) TrySend(_a0 byte, _a1 []byte) bool {
ret := _m.Called(_a0, _a1)

var r0 bool
if rf, ok := ret.Get(0).(func(p2p.Envelope) bool); ok {
r0 = rf(_a0)
if rf, ok := ret.Get(0).(func(byte, []byte) bool); ok {
r0 = rf(_a0, _a1)
} else {
r0 = ret.Get(0).(bool)
}
70
p2p/peer.go
@@ -5,8 +5,6 @@ import (
"net"
"time"

"github.com/cosmos/gogoproto/proto"

"github.com/tendermint/tendermint/libs/cmap"
"github.com/tendermint/tendermint/libs/log"
"github.com/tendermint/tendermint/libs/service"
@@ -36,8 +34,8 @@ type Peer interface {
Status() tmconn.ConnectionStatus
SocketAddr() *NetAddress // actual address of the socket

Send(Envelope) bool
TrySend(Envelope) bool
Send(byte, []byte) bool
TrySend(byte, []byte) bool

Set(string, interface{})
Get(string) interface{}
@@ -134,7 +132,6 @@ func newPeer(
mConfig tmconn.MConnConfig,
nodeInfo NodeInfo,
reactorsByCh map[byte]Reactor,
msgTypeByChID map[byte]proto.Message,
chDescs []*tmconn.ChannelDescriptor,
onPeerError func(Peer, interface{}),
options ...PeerOption,
@@ -152,7 +149,6 @@ func newPeer(
pc.conn,
p,
reactorsByCh,
msgTypeByChID,
chDescs,
onPeerError,
mConfig,
@@ -253,31 +249,19 @@ func (p *peer) Status() tmconn.ConnectionStatus {

// Send msg bytes to the channel identified by chID byte. Returns false if the
// send queue is full after timeout, specified by MConnection.
func (p *peer) Send(e Envelope) bool {
func (p *peer) Send(chID byte, msgBytes []byte) bool {
if !p.IsRunning() {
// see Switch#Broadcast, where we fetch the list of peers and loop over
// them - while we're looping, one peer may be removed and stopped.
return false
} else if !p.hasChannel(e.ChannelID) {
} else if !p.hasChannel(chID) {
return false
}
msg := e.Message
if w, ok := msg.(Wrapper); ok {
var err error
msg, err = w.Wrap()
if err != nil {
panic(err)
}
}
msgBytes, err := proto.Marshal(msg)
if err != nil {
panic(err) // Q: should this panic or error?
}
res := p.mconn.Send(e.ChannelID, msgBytes)
res := p.mconn.Send(chID, msgBytes)
if res {
labels := []string{
"peer_id", string(p.ID()),
"chID", fmt.Sprintf("%#x", e.ChannelID),
"chID", fmt.Sprintf("%#x", chID),
}
p.metrics.PeerSendBytesTotal.With(labels...).Add(float64(len(msgBytes)))
}
@@ -286,29 +270,17 @@ func (p *peer) Send(e Envelope) bool {

// TrySend msg bytes to the channel identified by chID byte. Immediately returns
// false if the send queue is full.
func (p *peer) TrySend(e Envelope) bool {
func (p *peer) TrySend(chID byte, msgBytes []byte) bool {
if !p.IsRunning() {
return false
} else if !p.hasChannel(e.ChannelID) {
} else if !p.hasChannel(chID) {
return false
}
msg := e.Message
if w, ok := msg.(Wrapper); ok {
var err error
msg, err = w.Wrap()
if err != nil {
panic(err)
}
}
msgBytes, err := proto.Marshal(msg)
if err != nil {
panic(err)
}
res := p.mconn.TrySend(e.ChannelID, msgBytes)
res := p.mconn.TrySend(chID, msgBytes)
if res {
labels := []string{
"peer_id", string(p.ID()),
"chID", fmt.Sprintf("%#x", e.ChannelID),
"chID", fmt.Sprintf("%#x", chID),
}
p.metrics.PeerSendBytesTotal.With(labels...).Add(float64(len(msgBytes)))
}
@@ -412,7 +384,6 @@ func createMConnection(
conn net.Conn,
p *peer,
reactorsByCh map[byte]Reactor,
msgTypeByChID map[byte]proto.Message,
chDescs []*tmconn.ChannelDescriptor,
onPeerError func(Peer, interface{}),
config tmconn.MConnConfig,
@@ -425,31 +396,12 @@ func createMConnection(
// which does onPeerError.
panic(fmt.Sprintf("Unknown channel %X", chID))
}
mt := msgTypeByChID[chID]
msg := proto.Clone(mt)
err := proto.Unmarshal(msgBytes, msg)
if err != nil {
// TODO(williambanfield) add a log line
return
}
labels := []string{
"peer_id", string(p.ID()),
"chID", fmt.Sprintf("%#x", chID),
}
if w, ok := msg.(Unwrapper); ok {
msg, err = w.Unwrap()
if err != nil {
// TODO(williambanfield) add error log line.
return
}
}
p.metrics.PeerReceiveBytesTotal.With(labels...).Add(float64(len(msgBytes)))
p.metrics.MessageReceiveBytesTotal.With("message_type", "tmp").Add(float64(len(msgBytes)))
reactor.Receive(Envelope{
ChannelID: chID,
Src: p,
Message: msg,
})
reactor.Receive(chID, p, msgBytes)
}

onError := func(r interface{}) {
@@ -18,22 +18,22 @@ type mockPeer struct {
|
||||
id ID
|
||||
}
|
||||
|
||||
func (mp *mockPeer) FlushStop() { mp.Stop() } //nolint:errcheck // ignore error
|
||||
func (mp *mockPeer) TrySend(e Envelope) bool { return true }
|
||||
func (mp *mockPeer) Send(e Envelope) bool { return true }
|
||||
func (mp *mockPeer) NodeInfo() NodeInfo { return DefaultNodeInfo{} }
|
||||
func (mp *mockPeer) Status() ConnectionStatus { return ConnectionStatus{} }
|
||||
func (mp *mockPeer) ID() ID { return mp.id }
|
||||
func (mp *mockPeer) IsOutbound() bool { return false }
|
||||
func (mp *mockPeer) IsPersistent() bool { return true }
|
||||
func (mp *mockPeer) Get(s string) interface{} { return s }
|
||||
func (mp *mockPeer) Set(string, interface{}) {}
|
||||
func (mp *mockPeer) RemoteIP() net.IP { return mp.ip }
|
||||
func (mp *mockPeer) SocketAddr() *NetAddress { return nil }
|
||||
func (mp *mockPeer) RemoteAddr() net.Addr { return &net.TCPAddr{IP: mp.ip, Port: 8800} }
|
||||
func (mp *mockPeer) CloseConn() error { return nil }
|
||||
func (mp *mockPeer) SetRemovalFailed() {}
|
||||
func (mp *mockPeer) GetRemovalFailed() bool { return false }
|
||||
func (mp *mockPeer) FlushStop() { mp.Stop() } //nolint:errcheck // ignore error
|
||||
func (mp *mockPeer) TrySend(chID byte, msgBytes []byte) bool { return true }
|
||||
func (mp *mockPeer) Send(chID byte, msgBytes []byte) bool { return true }
|
||||
func (mp *mockPeer) NodeInfo() NodeInfo { return DefaultNodeInfo{} }
|
||||
func (mp *mockPeer) Status() ConnectionStatus { return ConnectionStatus{} }
|
||||
func (mp *mockPeer) ID() ID { return mp.id }
|
||||
func (mp *mockPeer) IsOutbound() bool { return false }
|
||||
func (mp *mockPeer) IsPersistent() bool { return true }
|
||||
func (mp *mockPeer) Get(s string) interface{} { return s }
|
||||
func (mp *mockPeer) Set(string, interface{}) {}
|
||||
func (mp *mockPeer) RemoteIP() net.IP { return mp.ip }
|
||||
func (mp *mockPeer) SocketAddr() *NetAddress { return nil }
|
||||
func (mp *mockPeer) RemoteAddr() net.Addr { return &net.TCPAddr{IP: mp.ip, Port: 8800} }
|
||||
func (mp *mockPeer) CloseConn() error { return nil }
|
||||
func (mp *mockPeer) SetRemovalFailed() {}
|
||||
func (mp *mockPeer) GetRemovalFailed() bool { return false }
|
||||
|
||||
// Returns a mock peer
|
||||
func newMockPeer(ip net.IP) *mockPeer {
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/cosmos/gogoproto/proto"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
@@ -15,7 +14,6 @@ import (
|
||||
"github.com/tendermint/tendermint/crypto/ed25519"
|
||||
"github.com/tendermint/tendermint/libs/bytes"
|
||||
"github.com/tendermint/tendermint/libs/log"
|
||||
"github.com/tendermint/tendermint/proto/tendermint/p2p"
|
||||
|
||||
"github.com/tendermint/tendermint/config"
|
||||
tmconn "github.com/tendermint/tendermint/p2p/conn"
|
||||
@@ -72,7 +70,7 @@ func TestPeerSend(t *testing.T) {
|
||||
})
|
||||
|
||||
assert.True(p.CanSend(testCh))
|
||||
assert.True(p.Send(Envelope{ChannelID: testCh, Message: &p2p.Message{}}))
|
||||
assert.True(p.Send(testCh, []byte("Asylum")))
|
||||
}
|
||||
|
||||
func createOutboundPeerAndPerformHandshake(
|
||||
@@ -84,9 +82,6 @@ func createOutboundPeerAndPerformHandshake(
|
||||
{ID: testCh, Priority: 1},
|
||||
}
|
||||
reactorsByCh := map[byte]Reactor{testCh: NewTestReactor(chDescs, true)}
|
||||
msgTypeByChID := map[byte]proto.Message{
|
||||
testCh: &p2p.Message{},
|
||||
}
|
||||
pk := ed25519.GenPrivKey()
|
||||
pc, err := testOutboundPeerConn(addr, config, false, pk)
|
||||
if err != nil {
|
||||
@@ -99,7 +94,7 @@ func createOutboundPeerAndPerformHandshake(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
p := newPeer(pc, mConfig, peerNodeInfo, reactorsByCh, msgTypeByChID, chDescs, func(p Peer, r interface{}) {})
|
||||
p := newPeer(pc, mConfig, peerNodeInfo, reactorsByCh, chDescs, func(p Peer, r interface{}) {})
|
||||
p.SetLogger(log.TestingLogger().With("peer", addr))
|
||||
return p, nil
|
||||
}
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
"sync"
"time"

"github.com/cosmos/gogoproto/proto"

"github.com/tendermint/tendermint/libs/cmap"
tmmath "github.com/tendermint/tendermint/libs/math"
tmrand "github.com/tendermint/tendermint/libs/rand"
@@ -182,7 +184,6 @@ func (r *Reactor) GetChannels() []*conn.ChannelDescriptor {
Priority: 1,
SendQueueCapacity: 10,
RecvMessageCapacity: maxMsgSize,
MessageType: &tmp2p.Message{},
},
}
}
@@ -235,10 +236,16 @@ func (r *Reactor) logErrAddrBook(err error) {
|
||||
}
|
||||
|
||||
// Receive implements Reactor by handling incoming PEX messages.
|
||||
func (r *Reactor) Receive(e p2p.Envelope) {
|
||||
r.Logger.Debug("Received message", "src", e.Src, "chId", e.ChannelID, "msg", e.Message)
|
||||
func (r *Reactor) Receive(chID byte, src Peer, msgBytes []byte) {
|
||||
msg, err := decodeMsg(msgBytes)
|
||||
if err != nil {
|
||||
r.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err)
|
||||
r.Switch.StopPeerForError(src, err)
|
||||
return
|
||||
}
|
||||
r.Logger.Debug("Received message", "src", src, "chId", chID, "msg", msg)
|
||||
|
||||
switch msg := e.Message.(type) {
|
||||
switch msg := msg.(type) {
|
||||
case *tmp2p.PexRequest:
|
||||
|
||||
// NOTE: this is a prime candidate for amplification attacks,
|
||||
@@ -248,8 +255,8 @@ func (r *Reactor) Receive(e p2p.Envelope) {
|
||||
|
||||
// If we're a seed and this is an inbound peer,
|
||||
// respond once and disconnect.
|
||||
if r.config.SeedMode && !e.Src.IsOutbound() {
|
||||
id := string(e.Src.ID())
|
||||
if r.config.SeedMode && !src.IsOutbound() {
|
||||
id := string(src.ID())
|
||||
v := r.lastReceivedRequests.Get(id)
|
||||
if v != nil {
|
||||
// FlushStop/StopPeer are already
|
||||
@@ -259,36 +266,36 @@ func (r *Reactor) Receive(e p2p.Envelope) {
|
||||
r.lastReceivedRequests.Set(id, time.Now())
|
||||
|
||||
// Send addrs and disconnect
|
||||
r.SendAddrs(e.Src, r.book.GetSelectionWithBias(biasToSelectNewPeers))
|
||||
r.SendAddrs(src, r.book.GetSelectionWithBias(biasToSelectNewPeers))
|
||||
go func() {
|
||||
// In a go-routine so it doesn't block .Receive.
|
||||
e.Src.FlushStop()
|
||||
r.Switch.StopPeerGracefully(e.Src)
|
||||
src.FlushStop()
|
||||
r.Switch.StopPeerGracefully(src)
|
||||
}()
|
||||
|
||||
} else {
|
||||
// Check we're not receiving requests too frequently.
|
||||
if err := r.receiveRequest(e.Src); err != nil {
|
||||
r.Switch.StopPeerForError(e.Src, err)
|
||||
r.book.MarkBad(e.Src.SocketAddr(), defaultBanTime)
|
||||
if err := r.receiveRequest(src); err != nil {
|
||||
r.Switch.StopPeerForError(src, err)
|
||||
r.book.MarkBad(src.SocketAddr(), defaultBanTime)
|
||||
return
|
||||
}
|
||||
r.SendAddrs(e.Src, r.book.GetSelection())
|
||||
r.SendAddrs(src, r.book.GetSelection())
|
||||
}
|
||||
|
||||
case *tmp2p.PexAddrs:
|
||||
// If we asked for addresses, add them to the book
|
||||
addrs, err := p2p.NetAddressesFromProto(msg.Addrs)
|
||||
if err != nil {
|
||||
r.Switch.StopPeerForError(e.Src, err)
|
||||
r.book.MarkBad(e.Src.SocketAddr(), defaultBanTime)
|
||||
r.Switch.StopPeerForError(src, err)
|
||||
r.book.MarkBad(src.SocketAddr(), defaultBanTime)
|
||||
return
|
||||
}
|
||||
err = r.ReceiveAddrs(addrs, e.Src)
|
||||
err = r.ReceiveAddrs(addrs, src)
|
||||
if err != nil {
|
||||
r.Switch.StopPeerForError(e.Src, err)
|
||||
r.Switch.StopPeerForError(src, err)
|
||||
if err == ErrUnsolicitedList {
|
||||
r.book.MarkBad(e.Src.SocketAddr(), defaultBanTime)
|
||||
r.book.MarkBad(src.SocketAddr(), defaultBanTime)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -341,10 +348,7 @@ func (r *Reactor) RequestAddrs(p Peer) {
}
r.Logger.Debug("Request addrs", "from", p)
r.requestsSent.Set(id, struct{}{})
p.Send(p2p.Envelope{
ChannelID: PexChannel,
Message: &tmp2p.PexRequest{},
})
p.Send(PexChannel, mustEncode(&tmp2p.PexRequest{}))
}

// ReceiveAddrs adds the given addrs to the addrbook if theres an open
@@ -402,11 +406,7 @@ func (r *Reactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error {

// SendAddrs sends addrs to the peer.
func (r *Reactor) SendAddrs(p Peer, netAddrs []*p2p.NetAddress) {
e := p2p.Envelope{
ChannelID: PexChannel,
Message: &tmp2p.PexAddrs{Addrs: p2p.NetAddressesToProto(netAddrs)},
}
p.Send(e)
p.Send(PexChannel, mustEncode(&tmp2p.PexAddrs{Addrs: p2p.NetAddressesToProto(netAddrs)}))
}
// SetEnsurePeersPeriod sets period to ensure peers connected.
@@ -763,3 +763,43 @@ func markAddrInBookBasedOnErr(addr *p2p.NetAddress, book AddrBook, err error) {
book.MarkAttempt(addr)
}
}

//-----------------------------------------------------------------------------
// Messages

// mustEncode proto encodes a tmp2p.Message
func mustEncode(pb proto.Message) []byte {
msg := tmp2p.Message{}
switch pb := pb.(type) {
case *tmp2p.PexRequest:
msg.Sum = &tmp2p.Message_PexRequest{PexRequest: pb}
case *tmp2p.PexAddrs:
msg.Sum = &tmp2p.Message_PexAddrs{PexAddrs: pb}
default:
panic(fmt.Sprintf("Unknown message type %T", pb))
}

bz, err := msg.Marshal()
if err != nil {
panic(fmt.Errorf("unable to marshal %T: %w", pb, err))
}
return bz
}

func decodeMsg(bz []byte) (proto.Message, error) {
pb := &tmp2p.Message{}

err := pb.Unmarshal(bz)
if err != nil {
return nil, err
}

switch msg := pb.Sum.(type) {
case *tmp2p.Message_PexRequest:
return msg.PexRequest, nil
case *tmp2p.Message_PexAddrs:
return msg.PexAddrs, nil
default:
return nil, fmt.Errorf("unknown message: %T", msg)
}
}
@@ -131,11 +131,12 @@ func TestPEXReactorReceive(t *testing.T) {
|
||||
r.RequestAddrs(peer)
|
||||
|
||||
size := book.Size()
|
||||
msg := &tmp2p.PexAddrs{Addrs: []tmp2p.NetAddress{peer.SocketAddr().ToProto()}}
|
||||
r.Receive(p2p.Envelope{ChannelID: PexChannel, Src: peer, Message: msg})
|
||||
msg := mustEncode(&tmp2p.PexAddrs{Addrs: []tmp2p.NetAddress{peer.SocketAddr().ToProto()}})
|
||||
r.Receive(PexChannel, peer, msg)
|
||||
assert.Equal(t, size+1, book.Size())
|
||||
|
||||
r.Receive(p2p.Envelope{ChannelID: PexChannel, Src: peer, Message: &tmp2p.PexRequest{}})
|
||||
msg = mustEncode(&tmp2p.PexRequest{})
|
||||
r.Receive(PexChannel, peer, msg) // should not panic.
|
||||
}
|
||||
|
||||
func TestPEXReactorRequestMessageAbuse(t *testing.T) {
|
||||
@@ -154,19 +155,20 @@ func TestPEXReactorRequestMessageAbuse(t *testing.T) {
|
||||
require.True(t, book.HasAddress(peerAddr))
|
||||
|
||||
id := string(peer.ID())
|
||||
msg := mustEncode(&tmp2p.PexRequest{})
|
||||
|
||||
// first time creates the entry
|
||||
r.Receive(p2p.Envelope{ChannelID: PexChannel, Src: peer, Message: &tmp2p.PexRequest{}})
|
||||
r.Receive(PexChannel, peer, msg)
|
||||
assert.True(t, r.lastReceivedRequests.Has(id))
|
||||
assert.True(t, sw.Peers().Has(peer.ID()))
|
||||
|
||||
// next time sets the last time value
|
||||
r.Receive(p2p.Envelope{ChannelID: PexChannel, Src: peer, Message: &tmp2p.PexRequest{}})
|
||||
r.Receive(PexChannel, peer, msg)
|
||||
assert.True(t, r.lastReceivedRequests.Has(id))
|
||||
assert.True(t, sw.Peers().Has(peer.ID()))
|
||||
|
||||
// third time is too many too soon - peer is removed
|
||||
r.Receive(p2p.Envelope{ChannelID: PexChannel, Src: peer, Message: &tmp2p.PexRequest{}})
|
||||
r.Receive(PexChannel, peer, msg)
|
||||
assert.False(t, r.lastReceivedRequests.Has(id))
|
||||
assert.False(t, sw.Peers().Has(peer.ID()))
|
||||
assert.True(t, book.IsBanned(peerAddr))
|
||||
@@ -190,15 +192,15 @@ func TestPEXReactorAddrsMessageAbuse(t *testing.T) {
|
||||
assert.True(t, r.requestsSent.Has(id))
|
||||
assert.True(t, sw.Peers().Has(peer.ID()))
|
||||
|
||||
msg := &tmp2p.PexAddrs{Addrs: []tmp2p.NetAddress{peer.SocketAddr().ToProto()}}
|
||||
msg := mustEncode(&tmp2p.PexAddrs{Addrs: []tmp2p.NetAddress{peer.SocketAddr().ToProto()}})
|
||||
|
||||
// receive some addrs. should clear the request
|
||||
r.Receive(p2p.Envelope{ChannelID: PexChannel, Src: peer, Message: msg})
|
||||
r.Receive(PexChannel, peer, msg)
|
||||
assert.False(t, r.requestsSent.Has(id))
|
||||
assert.True(t, sw.Peers().Has(peer.ID()))
|
||||
|
||||
// receiving more unsolicited addrs causes a disconnect and ban
|
||||
r.Receive(p2p.Envelope{ChannelID: PexChannel, Src: peer, Message: msg})
|
||||
r.Receive(PexChannel, peer, msg)
|
||||
assert.False(t, sw.Peers().Has(peer.ID()))
|
||||
assert.True(t, book.IsBanned(peer.SocketAddr()))
|
||||
}
|
||||
@@ -484,12 +486,8 @@ func TestPEXReactorDoesNotAddPrivatePeersToAddrBook(t *testing.T) {
|
||||
pexR.RequestAddrs(peer)
|
||||
|
||||
size := book.Size()
|
||||
msg := &tmp2p.PexAddrs{Addrs: []tmp2p.NetAddress{peer.SocketAddr().ToProto()}}
|
||||
pexR.Receive(p2p.Envelope{
|
||||
ChannelID: PexChannel,
|
||||
Src: peer,
|
||||
Message: msg,
|
||||
})
|
||||
msg := mustEncode(&tmp2p.PexAddrs{Addrs: []tmp2p.NetAddress{peer.SocketAddr().ToProto()}})
|
||||
pexR.Receive(PexChannel, peer, msg)
|
||||
assert.Equal(t, size, book.Size())
|
||||
|
||||
pexR.AddPeer(peer)
|
||||
@@ -697,10 +695,7 @@ func TestPexVectors(t *testing.T) {
|
||||
for _, tc := range testCases {
|
||||
tc := tc
|
||||
|
||||
w, err := tc.msg.(p2p.Wrapper).Wrap()
|
||||
require.NoError(t, err)
|
||||
bz, err := proto.Marshal(w)
|
||||
require.NoError(t, err)
|
||||
bz := mustEncode(tc.msg)
|
||||
|
||||
require.Equal(t, tc.expBytes, hex.EncodeToString(bz), tc.testName)
|
||||
}
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
"sync"
"time"

"github.com/cosmos/gogoproto/proto"
"github.com/tendermint/tendermint/config"
"github.com/tendermint/tendermint/libs/cmap"
"github.com/tendermint/tendermint/libs/log"
"github.com/tendermint/tendermint/libs/rand"
"github.com/tendermint/tendermint/libs/service"
"github.com/tendermint/tendermint/p2p/conn"
@@ -69,17 +69,16 @@ type PeerFilterFunc func(IPeerSet, Peer) error
type Switch struct {
service.BaseService

config *config.P2PConfig
reactors map[string]Reactor
chDescs []*conn.ChannelDescriptor
reactorsByCh map[byte]Reactor
msgTypeByChID map[byte]proto.Message
peers *PeerSet
dialing *cmap.CMap
reconnecting *cmap.CMap
nodeInfo NodeInfo // our node info
nodeKey *NodeKey // our node privkey
addrBook AddrBook
config *config.P2PConfig
reactors map[string]Reactor
chDescs []*conn.ChannelDescriptor
reactorsByCh map[byte]Reactor
peers *PeerSet
dialing *cmap.CMap
reconnecting *cmap.CMap
nodeInfo NodeInfo // our node info
nodeKey *NodeKey // our node privkey
addrBook AddrBook
// peers addresses with whom we'll maintain constant connection
persistentPeersAddrs []*NetAddress
unconditionalPeerIDs map[ID]struct{}
@@ -114,7 +113,6 @@ func NewSwitch(
reactors: make(map[string]Reactor),
chDescs: make([]*conn.ChannelDescriptor, 0),
reactorsByCh: make(map[byte]Reactor),
msgTypeByChID: make(map[byte]proto.Message),
peers: NewPeerSet(),
dialing: cmap.NewCMap(),
reconnecting: cmap.NewCMap(),
@@ -166,7 +164,6 @@ func (sw *Switch) AddReactor(name string, reactor Reactor) Reactor {
}
sw.chDescs = append(sw.chDescs, chDesc)
sw.reactorsByCh[chID] = reactor
sw.msgTypeByChID[chID] = chDesc.MessageType
}
sw.reactors[name] = reactor
reactor.SetSwitch(sw)
@@ -185,7 +182,6 @@ func (sw *Switch) RemoveReactor(name string, reactor Reactor) {
}
}
delete(sw.reactorsByCh, chDesc.ID)
delete(sw.msgTypeByChID, chDesc.ID)
}
delete(sw.reactors, name)
reactor.SetSwitch(nil)
@@ -265,8 +261,8 @@ func (sw *Switch) OnStop() {
// closed once msg bytes are sent to all peers (or time out).
//
// NOTE: Broadcast uses goroutines, so order of broadcast may not be preserved.
func (sw *Switch) Broadcast(e Envelope) chan bool {
sw.Logger.Debug("Broadcast", "channel", e.ChannelID)
func (sw *Switch) Broadcast(chID byte, msgBytes []byte) chan bool {
sw.Logger.Debug("Broadcast", "channel", chID, "msgBytes", log.NewLazySprintf("%X", msgBytes))

peers := sw.peers.List()
var wg sync.WaitGroup
@@ -276,7 +272,7 @@ func (sw *Switch) Broadcast(e Envelope) chan bool {
for _, peer := range peers {
go func(p Peer) {
defer wg.Done()
success := p.Send(e)
success := p.Send(chID, msgBytes)
successChan <- success
}(peer)
}
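The hunk above restores the byte-slice `Broadcast(chID, msgBytes)` signature. As an illustration only (not part of this change), a caller might use it roughly as sketched below; the `pexChannel` constant and the `encode` callback stand in for the pex package's `PexChannel` and its unexported `mustEncode` helper shown earlier in this diff, so treat both as assumptions.

```go
package p2pexample

import (
	"github.com/tendermint/tendermint/p2p"
	tmp2p "github.com/tendermint/tendermint/proto/tendermint/p2p"
)

// pexChannel mirrors pex.PexChannel (assumed to be 0x00 here).
const pexChannel = byte(0x00)

// broadcastPexRequest fans a PEX request out to every connected peer using the
// byte-slice Broadcast API. encode stands in for the pex package's unexported
// mustEncode helper and is an assumption for this sketch.
func broadcastPexRequest(sw *p2p.Switch, encode func(*tmp2p.PexRequest) []byte) int {
	msgBytes := encode(&tmp2p.PexRequest{})
	// Broadcast returns a channel carrying one result per peer; it is closed
	// once every send has completed or timed out, so ranging over it is safe.
	failures := 0
	for success := range sw.Broadcast(pexChannel, msgBytes) {
		if !success {
			failures++
		}
	}
	return failures
}
```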
@@ -627,12 +623,11 @@ func (sw *Switch) IsPeerPersistent(na *NetAddress) bool {
func (sw *Switch) acceptRoutine() {
for {
p, err := sw.transport.Accept(peerConfig{
chDescs: sw.chDescs,
onPeerError: sw.StopPeerForError,
reactorsByCh: sw.reactorsByCh,
msgTypeByChID: sw.msgTypeByChID,
metrics: sw.metrics,
isPersistent: sw.IsPeerPersistent,
chDescs: sw.chDescs,
onPeerError: sw.StopPeerForError,
reactorsByCh: sw.reactorsByCh,
metrics: sw.metrics,
isPersistent: sw.IsPeerPersistent,
})
if err != nil {
switch err := err.(type) {
@@ -731,12 +726,11 @@ func (sw *Switch) addOutboundPeerWithConfig(
}

p, err := sw.transport.Dial(*addr, peerConfig{
chDescs: sw.chDescs,
onPeerError: sw.StopPeerForError,
isPersistent: sw.IsPeerPersistent,
reactorsByCh: sw.reactorsByCh,
msgTypeByChID: sw.msgTypeByChID,
metrics: sw.metrics,
chDescs: sw.chDescs,
onPeerError: sw.StopPeerForError,
isPersistent: sw.IsPeerPersistent,
reactorsByCh: sw.reactorsByCh,
metrics: sw.metrics,
})
if err != nil {
if e, ok := err.(ErrRejected); ok {
@@ -14,7 +14,6 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/golang/protobuf/proto"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -24,8 +23,6 @@ import (
|
||||
"github.com/tendermint/tendermint/libs/log"
|
||||
tmsync "github.com/tendermint/tendermint/libs/sync"
|
||||
"github.com/tendermint/tendermint/p2p/conn"
|
||||
"github.com/tendermint/tendermint/proto/tendermint/p2p"
|
||||
p2pproto "github.com/tendermint/tendermint/proto/tendermint/p2p"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -39,8 +36,9 @@ func init() {
|
||||
}
|
||||
|
||||
type PeerMessage struct {
|
||||
Contents proto.Message
|
||||
Counter int
|
||||
PeerID ID
|
||||
Bytes []byte
|
||||
Counter int
|
||||
}
|
||||
|
||||
type TestReactor struct {
|
||||
@@ -72,12 +70,12 @@ func (tr *TestReactor) AddPeer(peer Peer) {}
|
||||
|
||||
func (tr *TestReactor) RemovePeer(peer Peer, reason interface{}) {}
|
||||
|
||||
func (tr *TestReactor) Receive(e Envelope) {
|
||||
func (tr *TestReactor) Receive(chID byte, peer Peer, msgBytes []byte) {
|
||||
if tr.logMessages {
|
||||
tr.mtx.Lock()
|
||||
defer tr.mtx.Unlock()
|
||||
fmt.Printf("Received: %X, %X\n", e.ChannelID, e.Message)
|
||||
tr.msgsReceived[e.ChannelID] = append(tr.msgsReceived[e.ChannelID], PeerMessage{Contents: e.Message, Counter: tr.msgsCounter})
|
||||
// fmt.Printf("Received: %X, %X\n", chID, msgBytes)
|
||||
tr.msgsReceived[chID] = append(tr.msgsReceived[chID], PeerMessage{peer.ID(), msgBytes, tr.msgsCounter})
|
||||
tr.msgsCounter++
|
||||
}
|
||||
}
|
||||
@@ -105,12 +103,12 @@ func initSwitchFunc(i int, sw *Switch) *Switch {
|
||||
|
||||
// Make two reactors of two channels each
|
||||
sw.AddReactor("foo", NewTestReactor([]*conn.ChannelDescriptor{
|
||||
{ID: byte(0x00), Priority: 10, MessageType: &p2pproto.Message{}},
|
||||
{ID: byte(0x01), Priority: 10, MessageType: &p2pproto.Message{}},
|
||||
{ID: byte(0x00), Priority: 10},
|
||||
{ID: byte(0x01), Priority: 10},
|
||||
}, true))
|
||||
sw.AddReactor("bar", NewTestReactor([]*conn.ChannelDescriptor{
|
||||
{ID: byte(0x02), Priority: 10, MessageType: &p2pproto.Message{}},
|
||||
{ID: byte(0x03), Priority: 10, MessageType: &p2pproto.Message{}},
|
||||
{ID: byte(0x02), Priority: 10},
|
||||
{ID: byte(0x03), Priority: 10},
|
||||
}, true))
|
||||
|
||||
return sw
|
||||
@@ -137,47 +135,31 @@ func TestSwitches(t *testing.T) {
|
||||
}
|
||||
|
||||
// Lets send some messages
|
||||
ch0Msg := &p2pproto.PexAddrs{
|
||||
Addrs: []p2p.NetAddress{
|
||||
{
|
||||
ID: "1",
|
||||
},
|
||||
},
|
||||
}
|
||||
ch1Msg := &p2pproto.PexAddrs{
|
||||
Addrs: []p2p.NetAddress{
|
||||
{
|
||||
ID: "1",
|
||||
},
|
||||
},
|
||||
}
|
||||
ch2Msg := &p2pproto.PexAddrs{
|
||||
Addrs: []p2p.NetAddress{
|
||||
{
|
||||
ID: "2",
|
||||
},
|
||||
},
|
||||
}
|
||||
s1.Broadcast(Envelope{ChannelID: byte(0x00), Message: ch0Msg})
|
||||
s1.Broadcast(Envelope{ChannelID: byte(0x01), Message: ch1Msg})
|
||||
s1.Broadcast(Envelope{ChannelID: byte(0x02), Message: ch2Msg})
|
||||
ch0Msg := []byte("channel zero")
|
||||
ch1Msg := []byte("channel foo")
|
||||
ch2Msg := []byte("channel bar")
|
||||
|
||||
s1.Broadcast(byte(0x00), ch0Msg)
|
||||
s1.Broadcast(byte(0x01), ch1Msg)
|
||||
s1.Broadcast(byte(0x02), ch2Msg)
|
||||
|
||||
assertMsgReceivedWithTimeout(t,
|
||||
ch0Msg,
|
||||
byte(0x00),
|
||||
s2.Reactor("foo").(*TestReactor), 200*time.Millisecond, 5*time.Second)
|
||||
s2.Reactor("foo").(*TestReactor), 10*time.Millisecond, 5*time.Second)
|
||||
assertMsgReceivedWithTimeout(t,
|
||||
ch1Msg,
|
||||
byte(0x01),
|
||||
s2.Reactor("foo").(*TestReactor), 200*time.Millisecond, 5*time.Second)
|
||||
s2.Reactor("foo").(*TestReactor), 10*time.Millisecond, 5*time.Second)
|
||||
assertMsgReceivedWithTimeout(t,
|
||||
ch2Msg,
|
||||
byte(0x02),
|
||||
s2.Reactor("bar").(*TestReactor), 200*time.Millisecond, 5*time.Second)
|
||||
s2.Reactor("bar").(*TestReactor), 10*time.Millisecond, 5*time.Second)
|
||||
}
|
||||
|
||||
func assertMsgReceivedWithTimeout(
|
||||
t *testing.T,
|
||||
msg proto.Message,
|
||||
msgBytes []byte,
|
||||
channel byte,
|
||||
reactor *TestReactor,
|
||||
checkPeriod,
|
||||
@@ -188,13 +170,9 @@ func assertMsgReceivedWithTimeout(
|
||||
select {
|
||||
case <-ticker.C:
|
||||
msgs := reactor.getMsgs(channel)
|
||||
expectedBytes, err := proto.Marshal(msgs[0].Contents)
|
||||
require.NoError(t, err)
|
||||
gotBytes, err := proto.Marshal(msg)
|
||||
require.NoError(t, err)
|
||||
if len(msgs) > 0 {
|
||||
if !bytes.Equal(expectedBytes, gotBytes) {
|
||||
t.Fatalf("Unexpected message bytes. Wanted: %X, Got: %X", msg, msgs[0].Counter)
|
||||
if !bytes.Equal(msgs[0].Bytes, msgBytes) {
|
||||
t.Fatalf("Unexpected message bytes. Wanted: %X, Got: %X", msgBytes, msgs[0].Bytes)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -451,10 +429,7 @@ func TestSwitchStopPeerForError(t *testing.T) {
|
||||
|
||||
// send messages to the peer from sw1
|
||||
p := sw1.Peers().List()[0]
|
||||
p.Send(Envelope{
|
||||
ChannelID: 0x1,
|
||||
Message: &p2p.Message{},
|
||||
})
|
||||
p.Send(0x1, []byte("here's a message to send"))
|
||||
|
||||
// stop sw2. this should cause the p to fail,
|
||||
// which results in calling StopPeerForError internally
|
||||
@@ -849,7 +824,7 @@ func BenchmarkSwitchBroadcast(b *testing.B) {
|
||||
// Send random message from foo channel to another
|
||||
for i := 0; i < b.N; i++ {
|
||||
chID := byte(i % 4)
|
||||
successChan := s1.Broadcast(Envelope{ChannelID: chID})
|
||||
successChan := s1.Broadcast(chID, []byte("test data"))
|
||||
for s := range successChan {
|
||||
if s {
|
||||
numSuccess++
|
||||
|
||||
@@ -149,7 +149,6 @@ func (sw *Switch) addPeerWithConnection(conn net.Conn) error {
MConnConfig(sw.config),
ni,
sw.reactorsByCh,
sw.msgTypeByChID,
sw.chDescs,
sw.StopPeerForError,
)

@@ -8,7 +8,6 @@ import (

"golang.org/x/net/netutil"

"github.com/cosmos/gogoproto/proto"
"github.com/tendermint/tendermint/crypto"
"github.com/tendermint/tendermint/libs/protoio"
"github.com/tendermint/tendermint/p2p/conn"
@@ -48,10 +47,9 @@ type peerConfig struct {
// isPersistent allows you to set a function, which, given socket address
// (for outbound peers) OR self-reported address (for inbound peers), tells
// if the peer is persistent or not.
isPersistent func(*NetAddress) bool
reactorsByCh map[byte]Reactor
msgTypeByChID map[byte]proto.Message
metrics *Metrics
isPersistent func(*NetAddress) bool
reactorsByCh map[byte]Reactor
metrics *Metrics
}

// Transport emits and connects to Peers. The implementation of Peer is left to
@@ -521,7 +519,6 @@ func (mt *MultiplexTransport) wrapPeer(
mt.mConfig,
ni,
cfg.reactorsByCh,
cfg.msgTypeByChID,
cfg.chDescs,
cfg.onPeerError,
PeerMetrics(cfg.metrics),
32
p2p/types.go
@@ -1,40 +1,8 @@
package p2p

import (
"github.com/cosmos/gogoproto/proto"
"github.com/tendermint/tendermint/p2p/conn"
tmp2p "github.com/tendermint/tendermint/proto/tendermint/p2p"
)

type ChannelDescriptor = conn.ChannelDescriptor
type ConnectionStatus = conn.ConnectionStatus

// Envelope contains a message with sender routing info.
type Envelope struct {
Src Peer // sender (empty if outbound)
Message proto.Message // message payload
ChannelID byte
}

// Wrapper is a Protobuf message that can contain a variety of inner messages
// (e.g. via oneof fields). If a Channel's message type implements Wrapper, the
// Router will automatically wrap outbound messages and unwrap inbound messages,
// such that reactors do not have to do this themselves.
type Unwrapper interface {
proto.Message

// Unwrap will unwrap the inner message contained in this message.
Unwrap() (proto.Message, error)
}

type Wrapper interface {
proto.Message

// Wrap will take the underlying message and wrap it in its wrapper type.
Wrap() (proto.Message, error)
}

var (
_ Wrapper = &tmp2p.PexRequest{}
_ Wrapper = &tmp2p.PexAddrs{}
)
@@ -1,73 +0,0 @@
|
||||
package blocksync
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/cosmos/gogoproto/proto"
|
||||
"github.com/tendermint/tendermint/p2p"
|
||||
)
|
||||
|
||||
var _ p2p.Wrapper = &StatusRequest{}
|
||||
var _ p2p.Wrapper = &StatusResponse{}
|
||||
var _ p2p.Wrapper = &NoBlockResponse{}
|
||||
var _ p2p.Wrapper = &BlockResponse{}
|
||||
var _ p2p.Wrapper = &BlockRequest{}
|
||||
|
||||
const (
|
||||
BlockResponseMessagePrefixSize = 4
|
||||
BlockResponseMessageFieldKeySize = 1
|
||||
)
|
||||
|
||||
func (m *BlockRequest) Wrap() (proto.Message, error) {
|
||||
bm := &Message{}
|
||||
bm.Sum = &Message_BlockRequest{BlockRequest: m}
|
||||
return bm, nil
|
||||
}
|
||||
|
||||
func (m *BlockResponse) Wrap() (proto.Message, error) {
|
||||
bm := &Message{}
|
||||
bm.Sum = &Message_BlockResponse{BlockResponse: m}
|
||||
return bm, nil
|
||||
}
|
||||
|
||||
func (m *NoBlockResponse) Wrap() (proto.Message, error) {
|
||||
bm := &Message{}
|
||||
bm.Sum = &Message_NoBlockResponse{NoBlockResponse: m}
|
||||
return bm, nil
|
||||
}
|
||||
|
||||
func (m *StatusRequest) Wrap() (proto.Message, error) {
|
||||
bm := &Message{}
|
||||
bm.Sum = &Message_StatusRequest{StatusRequest: m}
|
||||
return bm, nil
|
||||
}
|
||||
|
||||
func (m *StatusResponse) Wrap() (proto.Message, error) {
|
||||
bm := &Message{}
|
||||
bm.Sum = &Message_StatusResponse{StatusResponse: m}
|
||||
return bm, nil
|
||||
}
|
||||
|
||||
// Unwrap implements the p2p Wrapper interface and unwraps a wrapped blockchain
|
||||
// message.
|
||||
func (m *Message) Unwrap() (proto.Message, error) {
|
||||
switch msg := m.Sum.(type) {
|
||||
case *Message_BlockRequest:
|
||||
return m.GetBlockRequest(), nil
|
||||
|
||||
case *Message_BlockResponse:
|
||||
return m.GetBlockResponse(), nil
|
||||
|
||||
case *Message_NoBlockResponse:
|
||||
return m.GetNoBlockResponse(), nil
|
||||
|
||||
case *Message_StatusRequest:
|
||||
return m.GetStatusRequest(), nil
|
||||
|
||||
case *Message_StatusResponse:
|
||||
return m.GetStatusResponse(), nil
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown message: %T", msg)
|
||||
}
|
||||
}
|
||||
@@ -1,109 +0,0 @@
|
||||
package consensus
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/cosmos/gogoproto/proto"
|
||||
"github.com/tendermint/tendermint/p2p"
|
||||
)
|
||||
|
||||
var _ p2p.Wrapper = &VoteSetBits{}
|
||||
var _ p2p.Wrapper = &VoteSetMaj23{}
|
||||
var _ p2p.Wrapper = &Vote{}
|
||||
var _ p2p.Wrapper = &ProposalPOL{}
|
||||
var _ p2p.Wrapper = &Proposal{}
|
||||
var _ p2p.Wrapper = &NewValidBlock{}
|
||||
var _ p2p.Wrapper = &NewRoundStep{}
|
||||
var _ p2p.Wrapper = &HasVote{}
|
||||
var _ p2p.Wrapper = &BlockPart{}
|
||||
|
||||
func (m *VoteSetBits) Wrap() (proto.Message, error) {
|
||||
cm := &Message{}
|
||||
cm.Sum = &Message_VoteSetBits{VoteSetBits: m}
|
||||
return cm, nil
|
||||
|
||||
}
|
||||
|
||||
func (m *VoteSetMaj23) Wrap() (proto.Message, error) {
|
||||
cm := &Message{}
|
||||
cm.Sum = &Message_VoteSetMaj23{VoteSetMaj23: m}
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
func (m *HasVote) Wrap() (proto.Message, error) {
|
||||
cm := &Message{}
|
||||
cm.Sum = &Message_HasVote{HasVote: m}
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
func (m *Vote) Wrap() (proto.Message, error) {
|
||||
cm := &Message{}
|
||||
cm.Sum = &Message_Vote{Vote: m}
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
func (m *BlockPart) Wrap() (proto.Message, error) {
|
||||
cm := &Message{}
|
||||
cm.Sum = &Message_BlockPart{BlockPart: m}
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
func (m *ProposalPOL) Wrap() (proto.Message, error) {
|
||||
cm := &Message{}
|
||||
cm.Sum = &Message_ProposalPol{ProposalPol: m}
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
func (m *Proposal) Wrap() (proto.Message, error) {
|
||||
cm := &Message{}
|
||||
cm.Sum = &Message_Proposal{Proposal: m}
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
func (m *NewValidBlock) Wrap() (proto.Message, error) {
|
||||
cm := &Message{}
|
||||
cm.Sum = &Message_NewValidBlock{NewValidBlock: m}
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
func (m *NewRoundStep) Wrap() (proto.Message, error) {
|
||||
cm := &Message{}
|
||||
cm.Sum = &Message_NewRoundStep{NewRoundStep: m}
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
// Unwrap implements the p2p Wrapper interface and unwraps a wrapped consensus
|
||||
// proto message.
|
||||
func (m *Message) Unwrap() (proto.Message, error) {
|
||||
switch msg := m.Sum.(type) {
|
||||
case *Message_NewRoundStep:
|
||||
return m.GetNewRoundStep(), nil
|
||||
|
||||
case *Message_NewValidBlock:
|
||||
return m.GetNewValidBlock(), nil
|
||||
|
||||
case *Message_Proposal:
|
||||
return m.GetProposal(), nil
|
||||
|
||||
case *Message_ProposalPol:
|
||||
return m.GetProposalPol(), nil
|
||||
|
||||
case *Message_BlockPart:
|
||||
return m.GetBlockPart(), nil
|
||||
|
||||
case *Message_Vote:
|
||||
return m.GetVote(), nil
|
||||
|
||||
case *Message_HasVote:
|
||||
return m.GetHasVote(), nil
|
||||
|
||||
case *Message_VoteSetMaj23:
|
||||
return m.GetVoteSetMaj23(), nil
|
||||
|
||||
case *Message_VoteSetBits:
|
||||
return m.GetVoteSetBits(), nil
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown message: %T", msg)
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
package mempool
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/cosmos/gogoproto/proto"
|
||||
"github.com/tendermint/tendermint/p2p"
|
||||
)
|
||||
|
||||
var _ p2p.Wrapper = &Txs{}
|
||||
var _ p2p.Unwrapper = &Message{}
|
||||
|
||||
// Wrap implements the p2p Wrapper interface and wraps a mempool message.
|
||||
func (m *Txs) Wrap() (proto.Message, error) {
|
||||
return &Message{
|
||||
Sum: &Message_Txs{Txs: m},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Unwrap implements the p2p Wrapper interface and unwraps a wrapped mempool
|
||||
// message.
|
||||
func (m *Message) Unwrap() (proto.Message, error) {
|
||||
switch msg := m.Sum.(type) {
|
||||
case *Message_Txs:
|
||||
return m.GetTxs(), nil
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown message: %T", msg)
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
package p2p
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/cosmos/gogoproto/proto"
|
||||
)
|
||||
|
||||
func (m *PexAddrs) Wrap() (proto.Message, error) {
|
||||
pm := &Message{}
|
||||
pm.Sum = &Message_PexAddrs{PexAddrs: m}
|
||||
return pm, nil
|
||||
}
|
||||
|
||||
func (m *PexRequest) Wrap() (proto.Message, error) {
|
||||
pm := &Message{}
|
||||
pm.Sum = &Message_PexRequest{PexRequest: m}
|
||||
return pm, nil
|
||||
}
|
||||
|
||||
// Unwrap implements the p2p Wrapper interface and unwraps a wrapped PEX
|
||||
// message.
|
||||
func (m *Message) Unwrap() (proto.Message, error) {
|
||||
switch msg := m.Sum.(type) {
|
||||
case *Message_PexRequest:
|
||||
return msg.PexRequest, nil
|
||||
case *Message_PexAddrs:
|
||||
return msg.PexAddrs, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown pex message: %T", msg)
|
||||
}
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
package statesync
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/cosmos/gogoproto/proto"
|
||||
"github.com/tendermint/tendermint/p2p"
|
||||
)
|
||||
|
||||
var _ p2p.Wrapper = &ChunkRequest{}
|
||||
var _ p2p.Wrapper = &ChunkResponse{}
|
||||
var _ p2p.Wrapper = &SnapshotsRequest{}
|
||||
var _ p2p.Wrapper = &SnapshotsResponse{}
|
||||
|
||||
func (m *SnapshotsResponse) Wrap() (proto.Message, error) {
|
||||
sm := &Message{}
|
||||
sm.Sum = &Message_SnapshotsResponse{SnapshotsResponse: m}
|
||||
return sm, nil
|
||||
}
|
||||
|
||||
func (m *SnapshotsRequest) Wrap() (proto.Message, error) {
|
||||
sm := &Message{}
|
||||
sm.Sum = &Message_SnapshotsRequest{SnapshotsRequest: m}
|
||||
return sm, nil
|
||||
}
|
||||
|
||||
func (m *ChunkResponse) Wrap() (proto.Message, error) {
|
||||
sm := &Message{}
|
||||
sm.Sum = &Message_ChunkResponse{ChunkResponse: m}
|
||||
return sm, nil
|
||||
}
|
||||
|
||||
func (m *ChunkRequest) Wrap() (proto.Message, error) {
|
||||
sm := &Message{}
|
||||
sm.Sum = &Message_ChunkRequest{ChunkRequest: m}
|
||||
return sm, nil
|
||||
}
|
||||
|
||||
// Unwrap implements the p2p Wrapper interface and unwraps a wrapped state sync
|
||||
// proto message.
|
||||
func (m *Message) Unwrap() (proto.Message, error) {
|
||||
switch msg := m.Sum.(type) {
|
||||
case *Message_ChunkRequest:
|
||||
return m.GetChunkRequest(), nil
|
||||
|
||||
case *Message_ChunkResponse:
|
||||
return m.GetChunkResponse(), nil
|
||||
|
||||
case *Message_SnapshotsRequest:
|
||||
return m.GetSnapshotsRequest(), nil
|
||||
|
||||
case *Message_SnapshotsResponse:
|
||||
return m.GetSnapshotsResponse(), nil
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown message: %T", msg)
|
||||
}
|
||||
}
|
||||
@@ -29,7 +29,7 @@ var _ = time.Kitchen
// proto package needs to be updated.
const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package

// BlockIdFlag indicates which BlcokID the signature is for
// BlockIdFlag indicates which BlockID the signature is for
type BlockIDFlag int32

const (

@@ -9,15 +9,15 @@ import "tendermint/crypto/proof.proto";
import "tendermint/version/types.proto";
import "tendermint/types/validator.proto";

// BlockIdFlag indicates which BlcokID the signature is for
// BlockIdFlag indicates which BlockID the signature is for
enum BlockIDFlag {
option (gogoproto.goproto_enum_stringer) = true;
option (gogoproto.goproto_enum_prefix) = false;

BLOCK_ID_FLAG_UNKNOWN = 0 [(gogoproto.enumvalue_customname) = "BlockIDFlagUnknown"];
BLOCK_ID_FLAG_ABSENT = 1 [(gogoproto.enumvalue_customname) = "BlockIDFlagAbsent"];
BLOCK_ID_FLAG_COMMIT = 2 [(gogoproto.enumvalue_customname) = "BlockIDFlagCommit"];
BLOCK_ID_FLAG_NIL = 3 [(gogoproto.enumvalue_customname) = "BlockIDFlagNil"];
BLOCK_ID_FLAG_UNKNOWN = 0 [(gogoproto.enumvalue_customname) = "BlockIDFlagUnknown"]; // indicates an error condition
BLOCK_ID_FLAG_ABSENT = 1 [(gogoproto.enumvalue_customname) = "BlockIDFlagAbsent"]; // the vote was not received
BLOCK_ID_FLAG_COMMIT = 2 [(gogoproto.enumvalue_customname) = "BlockIDFlagCommit"]; // voted for the block that received the majority
BLOCK_ID_FLAG_NIL = 3 [(gogoproto.enumvalue_customname) = "BlockIDFlagNil"]; // voted for nil
}

// SignedMsgType is a type of signed message in the consensus.
48
scripts/qa/reporting/README.md
Normal file
@@ -0,0 +1,48 @@
# Reporting Scripts

This directory contains just one utility script at present that is used in
reporting/QA.

## Latency vs Throughput Plotting

[`latency_throughput.py`](./latency_throughput.py) is a Python script that uses
[matplotlib] to plot a graph of transaction latency vs throughput rate based on
the CSV output generated by the [loadtime reporting
tool](../../../test/loadtime/cmd/report/).

### Setup

Execute the following within this directory (the same directory as the
`latency_throughput.py` file).

```bash
# Create a virtual environment into which to install your dependencies
python3 -m venv .venv

# Activate the virtual environment
source .venv/bin/activate

# Install dependencies listed in requirements.txt
pip install -r requirements.txt

# Show usage instructions and parameters
./latency_throughput.py --help
```

### Running

```bash
# Do the following while ensuring that the virtual environment is activated (see
# the Setup steps).
#
# This will generate a plot in a PNG file called 'tm034.png' in the current
# directory based on the reporting tool CSV output in the "raw.csv" file. The
# '-t' flag overrides the default title at the top of the plot.

./latency_throughput.py \
-t 'Tendermint v0.34.x Latency vs Throughput' \
./tm034.png \
/path/to/csv/files/raw.csv
```

[matplotlib]: https://matplotlib.org/
170
scripts/qa/reporting/latency_throughput.py
Executable file
@@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
A simple script to parse the CSV output from the loadtime reporting tool (see
|
||||
https://github.com/tendermint/tendermint/tree/main/test/loadtime/cmd/report).
|
||||
|
||||
Produces a plot of average transaction latency vs total transaction throughput
|
||||
according to the number of load testing tool WebSocket connections to the
|
||||
Tendermint node.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import logging
|
||||
import sys
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
DEFAULT_TITLE = "Tendermint latency vs throughput"
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Renders a latency vs throughput diagram "
|
||||
"for a set of transactions provided by the loadtime reporting tool",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
parser.add_argument('-t',
|
||||
'--title',
|
||||
default=DEFAULT_TITLE,
|
||||
help='Plot title')
|
||||
parser.add_argument('output_image',
|
||||
help='Output image file (in PNG format)')
|
||||
parser.add_argument(
|
||||
'input_csv_file',
|
||||
nargs='+',
|
||||
help="CSV input file from which to read transaction data "
|
||||
"- must have been generated by the loadtime reporting tool")
|
||||
args = parser.parse_args()
|
||||
|
||||
logging.basicConfig(format='%(levelname)s\t%(message)s',
|
||||
stream=sys.stdout,
|
||||
level=logging.INFO)
|
||||
plot_latency_vs_throughput(args.input_csv_file,
|
||||
args.output_image,
|
||||
title=args.title)
|
||||
|
||||
|
||||
def plot_latency_vs_throughput(input_files, output_image, title=DEFAULT_TITLE):
|
||||
avg_latencies, throughput_rates = process_input_files(input_files, )
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
|
||||
connections = sorted(avg_latencies.keys())
|
||||
for c in connections:
|
||||
tr = np.array(throughput_rates[c])
|
||||
al = np.array(avg_latencies[c])
|
||||
label = '%d connection%s' % (c, '' if c == 1 else 's')
|
||||
ax.plot(tr, al, 'o-', label=label)
|
||||
|
||||
ax.set_title(title)
|
||||
ax.set_xlabel('Throughput rate (tx/s)')
|
||||
ax.set_ylabel('Average transaction latency (s)')
|
||||
|
||||
plt.legend(loc='upper left')
|
||||
plt.savefig(output_image)
|
||||
|
||||
|
||||
def process_input_files(input_files):
|
||||
# Experimental data from which we will derive the latency vs throughput
|
||||
# statistics
|
||||
experiments = {}
|
||||
|
||||
for input_file in input_files:
|
||||
logging.info('Reading %s...' % input_file)
|
||||
|
||||
with open(input_file, 'rt') as inf:
|
||||
reader = csv.DictReader(inf)
|
||||
for tx in reader:
|
||||
experiments = process_tx(experiments, tx)
|
||||
|
||||
return compute_experiments_stats(experiments)
|
||||
|
||||
|
||||
def process_tx(experiments, tx):
|
||||
exp_id = tx['experiment_id']
|
||||
# Block time is nanoseconds from the epoch - convert to seconds
|
||||
block_time = float(tx['block_time']) / (10**9)
|
||||
# Duration is also in nanoseconds - convert to seconds
|
||||
duration = float(tx['duration_ns']) / (10**9)
|
||||
connections = int(tx['connections'])
|
||||
rate = int(tx['rate'])
|
||||
|
||||
if exp_id not in experiments:
|
||||
experiments[exp_id] = {
|
||||
'connections': connections,
|
||||
'rate': rate,
|
||||
'block_time_min': block_time,
|
||||
# We keep track of the latency associated with the minimum block
|
||||
# time to estimate the start time of the experiment
|
||||
'block_time_min_duration': duration,
|
||||
'block_time_max': block_time,
|
||||
'total_latencies': duration,
|
||||
'tx_count': 1,
|
||||
}
|
||||
logging.info('Found experiment %s with rate=%d, connections=%d' %
|
||||
(exp_id, rate, connections))
|
||||
else:
|
||||
# Validation
|
||||
for field in ['connections', 'rate']:
|
||||
val = int(tx[field])
|
||||
if val != experiments[exp_id][field]:
|
||||
raise Exception(
|
||||
'Found multiple distinct values for field '
|
||||
'"%s" for the same experiment (%s): %d and %d' %
|
||||
(field, exp_id, val, experiments[exp_id][field]))
|
||||
|
||||
if block_time < experiments[exp_id]['block_time_min']:
|
||||
experiments[exp_id]['block_time_min'] = block_time
|
||||
experiments[exp_id]['block_time_min_duration'] = duration
|
||||
if block_time > experiments[exp_id]['block_time_max']:
|
||||
experiments[exp_id]['block_time_max'] = block_time
|
||||
|
||||
experiments[exp_id]['total_latencies'] += duration
|
||||
experiments[exp_id]['tx_count'] += 1
|
||||
|
||||
return experiments
|
||||
|
||||
|
||||
def compute_experiments_stats(experiments):
|
||||
"""Compute average latency vs throughput rate statistics from the given
|
||||
experiments"""
|
||||
stats = {}
|
||||
|
||||
# Compute average latency and throughput rate for each experiment
|
||||
for exp_id, exp in experiments.items():
|
||||
conns = exp['connections']
|
||||
avg_latency = exp['total_latencies'] / exp['tx_count']
|
||||
exp_start_time = exp['block_time_min'] - exp['block_time_min_duration']
|
||||
exp_duration = exp['block_time_max'] - exp_start_time
|
||||
throughput_rate = exp['tx_count'] / exp_duration
|
||||
if conns not in stats:
|
||||
stats[conns] = []
|
||||
|
||||
stats[conns].append({
|
||||
'avg_latency': avg_latency,
|
||||
'throughput_rate': throughput_rate,
|
||||
})
|
||||
|
||||
# Sort stats for each number of connections in order of increasing
|
||||
# throughput rate, and then extract average latencies and throughput rates
|
||||
# as separate data series.
|
||||
conns = sorted(stats.keys())
|
||||
avg_latencies = {}
|
||||
throughput_rates = {}
|
||||
for c in conns:
|
||||
stats[c] = sorted(stats[c], key=lambda s: s['throughput_rate'])
|
||||
avg_latencies[c] = []
|
||||
throughput_rates[c] = []
|
||||
for s in stats[c]:
|
||||
avg_latencies[c].append(s['avg_latency'])
|
||||
throughput_rates[c].append(s['throughput_rate'])
|
||||
logging.info('For %d connection(s): '
|
||||
'throughput rate = %.6f tx/s\t'
|
||||
'average latency = %.6fs' %
|
||||
(c, s['throughput_rate'], s['avg_latency']))
|
||||
|
||||
return (avg_latencies, throughput_rates)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
11
scripts/qa/reporting/requirements.txt
Normal file
@@ -0,0 +1,11 @@
contourpy==1.0.5
cycler==0.11.0
fonttools==4.37.4
kiwisolver==1.4.4
matplotlib==3.6.1
numpy==1.23.4
packaging==21.3
Pillow==9.2.0
pyparsing==3.0.9
python-dateutil==2.8.2
six==1.16.0
@@ -46,7 +46,7 @@ and a list of evidence of malfeasance (ie. signing conflicting votes).
|
||||
|
||||
| Name | Type | Description | Validation |
|
||||
|--------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------|
|
||||
| Header | [Header](#header) | Header corresponding to the block. This field contains information used throughout consensus and other areas of the protocol. To find out what it contains, visit [header] (#header) | Must adhere to the validation rules of [header](#header) |
|
||||
| Header | [Header](#header) | Header corresponding to the block. This field contains information used throughout consensus and other areas of the protocol. To find out what it contains, visit [header](#header) | Must adhere to the validation rules of [header](#header) |
|
||||
| Data | [Data](#data) | Data contains a list of transactions. The contents of the transaction is unknown to Tendermint. | This field can be empty or populated, but no validation is performed. Applications can perform validation on individual transactions prior to block creation using [checkTx](https://github.com/tendermint/tendermint/blob/main/spec/abci/abci++_methods.md#checktx).
|
||||
| Evidence | [EvidenceList](#evidencelist) | Evidence contains a list of infractions committed by validators. | Can be empty, but when populated the validations rules from [evidenceList](#evidencelist) apply |
|
||||
| LastCommit | [Commit](#commit) | `LastCommit` includes one vote for every validator. All votes must either be for the previous block, nil or absent. If a vote is for the previous block it must have a valid signature from the corresponding validator. The sum of the voting power of the validators that voted must be greater than 2/3 of the total voting power of the complete validator set. The number of votes in a commit is limited to 10000 (see `types.MaxVotesCount`). | Must be empty for the initial height and must adhere to the validation rules of [commit](#commit). |
|
||||
@@ -202,12 +202,12 @@ Commit is a simple wrapper for a list of signatures, with one for each validator
|
||||
a particular `BlockID` or was absent. It's a part of the `Commit` and can be used
|
||||
to reconstruct the vote set given the validator set.
|
||||
|
||||
| Name | Type | Description | Validation |
|
||||
|------------------|-----------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
|
||||
| BlockIDFlag | [BlockIDFlag](#blockidflag) | Represents the validators participation in consensus: Either voted for the block that received the majority, voted for another block, voted nil or did not vote | Must be one of the fields in the [BlockIDFlag](#blockidflag) enum |
|
||||
| ValidatorAddress | [Address](#address) | Address of the validator | Must be of length 20 |
|
||||
| Timestamp | [Time](#time) | This field will vary from `CommitSig` to `CommitSig`. It represents the timestamp of the validator. | [Time](#time) |
|
||||
| Signature | [Signature](#signature) | Signature corresponding to the validators participation in consensus. | The length of the signature must be > 0 and < than 64 |
|
||||
| Name | Type | Description | Validation |
|
||||
|------------------|-----------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
|
||||
| BlockIDFlag | [BlockIDFlag](#blockidflag) | Represents the validators participation in consensus: its vote was not received, voted for the block that received the majority, or voted for nil | Must be one of the fields in the [BlockIDFlag](#blockidflag) enum |
|
||||
| ValidatorAddress | [Address](#address) | Address of the validator | Must be of length 20 |
|
||||
| Timestamp | [Time](#time) | This field will vary from `CommitSig` to `CommitSig`. It represents the timestamp of the validator. | [Time](#time) |
|
||||
| Signature | [Signature](#signature) | Signature corresponding to the validators participation in consensus. | The length of the signature must be > 0 and < than 64 |
|
||||
|
||||
NOTE: `ValidatorAddress` and `Timestamp` fields may be removed in the future
(see [ADR-25](https://github.com/tendermint/tendermint/blob/main/docs/architecture/adr-025-commit.md)).
@@ -218,10 +218,10 @@ BlockIDFlag represents which BlockID the [signature](#commitsig) is for.

```go
enum BlockIDFlag {
BLOCK_ID_FLAG_UNKNOWN = 0;
BLOCK_ID_FLAG_ABSENT = 1; // signatures for other blocks are also considered absent
BLOCK_ID_FLAG_COMMIT = 2;
BLOCK_ID_FLAG_NIL = 3;
BLOCK_ID_FLAG_UNKNOWN = 0; // indicates an error condition
BLOCK_ID_FLAG_ABSENT = 1; // the vote was not received
BLOCK_ID_FLAG_COMMIT = 2; // voted for the block that received the majority
BLOCK_ID_FLAG_NIL = 3; // voted for nil
}
```
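The comments added above give each `BlockIDFlag` value a concrete meaning. As a minimal, illustrative Go sketch (not part of this change), this is roughly how those values are read when scanning a commit's signatures; it relies only on the generated constants from `proto/tendermint/types` shown earlier in this diff, and the helper itself is an assumption for illustration.

```go
package specexample

import (
	tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
)

// describeBlockIDFlag spells out what each BlockIDFlag value says about a
// single CommitSig entry, mirroring the comments added to the enum above.
func describeBlockIDFlag(flag tmproto.BlockIDFlag) string {
	switch flag {
	case tmproto.BlockIDFlagAbsent:
		return "vote not received from this validator"
	case tmproto.BlockIDFlagCommit:
		return "voted for the block that received the majority"
	case tmproto.BlockIDFlagNil:
		return "voted for nil"
	default:
		return "unknown flag: indicates an error condition"
	}
}
```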
@@ -16,6 +16,49 @@ const (
chunkMsgSize = int(16e6)
)

// mustEncodeMsg encodes a Protobuf message, panicing on error.
func mustEncodeMsg(pb proto.Message) []byte {
msg := ssproto.Message{}
switch pb := pb.(type) {
case *ssproto.ChunkRequest:
msg.Sum = &ssproto.Message_ChunkRequest{ChunkRequest: pb}
case *ssproto.ChunkResponse:
msg.Sum = &ssproto.Message_ChunkResponse{ChunkResponse: pb}
case *ssproto.SnapshotsRequest:
msg.Sum = &ssproto.Message_SnapshotsRequest{SnapshotsRequest: pb}
case *ssproto.SnapshotsResponse:
msg.Sum = &ssproto.Message_SnapshotsResponse{SnapshotsResponse: pb}
default:
panic(fmt.Errorf("unknown message type %T", pb))
}
bz, err := msg.Marshal()
if err != nil {
panic(fmt.Errorf("unable to marshal %T: %w", pb, err))
}
return bz
}

// decodeMsg decodes a Protobuf message.
func decodeMsg(bz []byte) (proto.Message, error) {
pb := &ssproto.Message{}
err := proto.Unmarshal(bz, pb)
if err != nil {
return nil, err
}
switch msg := pb.Sum.(type) {
case *ssproto.Message_ChunkRequest:
return msg.ChunkRequest, nil
case *ssproto.Message_ChunkResponse:
return msg.ChunkResponse, nil
case *ssproto.Message_SnapshotsRequest:
return msg.SnapshotsRequest, nil
case *ssproto.Message_SnapshotsResponse:
return msg.SnapshotsResponse, nil
default:
return nil, fmt.Errorf("unknown message type %T", msg)
}
}

// validateMsg validates a message.
func validateMsg(pb proto.Message) error {
if pb == nil {