mirror of
https://github.com/tendermint/tendermint.git
synced 2026-01-22 12:42:49 +00:00
Compare commits
32 Commits
wb/no-tx
...
wb/panic-r
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2fbb304237 | ||
|
|
c3805078ec | ||
|
|
78ef22f49d | ||
|
|
e712ec4137 | ||
|
|
b96e4b6738 | ||
|
|
b0423e2445 | ||
|
|
b0684bd300 | ||
|
|
382947ce93 | ||
|
|
9a7ce08e3e | ||
|
|
55f6d20977 | ||
|
|
b9c35c1263 | ||
|
|
f08f72e334 | ||
|
|
e932b469ed | ||
|
|
5db2a39643 | ||
|
|
6909158933 | ||
|
|
de2cffe7a4 | ||
|
|
c257cda212 | ||
|
|
5a49d1b997 | ||
|
|
e4feb56813 | ||
|
|
abbe8209b5 | ||
|
|
723bf92ebb | ||
|
|
ef79241f79 | ||
|
|
3bf0c7a712 | ||
|
|
055f1b3279 | ||
|
|
1998cf7e77 | ||
|
|
c3bcf9b180 | ||
|
|
f1b9613301 | ||
|
|
5d279c93db | ||
|
|
af71f1cbcb | ||
|
|
1a9bad9dd3 | ||
|
|
db690c3b68 | ||
|
|
0c3601bcac |
25
.github/codecov.yml
vendored
Normal file
25
.github/codecov.yml
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
coverage:
|
||||
precision: 2
|
||||
round: down
|
||||
range: "70...100"
|
||||
status:
|
||||
project:
|
||||
default:
|
||||
threshold: 20%
|
||||
patch: off
|
||||
changes: off
|
||||
|
||||
github_checks:
|
||||
annotations: false
|
||||
|
||||
comment: false
|
||||
|
||||
ignore:
|
||||
- "docs"
|
||||
- "DOCKER"
|
||||
- "scripts"
|
||||
- "**/*.pb.go"
|
||||
- "libs/pubsub/query/query.peg.go"
|
||||
- "*.md"
|
||||
- "*.rst"
|
||||
- "*.yml"
|
||||
10
.github/workflows/coverage.yml
vendored
10
.github/workflows/coverage.yml
vendored
@@ -2,6 +2,9 @@ name: Test Coverage
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
paths:
|
||||
- "**.go"
|
||||
- "!test/"
|
||||
branches:
|
||||
- master
|
||||
- release/**
|
||||
@@ -50,6 +53,7 @@ jobs:
|
||||
with:
|
||||
PATTERNS: |
|
||||
**/**.go
|
||||
"!test/"
|
||||
go.mod
|
||||
go.sum
|
||||
- name: install
|
||||
@@ -72,6 +76,7 @@ jobs:
|
||||
with:
|
||||
PATTERNS: |
|
||||
**/**.go
|
||||
"!test/"
|
||||
go.mod
|
||||
go.sum
|
||||
- uses: actions/download-artifact@v2
|
||||
@@ -100,6 +105,7 @@ jobs:
|
||||
with:
|
||||
PATTERNS: |
|
||||
**/**.go
|
||||
"!test/"
|
||||
go.mod
|
||||
go.sum
|
||||
- uses: actions/download-artifact@v2
|
||||
@@ -121,3 +127,7 @@ jobs:
|
||||
- run: |
|
||||
cat ./*profile.out | grep -v "mode: atomic" >> coverage.txt
|
||||
if: env.GIT_DIFF
|
||||
- uses: codecov/codecov-action@v2.1.0
|
||||
with:
|
||||
file: ./coverage.txt
|
||||
if: env.GIT_DIFF
|
||||
|
||||
2
.github/workflows/e2e-nightly-master.yml
vendored
2
.github/workflows/e2e-nightly-master.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
- name: Build
|
||||
working-directory: test/e2e
|
||||
# Run make jobs in parallel, since we can't run steps in parallel.
|
||||
run: make -j2 docker generator runner
|
||||
run: make -j2 docker generator runner tests
|
||||
|
||||
- name: Generate testnets
|
||||
working-directory: test/e2e
|
||||
|
||||
2
.github/workflows/e2e.yml
vendored
2
.github/workflows/e2e.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
- name: Build
|
||||
working-directory: test/e2e
|
||||
# Run two make jobs in parallel, since we can't run steps in parallel.
|
||||
run: make -j2 docker runner
|
||||
run: make -j2 docker runner tests
|
||||
if: "env.GIT_DIFF != ''"
|
||||
|
||||
- name: Run CI testnet
|
||||
|
||||
13
CHANGELOG.md
13
CHANGELOG.md
@@ -156,6 +156,19 @@ Special thanks to external contributors on this release: @JayT106, @bipulprasad,
|
||||
- [statesync] \#6463 Adds Reverse Sync feature to fetch historical light blocks after state sync in order to verify any evidence (@cmwaters)
|
||||
- [blocksync] \#6590 Update the metrics during blocksync (@JayT106)
|
||||
|
||||
## v0.34.13
|
||||
|
||||
*September 6, 2021*
|
||||
|
||||
This release backports improvements to state synchronization and ABCI
|
||||
performance under concurrent load, and the PostgreSQL event indexer.
|
||||
|
||||
### IMPROVEMENTS
|
||||
|
||||
- [statesync] [\#6881](https://github.com/tendermint/tendermint/issues/6881) improvements to stateprovider logic (@cmwaters)
|
||||
- [ABCI] [\#6873](https://github.com/tendermint/tendermint/issues/6873) change client to use multi-reader mutexes (@tychoish)
|
||||
- [indexing] [\#6906](https://github.com/tendermint/tendermint/issues/6906) enable the PostgreSQL indexer sink (@creachadair)
|
||||
|
||||
## v0.34.12
|
||||
|
||||
*August 17, 2021*
|
||||
|
||||
@@ -39,5 +39,8 @@ sections.
|
||||
|
||||
- [RFC-000: P2P Roadmap](./rfc-000-p2p-roadmap.rst)
|
||||
- [RFC-001: Storage Engines](./rfc-001-storage-engine.rst)
|
||||
- [RFC-002: Interprocess Communication](./rfc-002-ipc-ecosystem.md)
|
||||
- [RFC-003: Performance Taxonomy](./rfc-003-performance-questions.md)
|
||||
- [RFC-004: E2E Test Framework Enhancements](./rfc-004-e2e-framework.md)
|
||||
|
||||
<!-- - [RFC-NNN: Title](./rfc-NNN-title.md) -->
|
||||
|
||||
420
docs/rfc/rfc-002-ipc-ecosystem.md
Normal file
420
docs/rfc/rfc-002-ipc-ecosystem.md
Normal file
@@ -0,0 +1,420 @@
|
||||
# RFC 002: Interprocess Communication (IPC) in Tendermint
|
||||
|
||||
## Changelog
|
||||
|
||||
- 08-Sep-2021: Initial draft (@creachadair).
|
||||
|
||||
|
||||
## Abstract
|
||||
|
||||
Communication in Tendermint among consensus nodes, applications, and operator
|
||||
tools all use different message formats and transport mechanisms. In some
|
||||
cases there are multiple options. Having all these options complicates both the
|
||||
code and the developer experience, and hides bugs. To support a more robust,
|
||||
trustworthy, and usable system, we should document which communication paths
|
||||
are essential, which could be removed or reduced in scope, and what we can
|
||||
improve for the most important use cases.
|
||||
|
||||
This document proposes a variety of possible improvements of varying size and
|
||||
scope. Specific design proposals should get their own documentation.
|
||||
|
||||
|
||||
## Background
|
||||
|
||||
The Tendermint state replication engine has a complex IPC footprint.
|
||||
|
||||
1. Consensus nodes communicate with each other using a networked peer-to-peer
|
||||
message-passing protocol.
|
||||
|
||||
2. Consensus nodes communicate with the application whose state is being
|
||||
replicated via the [Application BlockChain Interface (ABCI)][abci].
|
||||
|
||||
3. Consensus nodes export a network-accessible [RPC service][rpc-service] to
|
||||
support operations (bootstrapping, debugging) and synchronization of [light clients][light-client].
|
||||
This interface is also used by the [`tendermint` CLI][tm-cli].
|
||||
|
||||
4. Consensus nodes export a gRPC service exposing a subset of the methods of
|
||||
the RPC service described by (3). This was intended to simplify the
|
||||
implementation of tools that already use gRPC to communicate with an
|
||||
application (via the Cosmos SDK), and wanted to also talk to the consensus
|
||||
node without implementing yet another RPC protocol.
|
||||
|
||||
The gRPC interface to the consensus node has been deprecated and is slated
|
||||
for removal in the forthcoming Tendermint v0.36 release.
|
||||
|
||||
5. Consensus nodes may optionally communicate with a "remote signer" that holds
|
||||
a validator key and can provide public keys and signatures to the consensus
|
||||
node. One of the stated goals of this configuration is to allow the signer
|
||||
to be run on a private network, separate from the consensus node, so that a
|
||||
compromise of the consensus node from the public network would be less
|
||||
likely to expose validator keys.
|
||||
|
||||
## Discussion: Transport Mechanisms
|
||||
|
||||
### Remote Signer Transport
|
||||
|
||||
A remote signer communicates with the consensus node in one of two ways:
|
||||
|
||||
1. "Raw": Using a TCP or Unix-domain socket which carries varint-prefixed
|
||||
protocol buffer messages. In this mode, the consensus node is the server,
|
||||
and the remote signer is the client.
|
||||
|
||||
This mode has been deprecated, and is intended to be removed.
|
||||
|
||||
2. gRPC: This mode uses the same protobuf messages as "Raw" mode, but uses a
|
||||
standard encrypted gRPC HTTP/2 stub as the transport. In this mode, the
|
||||
remote signer is the server and the consensus node is the client.
|
||||
|
||||
|
||||
### ABCI Transport
|
||||
|
||||
In ABCI, the _application_ is the server, and the Tendermint consensus engine
|
||||
is the client. Most applications implement the server using the [Cosmos SDK][cosmos-sdk],
|
||||
which handles low-level details of the ABCI interaction and provides a
|
||||
higher-level interface to the rest of the application. The SDK is written in Go.
|
||||
|
||||
Beneath the SDK, the application communicates with Tendermint core in one of
|
||||
two ways:
|
||||
|
||||
- In-process direct calls (for applications written in Go and compiled against
|
||||
the Tendermint code). This is an optimization for the common case where an
|
||||
application is written in Go, to save on the overhead of marshaling and
|
||||
unmarshaling requests and responses within the same process:
|
||||
[`abci/client/local_client.go`][local-client]
|
||||
|
||||
- A custom remote procedure protocol built on wire-format protobuf messages
|
||||
using a socket (the "socket protocol"): [`abci/server/socket_server.go`][socket-server]
|
||||
|
||||
The SDK also provides a [gRPC service][sdk-grpc] accessible from outside the
|
||||
application, allowing clients to broadcast transactions to the network, look up
|
||||
transactions, and simulate transaction costs.
|
||||
|
||||
|
||||
### RPC Transport
|
||||
|
||||
The consensus node RPC service allows callers to query consensus parameters
|
||||
(genesis data, transactions, commits), node status (network info, health
|
||||
checks), application state (abci_query, abci_info), mempool state, and other
|
||||
attributes of the node and its application. The service also provides methods
|
||||
allowing transactions and evidence to be injected ("broadcast") into the
|
||||
blockchain.
|
||||
|
||||
The RPC service is exposed in several ways:
|
||||
|
||||
- HTTP GET: Queries may be sent as URI parameters, with method names in the path.
|
||||
|
||||
- HTTP POST: Queries may be sent as JSON-RPC request messages in the body of an
|
||||
HTTP POST request. The server uses a custom implementation of JSON-RPC that
|
||||
is not fully compatible with the [JSON-RPC 2.0 spec][json-rpc], but handles
|
||||
the common cases.
|
||||
|
||||
- Websocket: Queries may be sent as JSON-RPC request messages via a websocket.
|
||||
This transport uses more or less the same JSON-RPC plumbing as the HTTP POST
|
||||
handler.
|
||||
|
||||
The websocket endpoint also includes three methods that are _only_ exported
|
||||
via websocket, which appear to support event subscription.
|
||||
|
||||
- gRPC: A subset of queries may be issued in protocol buffer format to the gRPC
|
||||
interface described above under (4). As noted, this endpoint is deprecated
|
||||
and will be removed in v0.36.
|
||||
|
||||
### Opportunities for Simplification
|
||||
|
||||
**Claim:** There are too many IPC mechanisms.
|
||||
|
||||
The preponderance of ABCI usage is via the Cosmos SDK, which means the
|
||||
application and the consensus node are compiled together into a single binary,
|
||||
and the consensus node calls the ABCI methods of the application directly as Go
|
||||
functions.
|
||||
|
||||
We also need a true IPC transport to support ABCI applications _not_ written in
|
||||
Go. There are also several known applications written in Rust, for example
|
||||
(including [Anoma](https://github.com/anoma/anoma), Penumbra,
|
||||
[Oasis](https://github.com/oasisprotocol/oasis-core), Twilight, and
|
||||
[Nomic](https://github.com/nomic-io/nomic)). Ideally we will have at most one
|
||||
such transport "built-in": More esoteric cases can be handled by a custom proxy.
|
||||
Pragmatically, gRPC is probably the right choice here.
|
||||
|
||||
The primary consumers of the multi-headed "RPC service" today are the light
|
||||
client and the `tendermint` command-line client. There is probably some local
|
||||
use via curl, but I expect that is mostly ad hoc. Ethan reports that nodes are
|
||||
often configured with the ports to the RPC service blocked, which is good for
|
||||
security but complicates use by the light client.
|
||||
|
||||
### Context: Remote Signer Issues
|
||||
|
||||
Since the remote signer needs a secure communication channel to exchange keys
|
||||
and signatures, and is expected to run truly remotely from the node (i.e., on a
|
||||
separate physical server), there is not a whole lot we can do here. We should
|
||||
finish the deprecation and removal of the "raw" socket protocol between the
|
||||
consensus node and remote signers, but the use of gRPC is appropriate.
|
||||
|
||||
The main improvement we can make is to simplify the implementation quite a bit,
|
||||
once we no longer need to support both "raw" and gRPC transports.
|
||||
|
||||
### Context: ABCI Issues
|
||||
|
||||
In the original design of ABCI, the presumption was that all access to the
|
||||
application should be mediated by the consensus node. The idea is that outside
|
||||
access could change application state and corrupt the consensus process, which
|
||||
relies on the application to be deterministic. Of course, even without outside
|
||||
access an application could behave nondeterministically, but allowing other
|
||||
programs to send it requests was seen as courting trouble.
|
||||
|
||||
Conversely, users noted that most of the time, tools written for a particular
|
||||
application don't want to talk to the consensus module directly. The
|
||||
application "owns" the state machine the consensus engine is replicating, so
|
||||
tools that care about application state should talk to the application.
|
||||
Otherwise, they would have to bake in knowledge about Tendermint (e.g., its
|
||||
interfaces and data structures) just because of the mediation.
|
||||
|
||||
For clients to talk directly to the application, however, there is another
|
||||
concern: The consensus node is the ABCI _client_, so it is inconvenient for the
|
||||
application to "push" work into the consensus module via ABCI itself. The
|
||||
current implementation works around this by calling the consensus node's RPC
|
||||
service, which exposes an `ABCIQuery` kitchen-sink method that allows the
|
||||
application a way to poke ABCI messages in the other direction.
|
||||
|
||||
Without this RPC method, you could work around this (at least in principle) by
|
||||
having the consensus module "poll" the application for work that needs to be done,
|
||||
but that has unsatisfactory implications for performance and robustness, as
|
||||
well as being harder to understand.
|
||||
|
||||
There has apparently been discussion about trying to make a more bidirectional
|
||||
communication between the consensus node and the application, but this issue
|
||||
seems to still be unresolved.
|
||||
|
||||
Another complication of ABCI is that it requires the application (server) to
|
||||
maintain [four separate connections][abci-conn]: One for "consensus" operations
|
||||
(BeginBlock, EndBlock, DeliverTx, Commit), one for "mempool" operations, one
|
||||
for "query" operations, and one for "snapshot" (state synchronization) operations.
|
||||
The rationale seems to have been that these groups of operations should be able
|
||||
to proceed concurrently with each other. In practice, it results in a very complex
|
||||
state management problem to coordinate state updates between the separate streams.
|
||||
While application authors in Go are mostly insulated from that complexity by the
|
||||
Cosmos SDK, the plumbing to maintain those separate streams is complicated, hard
|
||||
to understand, and we suspect it contains concurrency bugs and/or lock contention
|
||||
issues affecting performance that are subtle and difficult to pin down.
|
||||
|
||||
Even without changing the semantics of any ABCI operations, this code could be
|
||||
made smaller and easier to debug by separating the management of concurrency
|
||||
and locking from the IPC transport: If all requests and responses are routed
|
||||
through one connection, the server can explicitly maintain priority queues for
|
||||
requests and responses, and make less-conservative decisions about when locks
|
||||
are (or aren't) required to synchronize state access. With independent queues,
|
||||
the server must lock conservatively, and no optimistic scheduling is practical.
|
||||
|
||||
This would be a tedious implementation change, but should be achievable without
|
||||
breaking any of the existing interfaces. More importantly, it could potentially
|
||||
address a lot of difficult concurrency and performance problems we currently
|
||||
see anecdotally but have difficulty isolating because of how intertwined these
|
||||
separate message streams are at runtime.
|
||||
|
||||
TODO: Impact of ABCI++ for this topic?
|
||||
|
||||
### Context: RPC Issues
|
||||
|
||||
The RPC system serves several masters, and has a complex surface area. I
|
||||
believe there are some improvements that can be exposed by separating some of
|
||||
these concerns.
|
||||
|
||||
The Tendermint light client currently uses the RPC service to look up blocks
|
||||
and transactions, and to forward ABCI queries to the application. The light
|
||||
client proxy uses the RPC service via a websocket. The Cosmos IBC relayer also
|
||||
uses the RPC service via websocket to watch for transaction events, and uses
|
||||
the `ABCIQuery` method to fetch information and proofs for posted transactions.
|
||||
|
||||
Some work is already underway toward using P2P message passing rather than RPC
|
||||
to synchronize light client state with the rest of the network. IBC relaying,
|
||||
however, requires access to the event system, which is currently not accessible
|
||||
except via the RPC interface. Event subscription _could_ be exposed via P2P,
|
||||
but that is a larger project since it adds P2P communication load, and might
|
||||
thus have an impact on the performance of consensus.
|
||||
|
||||
If event subscription can be moved into the P2P network, we could entirely
|
||||
remove the websocket transport, even for clients that still need access to the
|
||||
RPC service. Until then, we may still be able to reduce the scope of the
|
||||
websocket endpoint to _only_ event subscription, by moving uses of the RPC
|
||||
server as a proxy to ABCI over to the gRPC interface.
|
||||
|
||||
Having the RPC server still makes sense for local bootstrapping and operations,
|
||||
but can be further simplified. Here are some specific proposals:
|
||||
|
||||
- Remove the HTTP GET interface entirely.
|
||||
|
||||
- Simplify JSON-RPC plumbing to remove unnecessary reflection and wrapping.
|
||||
|
||||
- Remove the gRPC interface (this is already planned for v0.36).
|
||||
|
||||
- Separate the websocket interface from the rest of the RPC service, and
|
||||
restrict it to only event subscription.
|
||||
|
||||
Eventually we should try to remove the websocket interface entirely, but we
|
||||
will need to revisit that (probably in a new RFC) once we've done some of the
|
||||
easier things.
|
||||
|
||||
These changes would preserve the ability of operators to issue queries with
|
||||
curl (but would require using JSON-RPC instead of URI parameters). That would
|
||||
be a little less user-friendly, but for a use case that should not be that
|
||||
prevalent.
|
||||
|
||||
These changes would also preserve compatibility with existing JSON-RPC based
|
||||
code paths like the `tendermint` CLI and the light client (even ahead of
|
||||
further work to remove that dependency).
|
||||
|
||||
**Design goal:** An operator should be able to disable non-local access to the
|
||||
RPC server on any node in the network without impairing the ability of the
|
||||
network to function for service of state replication, including light clients.
|
||||
|
||||
**Design principle:** All communication required to implement and monitor the
|
||||
consensus network should use P2P, including the various synchronizations.
|
||||
|
||||
### Options for ABCI Transport
|
||||
|
||||
The majority of current usage is in Go, and the majority of that is mediated by
|
||||
the Cosmos SDK, which uses the "direct call" interface. There is probably some
|
||||
opportunity to clean up the implementation of that code, notably by inverting
|
||||
which interface is at the "top" of the abstraction stack (currently it acts
|
||||
like an RPC interface, and escape-hatches into the direct call). However, this
|
||||
general approach works fine and doesn't need to be fundamentally changed.
|
||||
|
||||
For applications _not_ written in Go, the two remaining options are the
|
||||
"socket" protocol (another variation on varint-prefixed protobuf messages over
|
||||
an unstructured stream) and gRPC. It would be nice if we could get rid of one
|
||||
of these to reduce (unneeded?) optionality.
|
||||
|
||||
Since both the socket protocol and gRPC depend on protocol buffers, the
|
||||
"socket" protocol is the most obvious choice to remove. While gRPC is more
|
||||
complex, the set of languages that _have_ protobuf support but _lack_ gRPC
|
||||
support is small. Moreover, gRPC is already widely used in the rest of the
|
||||
ecosystem (including the Cosmos SDK).
|
||||
|
||||
If some use case did arise later that can't work with gRPC, it would not be too
|
||||
difficult for that application author to write a little proxy (in Go) that
|
||||
bridges the convenient SDK APIs into a simpler protocol than gRPC.
|
||||
|
||||
**Design principle:** It is better for an uncommon special case to carry the
|
||||
burdens of its specialness, than to bake an escape hatch into the infrastructure.
|
||||
|
||||
**Recommendation:** We should deprecate and remove the socket protocol.
|
||||
|
||||
### Options for RPC Transport
|
||||
|
||||
[ADR 057][adr-57] proposes using gRPC for the Tendermint RPC implementation.
|
||||
This is still possible, but if we are able to simplify and decouple the
|
||||
concerns as described above, I do not think it should be necessary.
|
||||
|
||||
While JSON-RPC is not the best possible RPC protocol for all situations, it has
|
||||
some advantages over gRPC for our domain. Specifically:
|
||||
|
||||
- It is easy to call JSON-RPC manually from the command-line, which helps with
|
||||
a common concern for the RPC service, local debugging and operations.
|
||||
|
||||
Relatedly: JSON is relatively easy for humans to read and write, and it can
|
||||
be easily copied and pasted to share sample queries and debugging results in
|
||||
chat, issue comments, and so on. Ideally, the RPC service will not be used
|
||||
for activities where the costs of a text protocol are important compared to
|
||||
its legibility and manual usability benefits.
|
||||
|
||||
- gRPC has an enormous dependency footprint for both clients and servers, and
|
||||
many of the features it provides to support security and performance
|
||||
(encryption, compression, streaming, etc.) are mostly irrelevant to local
|
||||
use. Tendermint already needs to include a gRPC client for the remote signer,
|
||||
but if we can avoid the need for a _client_ to depend on gRPC, that is a win
|
||||
for usability.
|
||||
|
||||
- If we intend to migrate light clients off RPC to use P2P entirely, there is
|
||||
no advantage to forcing a temporary migration to gRPC along the way; and once
|
||||
the light client is not dependent on the RPC service, the efficiency of the
|
||||
protocol is much less important.
|
||||
|
||||
- We can still get the benefits of generated data types using protocol buffers, even
|
||||
without using gRPC:
|
||||
|
||||
- Protobuf defines a standard JSON encoding for all message types so
|
||||
languages with protobuf support do not need to worry about type mapping
|
||||
oddities.
|
||||
|
||||
- Using JSON means that even languages _without_ good protobuf support can
|
||||
implement the protocol with a bit more work, and I expect this situation to
|
||||
be rare.
|
||||
|
||||
Even if a language lacks a good standard JSON-RPC mechanism, the protocol is
|
||||
lightweight and can be implemented by simple send/receive over TCP or
|
||||
Unix-domain sockets with no need for code generation, encryption, etc. gRPC
|
||||
uses a complex HTTP/2 based transport that is not easily replicated.
|
||||
|
||||
### Future Work
|
||||
|
||||
The background and proposals sketched above focus on the existing structure of
|
||||
Tendermint and improvements we can make in the short term. It is worthwhile to
|
||||
also consider options for longer-term broader changes to the IPC ecosystem.
|
||||
The following outlines some ideas at a high level:
|
||||
|
||||
- **Consensus service:** Today, the application and the consensus node are
|
||||
nominally connected only via ABCI. Tendermint was originally designed with
|
||||
the assumption that all communication with the application should be mediated
|
||||
by the consensus node. Based on further experience, however, the design goal
|
||||
is now that the _application_ should be the mediator of application state.
|
||||
|
||||
As noted above, however, ABCI is a client/server protocol, with the
|
||||
application as the server. For outside clients that turns out to have been a
|
||||
good choice, but it complicates the relationship between the application and
|
||||
the consensus node: Previously transactions were entered via the node, now
|
||||
they are entered via the app.
|
||||
|
||||
We have worked around this by using the Tendermint RPC service to give the
|
||||
application a "back channel" to the consensus node, so that it can push
|
||||
transactions back into the consensus network. But the RPC service exposes a
|
||||
lot of other functionality, too, including event subscription, block and
|
||||
transaction queries, and a lot of node status information.
|
||||
|
||||
Even if we can't easily "fix" the orientation of the ABCI relationship, we
|
||||
could improve isolation by splitting out the parts of the RPC service that
|
||||
the application needs as a back-channel, and sharing those _only_ with the
|
||||
application. By defining a "consensus service", we could give the application
|
||||
a way to talk back limited to only the capabilities it needs. This approach
|
||||
has the benefit that we could do it without breaking existing use, and if we
|
||||
later did "fix" the ABCI directionality, we could drop the special case
|
||||
without disrupting the rest of the RPC interface.
|
||||
|
||||
- **Event service:** Right now, the IBC relayer relies on the Tendermint RPC
|
||||
service to provide a stream of block and transaction events, which it uses to
|
||||
discover which transactions need relaying to other chains. While I think
|
||||
that event subscription should eventually be handled via P2P, we could gain
|
||||
some immediate benefit by splitting out event subscription from the rest of
|
||||
the RPC service.
|
||||
|
||||
In this model, an event subscription service would be exposed on the public
|
||||
network, but on a different endpoint. This would remove the need for the RPC
|
||||
service to support the websocket protocol, and would allow operators to
|
||||
isolate potentially sensitive status query results from the public network.
|
||||
|
||||
At the moment the relayers also use the RPC service to get block data for
|
||||
synchronization, but work is already in progress to handle that concern via
|
||||
the P2P layer. Once that's done, event subscription could be separated.
|
||||
|
||||
Separating parts of the existing RPC service is not without cost: It might
|
||||
require additional connection endpoints, for example, though it is also not too
|
||||
difficult for multiple otherwise-independent services to share a connection.
|
||||
|
||||
In return, though, it would become easier to reduce transport options and for
|
||||
operators to independently control access to sensitive data. Considering the
|
||||
viability and implications of these ideas is beyond the scope of this RFC, but
|
||||
they are documented here since they follow from the background we have already
|
||||
discussed.
|
||||
|
||||
## References
|
||||
|
||||
[abci]: https://github.com/tendermint/spec/tree/95cf253b6df623066ff7cd4074a94e7a3f147c7a/spec/abci
|
||||
[rpc-service]: https://docs.tendermint.com/master/rpc/
|
||||
[light-client]: https://docs.tendermint.com/master/tendermint-core/light-client.html
|
||||
[tm-cli]: https://github.com/tendermint/tendermint/tree/master/cmd/tendermint
|
||||
[cosmos-sdk]: https://github.com/cosmos/cosmos-sdk/
|
||||
[local-client]: https://github.com/tendermint/tendermint/blob/master/abci/client/local_client.go
|
||||
[socket-server]: https://github.com/tendermint/tendermint/blob/master/abci/server/socket_server.go
|
||||
[sdk-grpc]: https://pkg.go.dev/github.com/cosmos/cosmos-sdk/types/tx#ServiceServer
|
||||
[json-rpc]: https://www.jsonrpc.org/specification
|
||||
[abci-conn]: https://github.com/tendermint/spec/blob/master/spec/abci/apps.md#state
|
||||
[adr-57]: https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-057-RPC.md
|
||||
283
docs/rfc/rfc-003-performance-questions.md
Normal file
283
docs/rfc/rfc-003-performance-questions.md
Normal file
@@ -0,0 +1,283 @@
|
||||
# RFC 003: Taxonomy of potential performance issues in Tendermint
|
||||
|
||||
## Changelog
|
||||
|
||||
- 2021-09-02: Created initial draft (@wbanfield)
|
||||
- 2021-09-14: Add discussion of the event system (@wbanfield)
|
||||
|
||||
## Abstract
|
||||
|
||||
This document discusses the various sources of performance issues in Tendermint and
|
||||
attempts to clarify what work may be required to understand and address them.
|
||||
|
||||
## Background
|
||||
|
||||
Performance, loosely defined as the ability of a software process to perform its work
|
||||
quickly and efficiently under load and within reasonable resource limits, is a frequent
|
||||
topic of discussion in the Tendermint project.
|
||||
To effectively address any issues with Tendermint performance we need to
|
||||
categorize the various issues, understand their potential sources, and gauge their
|
||||
impact on users.
|
||||
|
||||
Categorizing the different known performance issues will allow us to discuss and fix them
|
||||
more systematically. This document proposes a rough taxonomy of performance issues
|
||||
and highlights areas where more research into potential performance problems is required.
|
||||
|
||||
Understanding Tendermint's performance limitations will also be critically important
|
||||
as we make changes to many of its subsystems. Performance is a central concern for
|
||||
upcoming decisions regarding the `p2p` protocol, RPC message encoding and structure,
|
||||
database usage and selection, and consensus protocol updates.
|
||||
|
||||
|
||||
## Discussion
|
||||
|
||||
This section attempts to delineate the different sections of Tendermint functionality
|
||||
that are often cited as having performance issues. It raises questions and suggests
|
||||
lines of inquiry that may be valuable for better understanding Tendermint's performance issues.
|
||||
|
||||
As a note: We should avoid quickly adding many microbenchmarks or package level benchmarks.
|
||||
These are prone to being worse than useless as they can obscure what _should_ be
|
||||
focused on: performance of the system from the perspective of a user. We should,
|
||||
instead, tune performance with an eye towards user needs and actions users make. These users comprise
|
||||
both operators of Tendermint chains and the people generating transactions for
|
||||
Tendermint chains. Both of these sets of users are largely aligned in wanting an end-to-end
|
||||
system that operates quickly and efficiently.
|
||||
|
||||
REQUEST: The list below may be incomplete; if there are additional sections that are often
|
||||
cited as creating poor performance, please comment so that they may be included.
|
||||
|
||||
### P2P
|
||||
|
||||
#### Claim: Tendermint cannot scale to large numbers of nodes
|
||||
|
||||
A complaint has been reported that Tendermint networks cannot scale to large numbers of nodes.
|
||||
The listed number of nodes a user reported as causing issues was in the thousands.
|
||||
We don't currently have evidence about the upper limit of nodes that Tendermint's
|
||||
P2P stack can scale to.
|
||||
|
||||
We need to more concretely understand the source of issues and determine what layer
|
||||
is causing a problem. It's possible that the P2P layer, in the absence of any reactors
|
||||
sending data, is perfectly capable of managing thousands of peer connections. For
|
||||
a reasonable networking and application setup, thousands of connections should not present any
|
||||
issue for the application.
|
||||
|
||||
We need more data to understand the problem directly. We want to drive the popularity
|
||||
and adoption of Tendermint and this will mean allowing for chains with more validators.
|
||||
We should follow up with users experiencing this issue. We may then want to add
|
||||
a series of metrics to the P2P layer to better understand the inefficiencies it produces.
|
||||
|
||||
The following metrics can help us understand the sources of latency in the Tendermint P2P stack:
|
||||
|
||||
* Number of messages sent and received per second
|
||||
* Time of a message spent on the P2P layer send and receive queues
|
||||
|
||||
The following metrics exist and should be leveraged in addition to those added:
|
||||
|
||||
* Number of peers a node is connected to
|
||||
* Number of bytes per channel sent and received from each peer
|
||||
|
||||
### Sync
|
||||
|
||||
#### Claim: Block Syncing is slow
|
||||
|
||||
Bootstrapping a new node in a network to the height of the rest of the network is believed to
|
||||
take longer than users would like. Block sync requires fetching all of the blocks from
|
||||
peers and placing them into the local disk for storage. A useful line of inquiry
|
||||
is understanding how quickly a perfectly tuned system _could_ fetch all of the state
|
||||
over a network so that we understand how much overhead Tendermint actually adds.
|
||||
|
||||
The operation is likely to be _incredibly_ dependent on the environment in which
|
||||
the node is being run. The factors that will influence syncing include:
|
||||
1. Number of peers that a syncing node may fetch from.
|
||||
2. Speed of the disk that a validator is writing to.
|
||||
3. Speed of the network connection between the different peers that node is
|
||||
syncing from.
|
||||
|
||||
We should calculate how quickly this operation _could possibly_ complete for common chains and nodes.
|
||||
To calculate how quickly this operation could possibly complete, we should assume that
|
||||
a node is reading at line-rate of the NIC and writing at the full drive speed to its
|
||||
local storage. Comparing this theoretical upper-limit to the actual sync times
|
||||
observed by node operators will give us a good point of comparison for understanding
|
||||
how much overhead Tendermint incurs.
|
||||
|
||||
We should additionally add metrics to the blocksync operation to more clearly pinpoint
|
||||
slow operations. The following metrics should be added to the block syncing operation:
|
||||
|
||||
* Time to fetch and validate each block
|
||||
* Time to execute a block
|
||||
* Blocks sync'd per unit time
|
||||
|
||||
### Application
|
||||
|
||||
Applications performing complex state transitions have the potential to bottleneck
|
||||
the Tendermint node.
|
||||
|
||||
#### Claim: ABCI block delivery could cause slowdown
|
||||
|
||||
ABCI delivers blocks in several methods: `BeginBlock`, `DeliverTx`, `EndBlock`, `Commit`.
|
||||
|
||||
Tendermint delivers transactions one-by-one via the `DeliverTx` call. Most of the
|
||||
transaction delivery in Tendermint occurs asynchronously and therefore appears unlikely to
|
||||
form a bottleneck in ABCI.
|
||||
|
||||
After delivering all transactions, Tendermint then calls the `Commit` ABCI method.
|
||||
Tendermint [locks all access to the mempool][abci-commit-description] while `Commit`
|
||||
proceeds. This means that an application that is slow to execute all of its
|
||||
transactions or finalize state during the `Commit` method will prevent any new
|
||||
transactions from being added to the mempool. Apps that are slow to commit will
|
||||
prevent consensus from proceeding to the next consensus height since Tendermint
|
||||
cannot validate block proposals or produce block proposals without the
|
||||
AppHash obtained from the `Commit` method. We should add a metric for each
|
||||
step in the ABCI protocol to track the amount of time that a node spends communicating
|
||||
with the application at each step.
|
||||
|
||||
#### Claim: ABCI serialization overhead causes slowdown
|
||||
|
||||
The most common way to run a Tendermint application is using the Cosmos-SDK.
|
||||
The Cosmos-SDK runs the ABCI application within the same process as Tendermint.
|
||||
When an application is run in the same process as Tendermint, a serialization penalty
|
||||
is not paid. This is because the local ABCI client does not serialize method calls
|
||||
and instead passes the protobuf type through directly. This can be seen
|
||||
in [local_client.go][abci-local-client-code].
|
||||
|
||||
Serialization and deserialization in the gRPC and socket protocol ABCI methods
|
||||
may cause slowdown. While these may cause issues, they are not part of the primary
|
||||
use case of Tendermint and do not necessarily need to be addressed at this time.
|
||||
|
||||
### RPC
|
||||
|
||||
#### Claim: The Query API is slow.
|
||||
|
||||
The query API locks a mutex across the ABCI connections. This causes consensus to
|
||||
slow during queries, as ABCI is no longer able to make progress. This is known
|
||||
to be causing issue in the cosmos-sdk and is being addressed [in the sdk][sdk-query-fix]
|
||||
but a more robust solution may be required. Adding metrics to each ABCI client connection
|
||||
and message as described in the Application section of this document would allow us
|
||||
to further introspect the issue here.
|
||||
|
||||
#### Claim: RPC Serialization may cause slowdown
|
||||
|
||||
The Tendermint RPC uses a modified version of JSON-RPC. This RPC powers the `broadcast_tx_*` methods,
|
||||
which is a critical method for adding transactions to Tendermint at the moment. This method is
|
||||
likely invoked quite frequently on popular networks. Being able to perform efficiently
|
||||
on this common and critical operation is very important. The current JSON-RPC implementation
|
||||
relies heavily on type introspection via reflection, which is known to be very slow in
|
||||
Go. We should therefore produce benchmarks of this method to determine how much overhead
|
||||
we are adding to what is likely to be a very common operation.
|
||||
|
||||
The other JSON-RPC methods are much less critical to the core functionality of Tendermint.
|
||||
While there may be other points of performance consideration within the RPC, methods that do not
|
||||
receive high volumes of requests should not be prioritized for performance consideration.
|
||||
|
||||
NOTE: Previous discussion of the RPC framework was done in [ADR 57][adr-57] and
|
||||
there is ongoing work to inspect and alter the JSON-RPC framework in [RFC 002][rfc-002].
|
||||
Many of these RPC-related performance considerations can either wait until the RFC 002 work is done or be
|
||||
considered concordantly with the in-flight changes to the JSON-RPC.
|
||||
|
||||
### Protocol
|
||||
|
||||
#### Claim: Gossiping messages is a slow process
|
||||
|
||||
Currently, for any validator to successfully vote in a consensus _step_, it must
|
||||
receive votes from greater than 2/3 of the validators on the network. In many cases,
|
||||
it's preferable to receive as many votes as possible from correct validators.
|
||||
|
||||
This produces a quadratic increase in messages that are communicated as more validators join the network.
|
||||
(Each of the N validators must communicate with all other N-1 validators).
|
||||
|
||||
This large number of messages communicated per step has been identified to impact
|
||||
performance of the protocol. Given that the number of messages communicated has been
|
||||
identified as a bottleneck, it would be extremely valuable to gather data on how long
|
||||
it takes for popular chains with many validators to gather all votes within a step.
|
||||
|
||||
Metrics that would improve visibility into this include:
|
||||
|
||||
* Amount of time for a node to gather votes in a step.
|
||||
* Amount of time for a node to gather all block parts.
|
||||
* Number of votes each node sends to gossip (i.e. not its own votes, but votes it is
|
||||
transmitting for a peer).
|
||||
* Total number of votes each node receives (A node may receive duplicate votes
|
||||
so understanding how frequently this occurs will be valuable in evaluating the performance
|
||||
of the gossip system).
|
||||
|
||||
#### Claim: Hashing Txs causes slowdown in Tendermint
|
||||
|
||||
Using a faster hash algorithm for Tx hashes is currently a point of discussion
|
||||
in Tendermint. Namely, it is being considered as part of the [modular hashing proposal][modular-hashing].
|
||||
It is currently unknown if hashing transactions in the Mempool forms a significant bottleneck.
|
||||
Although it does not appear to be documented as slow, there are a few open GitHub
|
||||
issues that indicate a possible user preference for a faster hashing algorithm,
|
||||
including [issue 2187][issue-2187] and [issue 2186][issue-2186].
|
||||
|
||||
It is likely worth investigating what order of magnitude Tx hashing takes in comparison to other
|
||||
aspects of adding a Tx to the mempool. It is not currently clear if the rate of adding Tx
|
||||
to the mempool is a source of user pain. We should not endeavor to make large changes to
|
||||
consensus critical components without first being certain that the change is highly
|
||||
valuable and impactful.
|
||||
|
||||
### Digital Signatures
|
||||
|
||||
#### Claim: Verification of digital signatures may cause slowdown in Tendermint
|
||||
|
||||
Working with cryptographic signatures can be computationally expensive. The cosmos
|
||||
hub uses [ed25519 signatures][hub-signature]. The library performing signature
|
||||
verification in Tendermint on votes is [benchmarked][ed25519-bench] to be able to perform an `ed25519`
|
||||
signature verification in 75μs on a decently fast CPU. A validator in the Cosmos Hub performs
|
||||
3 sets of verifications on the signatures of the 140 validators in the Hub
|
||||
in a consensus round, during block verification, when verifying the prevotes, and
|
||||
when verifying the precommits. With no batching, this would be roughly `30ms` per
|
||||
round. It is quite unlikely, therefore, that this accounts for any serious amount
|
||||
of the ~7 seconds of block time per height in the Hub.
|
||||
|
||||
This may cause slowdown when syncing, since the process needs to constantly verify
|
||||
signatures. It's possible that improved signature aggregation will lead to improved
|
||||
light client or other syncing performance. In general, a metric should be added
|
||||
to track block rate while blocksyncing.
|
||||
|
||||
#### Claim: Our use of digital signatures in the consensus protocol contributes to performance issues
|
||||
|
||||
Currently, Tendermint's digital signature verification requires that all validators
|
||||
receive all vote messages. Each validator must receive the complete digital signature
|
||||
along with the vote message that it corresponds to. This means that all N validators
|
||||
must receive messages from at least 2/3 of the N validators in each consensus
|
||||
round. Given the potential for oddly shaped network topologies and the expected
|
||||
variable network roundtrip times of a few hundred milliseconds in a blockchain,
|
||||
it is highly likely that this amount of gossiping is leading to a significant amount
|
||||
of the slowdown in the Cosmos Hub and in Tendermint consensus.
|
||||
|
||||
### Tendermint Event System
|
||||
|
||||
#### Claim: The event system is a bottleneck in Tendermint
|
||||
|
||||
The Tendermint Event system is used to communicate and store information about
|
||||
internal Tendermint execution. The system uses channels internally to send messages
|
||||
to different subscribers. Sending an event [blocks on the internal channel][event-send].
|
||||
The default configuration is to [use an unbuffered channel for event publishes][event-buffer-capacity].
|
||||
Several consumers of the event system also use an unbuffered channel for reads.
|
||||
An example of this is the [event indexer][event-indexer-unbuffered], which takes an
|
||||
unbuffered subscription to the event system. The result is that these unbuffered readers
|
||||
can cause writes to the event system to block or slow down depending on contention in the
|
||||
event system. This has implications for the consensus system, which [publishes events][consensus-event-send].
|
||||
To better understand the performance of the event system, we should add metrics to track the timing of
|
||||
event sends. The following metrics would be a good start for tracking this performance:
|
||||
|
||||
* Time in event send, labeled by Event Type
|
||||
* Time in event receive, labeled by subscriber
|
||||
* Event throughput, measured in events per unit time.
|
||||
|
||||
### References
|
||||
[modular-hashing]: https://github.com/tendermint/tendermint/pull/6773
|
||||
[issue-2186]: https://github.com/tendermint/tendermint/issues/2186
|
||||
[issue-2187]: https://github.com/tendermint/tendermint/issues/2187
|
||||
[rfc-002]: https://github.com/tendermint/tendermint/pull/6913
|
||||
[adr-57]: https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-057-RPC.md
|
||||
[issue-1319]: https://github.com/tendermint/tendermint/issues/1319
|
||||
[abci-commit-description]: https://github.com/tendermint/spec/blob/master/spec/abci/apps.md#commit
|
||||
[abci-local-client-code]: https://github.com/tendermint/tendermint/blob/511bd3eb7f037855a793a27ff4c53c12f085b570/abci/client/local_client.go#L84
|
||||
[hub-signature]: https://github.com/cosmos/gaia/blob/0ecb6ed8a244d835807f1ced49217d54a9ca2070/docs/resources/genesis.md#consensus-parameters
|
||||
[ed25519-bench]: https://github.com/oasisprotocol/curve25519-voi/blob/d2e7fc59fe38c18ca990c84c4186cba2cc45b1f9/PERFORMANCE.md
|
||||
[event-send]: https://github.com/tendermint/tendermint/blob/5bd3b286a2b715737f6d6c33051b69061d38f8ef/libs/pubsub/pubsub.go#L338
|
||||
[event-buffer-capacity]: https://github.com/tendermint/tendermint/blob/5bd3b286a2b715737f6d6c33051b69061d38f8ef/types/event_bus.go#L14
|
||||
[event-indexer-unbuffered]: https://github.com/tendermint/tendermint/blob/5bd3b286a2b715737f6d6c33051b69061d38f8ef/state/indexer/indexer_service.go#L39
|
||||
[consensus-event-send]: https://github.com/tendermint/tendermint/blob/5bd3b286a2b715737f6d6c33051b69061d38f8ef/internal/consensus/state.go#L1573
|
||||
[sdk-query-fix]: https://github.com/cosmos/cosmos-sdk/pull/10045
|
||||
213
docs/rfc/rfc-004-e2e-framework.rst
Normal file
213
docs/rfc/rfc-004-e2e-framework.rst
Normal file
@@ -0,0 +1,213 @@
|
||||
========================================
|
||||
RFC 004: E2E Test Framework Enhancements
|
||||
========================================
|
||||
|
||||
Changelog
|
||||
---------
|
||||
|
||||
- 2021-09-14: started initial draft (@tychoish)
|
||||
|
||||
Abstract
|
||||
--------
|
||||
|
||||
This document discusses a series of improvements to the e2e test framework
|
||||
that we can consider during the next few releases to help boost confidence in
|
||||
Tendermint releases, and improve developer efficiency.
|
||||
|
||||
Background
|
||||
----------
|
||||
|
||||
During the 0.35 release cycle, the E2E tests were a source of great
|
||||
value, helping to identify a number of bugs before release. At the same time,
|
||||
the tests were not consistently passing during this time, thereby reducing
|
||||
their value, and forcing the core development team to allocate time and energy
|
||||
to maintaining and chasing down issues with the e2e tests and the test
|
||||
harness. The experience of this release cycle calls to mind a series of
|
||||
improvements to the test framework, and this document attempts to capture
|
||||
these improvements, along with motivations, and potential for impact.
|
||||
|
||||
Projects
|
||||
--------
|
||||
|
||||
Flexible Workload Generation
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Presently the e2e suite contains a single workload generation pattern, which
|
||||
exists simply to ensure that the test networks have some work during their
|
||||
runs. However, the shape and volume of the work is very consistent and is very
|
||||
gentle to help ensure test reliability.
|
||||
|
||||
We don't need a complex workload generation framework, but being able to have
|
||||
a few different workload shapes available for test networks, both generated and
|
||||
hand-crafted, would be useful.
|
||||
|
||||
Workload patterns/configurations might include:
|
||||
|
||||
- transaction targeting patterns (include light nodes, round robin, target
|
||||
individual nodes)
|
||||
|
||||
- variable transaction size over time.
|
||||
|
||||
- transaction broadcast option (synchronously, checked, fire-and-forget,
|
||||
mixed).
|
||||
|
||||
- number of transactions to submit.
|
||||
|
||||
- non-transaction workloads: (evidence submission, query, event subscription.)
|
||||
|
||||
Configurable Generator
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The nightly e2e suite is defined by the `testnet generator
|
||||
<https://github.com/tendermint/tendermint/blob/master/test/e2e/generator/generate.go#L13-L65>`_,
|
||||
and it's difficult to add dimensions or change the focus of the test suite in
|
||||
any way without modifying the implementation of the generator. If the
|
||||
generator were more configurable, potentially via a file rather than in
|
||||
the Go implementation, we could modify the focus of the test suite on the
|
||||
fly.
|
||||
|
||||
Features that we might want to configure:
|
||||
|
||||
- number of test networks to generate of various topologies, to improve
|
||||
coverage of different configurations.
|
||||
|
||||
- test application configurations (to modify the latency of ABCI calls, etc.)
|
||||
|
||||
- size of test networks.
|
||||
|
||||
- workload shape and behavior.
|
||||
|
||||
- initial sync and catch-up configurations.
|
||||
|
||||
The workload generator currently provides runtime options for limiting the
|
||||
generator to specific types of P2P stacks, and for generating multiple groups
|
||||
of test cases to support parallelism. The goal is to extend this pattern and
|
||||
avoid hardcoding the matrix of test cases in the generator code. Once the
|
||||
testnet configuration generation behavior is configurable at runtime,
|
||||
developers may be able to use the e2e framework to validate changes before
|
||||
landing changes that break e2e tests a day later.
|
||||
|
||||
In addition to the autogenerated suite, it might make sense to maintain a
|
||||
small collection of hand-crafted cases that exercise configurations of
|
||||
concern, to run as part of the nightly (or less frequent) loop.
|
||||
|
||||
Implementation Plan Structure
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
As a development team, we should determine which features should impact the e2e
|
||||
testing early in the development cycle, and if we intend to modify the e2e
|
||||
tests to exercise a feature, we should identify this early and begin the
|
||||
integration process as early as possible.
|
||||
|
||||
To facilitate this, we should adopt a practice whereby we exercise specific
|
||||
features that are currently under development more rigorously in the e2e
|
||||
suite, and then as development stabilizes we can reduce the number or weight
|
||||
of these features in the suite.
|
||||
|
||||
As of 0.35 there are essentially two end to end tests: the suite of 64
|
||||
generated test networks, and the hand crafted `ci.toml` test case. The
|
||||
generated test cases help provide systematic coverage, while the `ci` run
|
||||
provides coverage for a large number of features.
|
||||
|
||||
Reduce Cycle Time
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
One of the barriers to leveraging the e2e framework, and one of the challenges
|
||||
in debugging failures, is that the cycle time of running a single test iteration is
|
||||
quite high: 5 minutes to build the docker image, plus the time to run the test
|
||||
or tests.
|
||||
|
||||
There are a number of improvements and enhancements that can reduce the cycle
|
||||
time in practice:
|
||||
|
||||
- reduce the amount of time required to build the docker image used in these
|
||||
tests. Without the dependency on CGo, the tendermint binaries could be
|
||||
(cross) compiled outside of the docker container and then injected into
|
||||
them, which would take better advantage of docker's native caching,
|
||||
although, without the dependency on CGo there would be no hard requirement
|
||||
for the e2e tests to use docker.
|
||||
|
||||
- support test parallelism. Because of the way the testnets are orchestrated
|
||||
a single system can really only run one network at a time. For executions
|
||||
(local or remote) with more resources, there's no reason not to run a few
|
||||
networks in parallel to reduce the feedback time.
|
||||
|
||||
- prune testnet configurations that are unlikely to provide good signal, to
|
||||
shorten the time to feedback.
|
||||
|
||||
- apply some kind of tiered approach to test execution, to improve the
|
||||
legibility of the test result. For example order tests by the dependency of
|
||||
their features, or run test networks without perturbations before running
|
||||
that configuration with perturbations, to be able to isolate the impact of
|
||||
specific features.
|
||||
|
||||
- orchestrate the test harness directly from go test rather than via a special
|
||||
harness and shell scripts so e2e tests may more natively fit into developers'
|
||||
existing workflows.
|
||||
|
||||
Many of these improvements, particularly reducing the build time, will also
|
||||
reduce the time to get feedback during automated builds.
|
||||
|
||||
Deeper Insights
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
When a test network fails, it's incredibly difficult to understand _why_ the
|
||||
network failed, as the current system provides very little insight into the
|
||||
system outside of the process logs. When a test network stalls or fails
|
||||
developers should be able to quickly and easily get a sense of the state of
|
||||
the network and all nodes.
|
||||
|
||||
Improvements in pursuit of this goal include functionality that would help
|
||||
node operators in production environments by improving the quality and utility
|
||||
of the logging messages and other reported metrics, but also provide some
|
||||
tools to collect and aggregate this data for developers in the context of test
|
||||
networks.
|
||||
|
||||
- Interleave messages from all nodes in the network to be able to correlate
|
||||
events during the test run.
|
||||
|
||||
- Collect structured metrics of the system operation (CPU/MEM/IO) during the
|
||||
test run, as well as from each tendermint/application process.
|
||||
|
||||
- Build (simple) tools to be able to render and summarize the data collected
|
||||
during the test run to answer basic questions about test outcome.
|
||||
|
||||
Flexible Assertions
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Currently, all assertions run for every test network, which makes the
|
||||
assertions pretty bland, and the framework primarily useful as a smoke-test
|
||||
framework, but it might be useful to be able to write and run different
|
||||
tests for different configurations. This could allow us to test outside of the
|
||||
happy-path.
|
||||
|
||||
In general our existing assertions occupy a fraction of the total test time,
|
||||
so the relative cost of adding a few extra test assertions would be of limited
|
||||
cost, and could help build confidence.
|
||||
|
||||
Additional Kinds of Testing
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The existing e2e suite exercises networks of nodes that have homogeneous
|
||||
tendermint version, stable configuration, that are expected to make
|
||||
progress. There are many other possible test configurations that may be
|
||||
interesting to engage with. These could include dimensions, such as:
|
||||
|
||||
- Multi-version testing to exercise our compatibility guarantees for networks
|
||||
that might have different tendermint versions.
|
||||
|
||||
- As a flavor of multi-version testing, include upgrade testing, to build
|
||||
confidence in migration code and procedures.
|
||||
|
||||
- Additional test applications, particularly practical-type applications
|
||||
including some that use gaiad and/or the cosmos-sdk. Test-only applications
|
||||
that simulate other kinds of applications (e.g. variable application
|
||||
operation latency.)
|
||||
|
||||
- Tests of "non-viable" configurations that ensure that forbidden combinations
|
||||
lead to halts.
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
- `ADR 66: End-to-End Testing <../architecture/adr-66-e2e-testing.md>`_
|
||||
@@ -185,51 +185,65 @@ the argument name and use `_` as a placeholder.
|
||||
|
||||
### Formatting
|
||||
|
||||
The following nuances when sending/formatting transactions should be
|
||||
taken into account:
|
||||
When sending transactions to the RPC interface, the following formatting rules
|
||||
must be followed:
|
||||
|
||||
With `GET`:
|
||||
Using `GET` (with parameters in the URL):
|
||||
|
||||
To send a UTF8 string byte array, quote the value of the tx parameter:
|
||||
To send a UTF8 string as transaction data, enclose the value of the `tx`
|
||||
parameter in double quotes:
|
||||
|
||||
```sh
|
||||
curl 'http://localhost:26657/broadcast_tx_commit?tx="hello"'
|
||||
```
|
||||
|
||||
which sends a 5 byte transaction: "h e l l o" \[68 65 6c 6c 6f\].
|
||||
which sends a 5-byte transaction: "h e l l o" \[68 65 6c 6c 6f\].
|
||||
|
||||
Note the URL must be wrapped with single quotes, else bash will ignore
|
||||
the double quotes. To avoid the single quotes, escape the double quotes:
|
||||
Note that the URL in this example is enclosed in single quotes to prevent the
|
||||
shell from interpreting the double quotes. Alternatively, you may escape the
|
||||
double quotes with backslashes:
|
||||
|
||||
```sh
|
||||
curl http://localhost:26657/broadcast_tx_commit?tx=\"hello\"
|
||||
```
|
||||
|
||||
Using a special character:
|
||||
The double-quoted format works for multibyte characters, as long as they
|
||||
are valid UTF8, for example:
|
||||
|
||||
```sh
|
||||
curl 'http://localhost:26657/broadcast_tx_commit?tx="€5"'
|
||||
```
|
||||
|
||||
sends a 4 byte transaction: "€5" (UTF8) \[e2 82 ac 35\].
|
||||
sends a 4-byte transaction: "€5" (UTF8) \[e2 82 ac 35\].
|
||||
|
||||
To send as raw hex, omit quotes AND prefix the hex string with `0x`:
|
||||
Arbitrary (non-UTF8) transaction data may also be encoded as a string of
|
||||
hexadecimal digits (2 digits per byte). To do this, omit the quotation marks
|
||||
and prefix the hex string with `0x`:
|
||||
|
||||
```sh
|
||||
curl http://localhost:26657/broadcast_tx_commit?tx=0x01020304
|
||||
curl http://localhost:26657/broadcast_tx_commit?tx=0x68656C6C6F
|
||||
```
|
||||
|
||||
which sends a 4 byte transaction: \[01 02 03 04\].
|
||||
which sends the 5-byte transaction: \[68 65 6c 6c 6f\].
|
||||
|
||||
With `POST` (using `json`), the raw hex must be `base64` encoded:
|
||||
Using `POST` (with parameters in JSON), the transaction data are sent as a JSON
|
||||
string in base64 encoding:
|
||||
|
||||
```sh
|
||||
curl --data-binary '{"jsonrpc":"2.0","id":"anything","method":"broadcast_tx_commit","params": {"tx": "AQIDBA=="}}' -H 'content-type:text/plain;' http://localhost:26657
|
||||
curl http://localhost:26657 -H 'Content-Type: application/json' --data-binary '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "anything",
|
||||
"method": "broadcast_tx_commit",
|
||||
"params": {
|
||||
"tx": "aGVsbG8="
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
which sends the same 4 byte transaction: \[01 02 03 04\].
|
||||
which sends the same 5-byte transaction: \[68 65 6c 6c 6f\].
|
||||
|
||||
Note that raw hex cannot be used in `POST` transactions.
|
||||
Note that the hexadecimal encoding of transaction data is _not_ supported in
|
||||
JSON (`POST`) requests.
|
||||
|
||||
## Reset
|
||||
|
||||
|
||||
5
go.mod
5
go.mod
@@ -27,15 +27,14 @@ require (
|
||||
github.com/prometheus/client_golang v1.11.0
|
||||
github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0
|
||||
github.com/rs/cors v1.8.0
|
||||
github.com/rs/zerolog v1.24.0
|
||||
github.com/rs/zerolog v1.25.0
|
||||
github.com/sasha-s/go-deadlock v0.2.1-0.20190427202633-1595213edefa
|
||||
github.com/snikch/goodman v0.0.0-20171125024755-10e37e294daa
|
||||
github.com/spf13/cobra v1.2.1
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
github.com/spf13/viper v1.8.1
|
||||
github.com/stretchr/testify v1.7.0
|
||||
github.com/tendermint/tm-db v0.6.4
|
||||
github.com/vektra/mockery/v2 v2.9.0
|
||||
github.com/vektra/mockery/v2 v2.9.3
|
||||
golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a
|
||||
golang.org/x/net v0.0.0-20210428140749-89ef3d95e781
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
|
||||
|
||||
8
go.sum
8
go.sum
@@ -767,8 +767,8 @@ github.com/rs/cors v1.8.0/go.mod h1:EBwu+T5AvHOcXwvZIkQFjUN6s8Czyqw12GL/Y0tUyRM=
|
||||
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
|
||||
github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
github.com/rs/zerolog v1.18.0/go.mod h1:9nvC1axdVrAHcu/s9taAVfBuIdTZLVQmKQyvrUjF5+I=
|
||||
github.com/rs/zerolog v1.24.0 h1:76ivFxmVSRs1u2wUwJVg5VZDYQgeH1JpoS6ndgr9Wy8=
|
||||
github.com/rs/zerolog v1.24.0/go.mod h1:7KHcEGe0QZPOm2IE4Kpb5rTh6n1h2hIgS5OOnu1rUaI=
|
||||
github.com/rs/zerolog v1.25.0 h1:Rj7XygbUHKUlDPcVdoLyR91fJBsduXj5fRxyqIQj/II=
|
||||
github.com/rs/zerolog v1.25.0/go.mod h1:7KHcEGe0QZPOm2IE4Kpb5rTh6n1h2hIgS5OOnu1rUaI=
|
||||
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
|
||||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/ryancurrah/gomodguard v1.2.3 h1:ww2fsjqocGCAFamzvv/b8IsRduuHHeK2MHTcTxZTQX8=
|
||||
@@ -895,8 +895,8 @@ github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyC
|
||||
github.com/valyala/fasthttp v1.16.0/go.mod h1:YOKImeEosDdBPnxc0gy7INqi3m1zK6A+xl6TwOBhHCA=
|
||||
github.com/valyala/quicktemplate v1.6.3/go.mod h1:fwPzK2fHuYEODzJ9pkw0ipCPNHZ2tD5KW4lOuSdPKzY=
|
||||
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
|
||||
github.com/vektra/mockery/v2 v2.9.0 h1:+3FhCL3EviR779mTzXwUuhPNnqFUA7sDnt9OFkXaFd4=
|
||||
github.com/vektra/mockery/v2 v2.9.0/go.mod h1:2gU4Cf/f8YyC8oEaSXfCnZBMxMjMl/Ko205rlP0fO90=
|
||||
github.com/vektra/mockery/v2 v2.9.3 h1:ma6hcGQw4q/lhFUTJ+E9V8/5tsIcht9i2Q4d1qo26SQ=
|
||||
github.com/vektra/mockery/v2 v2.9.3/go.mod h1:2gU4Cf/f8YyC8oEaSXfCnZBMxMjMl/Ko205rlP0fO90=
|
||||
github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8/go.mod h1:dniwbG03GafCjFohMDmz6Zc6oCuiqgH6tGNyXTkHzXE=
|
||||
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
|
||||
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs=
|
||||
|
||||
@@ -52,7 +52,7 @@ func TestByzantinePrevoteEquivocation(t *testing.T) {
|
||||
thisConfig := ResetConfig(fmt.Sprintf("%s_%d", testName, i))
|
||||
defer os.RemoveAll(thisConfig.RootDir)
|
||||
|
||||
ensureDir(path.Dir(thisConfig.Consensus.WalFile()), 0700) // dir for wal
|
||||
ensureDir(t, path.Dir(thisConfig.Consensus.WalFile()), 0700) // dir for wal
|
||||
app := appFunc()
|
||||
vals := types.TM2PB.ValidatorUpdates(state.Validators)
|
||||
app.InitChain(abci.RequestInitChain{Validators: vals})
|
||||
|
||||
@@ -69,9 +69,10 @@ func configSetup(t *testing.T) *cfg.Config {
|
||||
return config
|
||||
}
|
||||
|
||||
func ensureDir(dir string, mode os.FileMode) {
|
||||
func ensureDir(t *testing.T, dir string, mode os.FileMode) {
|
||||
t.Helper()
|
||||
if err := tmos.EnsureDir(dir, mode); err != nil {
|
||||
panic(err)
|
||||
t.Fatalf("error opening directory: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -221,18 +222,20 @@ func startTestRound(cs *State, height int64, round int32) {
|
||||
|
||||
// Create proposal block from cs1 but sign it with vs.
|
||||
func decideProposal(
|
||||
t *testing.T,
|
||||
cs1 *State,
|
||||
vs *validatorStub,
|
||||
height int64,
|
||||
round int32,
|
||||
) (proposal *types.Proposal, block *types.Block) {
|
||||
t.Helper()
|
||||
cs1.mtx.Lock()
|
||||
block, blockParts := cs1.createProposalBlock()
|
||||
validRound := cs1.ValidRound
|
||||
chainID := cs1.state.ChainID
|
||||
cs1.mtx.Unlock()
|
||||
if block == nil {
|
||||
panic("Failed to createProposalBlock. Did you forget to add commit for previous block?")
|
||||
t.Fatal("Failed to createProposalBlock. Did you forget to add commit for previous block?")
|
||||
}
|
||||
|
||||
// Make proposal
|
||||
@@ -240,7 +243,7 @@ func decideProposal(
|
||||
proposal = types.NewProposal(height, round, polRound, propBlockID)
|
||||
p := proposal.ToProto()
|
||||
if err := vs.SignProposal(context.Background(), chainID, p); err != nil {
|
||||
panic(err)
|
||||
t.Fatalf("error signing proposal: %s", err)
|
||||
}
|
||||
|
||||
proposal.Signature = p.Signature
|
||||
@@ -267,36 +270,38 @@ func signAddVotes(
|
||||
}
|
||||
|
||||
func validatePrevote(t *testing.T, cs *State, round int32, privVal *validatorStub, blockHash []byte) {
|
||||
t.Helper()
|
||||
prevotes := cs.Votes.Prevotes(round)
|
||||
pubKey, err := privVal.GetPubKey(context.Background())
|
||||
require.NoError(t, err)
|
||||
address := pubKey.Address()
|
||||
var vote *types.Vote
|
||||
if vote = prevotes.GetByAddress(address); vote == nil {
|
||||
panic("Failed to find prevote from validator")
|
||||
t.Fatalf("Failed to find prevote from validator")
|
||||
}
|
||||
if blockHash == nil {
|
||||
if vote.BlockID.Hash != nil {
|
||||
panic(fmt.Sprintf("Expected prevote to be for nil, got %X", vote.BlockID.Hash))
|
||||
t.Fatalf("Expected prevote to be for nil, got %X", vote.BlockID.Hash)
|
||||
}
|
||||
} else {
|
||||
if !bytes.Equal(vote.BlockID.Hash, blockHash) {
|
||||
panic(fmt.Sprintf("Expected prevote to be for %X, got %X", blockHash, vote.BlockID.Hash))
|
||||
t.Fatalf("Expected prevote to be for %X, got %X", blockHash, vote.BlockID.Hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func validateLastPrecommit(t *testing.T, cs *State, privVal *validatorStub, blockHash []byte) {
|
||||
t.Helper()
|
||||
votes := cs.LastCommit
|
||||
pv, err := privVal.GetPubKey(context.Background())
|
||||
require.NoError(t, err)
|
||||
address := pv.Address()
|
||||
var vote *types.Vote
|
||||
if vote = votes.GetByAddress(address); vote == nil {
|
||||
panic("Failed to find precommit from validator")
|
||||
t.Fatalf("Failed to find precommit from validator")
|
||||
}
|
||||
if !bytes.Equal(vote.BlockID.Hash, blockHash) {
|
||||
panic(fmt.Sprintf("Expected precommit to be for %X, got %X", blockHash, vote.BlockID.Hash))
|
||||
t.Fatalf("Expected precommit to be for %X, got %X", blockHash, vote.BlockID.Hash)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -309,41 +314,42 @@ func validatePrecommit(
|
||||
votedBlockHash,
|
||||
lockedBlockHash []byte,
|
||||
) {
|
||||
t.Helper()
|
||||
precommits := cs.Votes.Precommits(thisRound)
|
||||
pv, err := privVal.GetPubKey(context.Background())
|
||||
require.NoError(t, err)
|
||||
address := pv.Address()
|
||||
var vote *types.Vote
|
||||
if vote = precommits.GetByAddress(address); vote == nil {
|
||||
panic("Failed to find precommit from validator")
|
||||
t.Fatalf("Failed to find precommit from validator")
|
||||
}
|
||||
|
||||
if votedBlockHash == nil {
|
||||
if vote.BlockID.Hash != nil {
|
||||
panic("Expected precommit to be for nil")
|
||||
t.Fatalf("Expected precommit to be for nil")
|
||||
}
|
||||
} else {
|
||||
if !bytes.Equal(vote.BlockID.Hash, votedBlockHash) {
|
||||
panic("Expected precommit to be for proposal block")
|
||||
t.Fatalf("Expected precommit to be for proposal block")
|
||||
}
|
||||
}
|
||||
|
||||
if lockedBlockHash == nil {
|
||||
if cs.LockedRound != lockRound || cs.LockedBlock != nil {
|
||||
panic(fmt.Sprintf(
|
||||
t.Fatalf(
|
||||
"Expected to be locked on nil at round %d. Got locked at round %d with block %v",
|
||||
lockRound,
|
||||
cs.LockedRound,
|
||||
cs.LockedBlock))
|
||||
cs.LockedBlock)
|
||||
}
|
||||
} else {
|
||||
if cs.LockedRound != lockRound || !bytes.Equal(cs.LockedBlock.Hash(), lockedBlockHash) {
|
||||
panic(fmt.Sprintf(
|
||||
t.Fatalf(
|
||||
"Expected block to be locked on round %d, got %d. Got locked block %X, expected %X",
|
||||
lockRound,
|
||||
cs.LockedRound,
|
||||
cs.LockedBlock.Hash(),
|
||||
lockedBlockHash))
|
||||
lockedBlockHash)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -357,6 +363,7 @@ func validatePrevoteAndPrecommit(
|
||||
votedBlockHash,
|
||||
lockedBlockHash []byte,
|
||||
) {
|
||||
t.Helper()
|
||||
// verify the prevote
|
||||
validatePrevote(t, cs, thisRound, privVal, votedBlockHash)
|
||||
// verify precommit
|
||||
@@ -444,13 +451,14 @@ func newStateWithConfigAndBlockStore(
|
||||
return cs
|
||||
}
|
||||
|
||||
func loadPrivValidator(config *cfg.Config) *privval.FilePV {
|
||||
func loadPrivValidator(t *testing.T, config *cfg.Config) *privval.FilePV {
|
||||
t.Helper()
|
||||
privValidatorKeyFile := config.PrivValidator.KeyFile()
|
||||
ensureDir(filepath.Dir(privValidatorKeyFile), 0700)
|
||||
ensureDir(t, filepath.Dir(privValidatorKeyFile), 0700)
|
||||
privValidatorStateFile := config.PrivValidator.StateFile()
|
||||
privValidator, err := privval.LoadOrGenFilePV(privValidatorKeyFile, privValidatorStateFile)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
t.Fatalf("error generating validator file: %s", err)
|
||||
}
|
||||
privValidator.Reset()
|
||||
return privValidator
|
||||
@@ -475,220 +483,241 @@ func randState(config *cfg.Config, nValidators int) (*State, []*validatorStub) {
|
||||
|
||||
//-------------------------------------------------------------------------------
|
||||
|
||||
func ensureNoNewEvent(ch <-chan tmpubsub.Message, timeout time.Duration,
|
||||
func ensureNoNewEvent(t *testing.T, ch <-chan tmpubsub.Message, timeout time.Duration,
|
||||
errorMessage string) {
|
||||
t.Helper()
|
||||
select {
|
||||
case <-time.After(timeout):
|
||||
break
|
||||
case <-ch:
|
||||
panic(errorMessage)
|
||||
t.Fatalf("unexpected event: %s", errorMessage)
|
||||
}
|
||||
}
|
||||
|
||||
func ensureNoNewEventOnChannel(ch <-chan tmpubsub.Message) {
|
||||
func ensureNoNewEventOnChannel(t *testing.T, ch <-chan tmpubsub.Message) {
|
||||
t.Helper()
|
||||
ensureNoNewEvent(
|
||||
t,
|
||||
ch,
|
||||
ensureTimeout,
|
||||
"We should be stuck waiting, not receiving new event on the channel")
|
||||
}
|
||||
|
||||
func ensureNoNewRoundStep(stepCh <-chan tmpubsub.Message) {
|
||||
func ensureNoNewRoundStep(t *testing.T, stepCh <-chan tmpubsub.Message) {
|
||||
t.Helper()
|
||||
ensureNoNewEvent(
|
||||
t,
|
||||
stepCh,
|
||||
ensureTimeout,
|
||||
"We should be stuck waiting, not receiving NewRoundStep event")
|
||||
}
|
||||
|
||||
func ensureNoNewUnlock(unlockCh <-chan tmpubsub.Message) {
|
||||
func ensureNoNewUnlock(t *testing.T, unlockCh <-chan tmpubsub.Message) {
|
||||
t.Helper()
|
||||
ensureNoNewEvent(
|
||||
t,
|
||||
unlockCh,
|
||||
ensureTimeout,
|
||||
"We should be stuck waiting, not receiving Unlock event")
|
||||
}
|
||||
|
||||
func ensureNoNewTimeout(stepCh <-chan tmpubsub.Message, timeout int64) {
|
||||
func ensureNoNewTimeout(t *testing.T, stepCh <-chan tmpubsub.Message, timeout int64) {
|
||||
t.Helper()
|
||||
timeoutDuration := time.Duration(timeout*10) * time.Nanosecond
|
||||
ensureNoNewEvent(
|
||||
t,
|
||||
stepCh,
|
||||
timeoutDuration,
|
||||
"We should be stuck waiting, not receiving NewTimeout event")
|
||||
}
|
||||
|
||||
func ensureNewEvent(ch <-chan tmpubsub.Message, height int64, round int32, timeout time.Duration, errorMessage string) {
|
||||
func ensureNewEvent(t *testing.T, ch <-chan tmpubsub.Message, height int64, round int32, timeout time.Duration, errorMessage string) { //nolint: lll
|
||||
t.Helper()
|
||||
select {
|
||||
case <-time.After(timeout):
|
||||
panic(errorMessage)
|
||||
t.Fatalf("timed out waiting for new event: %s", errorMessage)
|
||||
case msg := <-ch:
|
||||
roundStateEvent, ok := msg.Data().(types.EventDataRoundState)
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("expected a EventDataRoundState, got %T. Wrong subscription channel?",
|
||||
msg.Data()))
|
||||
t.Fatalf("expected a EventDataRoundState, got %T. Wrong subscription channel?", msg.Data())
|
||||
}
|
||||
if roundStateEvent.Height != height {
|
||||
panic(fmt.Sprintf("expected height %v, got %v", height, roundStateEvent.Height))
|
||||
t.Fatalf("expected height %v, got %v", height, roundStateEvent.Height)
|
||||
}
|
||||
if roundStateEvent.Round != round {
|
||||
panic(fmt.Sprintf("expected round %v, got %v", round, roundStateEvent.Round))
|
||||
t.Fatalf("expected round %v, got %v", round, roundStateEvent.Round)
|
||||
}
|
||||
// TODO: We could check also for a step at this point!
|
||||
}
|
||||
}
|
||||
|
||||
func ensureNewRound(roundCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
func ensureNewRound(t *testing.T, roundCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
t.Helper()
|
||||
select {
|
||||
case <-time.After(ensureTimeout):
|
||||
panic("Timeout expired while waiting for NewRound event")
|
||||
t.Fatal("Timeout expired while waiting for NewRound event")
|
||||
case msg := <-roundCh:
|
||||
newRoundEvent, ok := msg.Data().(types.EventDataNewRound)
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("expected a EventDataNewRound, got %T. Wrong subscription channel?",
|
||||
msg.Data()))
|
||||
t.Fatalf("expected a EventDataNewRound, got %T. Wrong subscription channel?", msg.Data())
|
||||
}
|
||||
if newRoundEvent.Height != height {
|
||||
panic(fmt.Sprintf("expected height %v, got %v", height, newRoundEvent.Height))
|
||||
t.Fatalf("expected height %v, got %v", height, newRoundEvent.Height)
|
||||
}
|
||||
if newRoundEvent.Round != round {
|
||||
panic(fmt.Sprintf("expected round %v, got %v", round, newRoundEvent.Round))
|
||||
t.Fatalf("expected round %v, got %v", round, newRoundEvent.Round)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ensureNewTimeout(timeoutCh <-chan tmpubsub.Message, height int64, round int32, timeout int64) {
|
||||
func ensureNewTimeout(t *testing.T, timeoutCh <-chan tmpubsub.Message, height int64, round int32, timeout int64) {
|
||||
t.Helper()
|
||||
timeoutDuration := time.Duration(timeout*10) * time.Nanosecond
|
||||
ensureNewEvent(timeoutCh, height, round, timeoutDuration,
|
||||
ensureNewEvent(t, timeoutCh, height, round, timeoutDuration,
|
||||
"Timeout expired while waiting for NewTimeout event")
|
||||
}
|
||||
|
||||
func ensureNewProposal(proposalCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
func ensureNewProposal(t *testing.T, proposalCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
t.Helper()
|
||||
select {
|
||||
case <-time.After(ensureTimeout):
|
||||
panic("Timeout expired while waiting for NewProposal event")
|
||||
t.Fatalf("Timeout expired while waiting for NewProposal event")
|
||||
case msg := <-proposalCh:
|
||||
proposalEvent, ok := msg.Data().(types.EventDataCompleteProposal)
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("expected a EventDataCompleteProposal, got %T. Wrong subscription channel?",
|
||||
msg.Data()))
|
||||
t.Fatalf("expected a EventDataCompleteProposal, got %T. Wrong subscription channel?",
|
||||
msg.Data())
|
||||
}
|
||||
if proposalEvent.Height != height {
|
||||
panic(fmt.Sprintf("expected height %v, got %v", height, proposalEvent.Height))
|
||||
t.Fatalf("expected height %v, got %v", height, proposalEvent.Height)
|
||||
}
|
||||
if proposalEvent.Round != round {
|
||||
panic(fmt.Sprintf("expected round %v, got %v", round, proposalEvent.Round))
|
||||
t.Fatalf("expected round %v, got %v", round, proposalEvent.Round)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ensureNewValidBlock(validBlockCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
ensureNewEvent(validBlockCh, height, round, ensureTimeout,
|
||||
func ensureNewValidBlock(t *testing.T, validBlockCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
t.Helper()
|
||||
ensureNewEvent(t, validBlockCh, height, round, ensureTimeout,
|
||||
"Timeout expired while waiting for NewValidBlock event")
|
||||
}
|
||||
|
||||
func ensureNewBlock(blockCh <-chan tmpubsub.Message, height int64) {
|
||||
func ensureNewBlock(t *testing.T, blockCh <-chan tmpubsub.Message, height int64) {
|
||||
t.Helper()
|
||||
select {
|
||||
case <-time.After(ensureTimeout):
|
||||
panic("Timeout expired while waiting for NewBlock event")
|
||||
t.Fatalf("Timeout expired while waiting for NewBlock event")
|
||||
case msg := <-blockCh:
|
||||
blockEvent, ok := msg.Data().(types.EventDataNewBlock)
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("expected a EventDataNewBlock, got %T. Wrong subscription channel?",
|
||||
msg.Data()))
|
||||
t.Fatalf("expected a EventDataNewBlock, got %T. Wrong subscription channel?",
|
||||
msg.Data())
|
||||
}
|
||||
if blockEvent.Block.Height != height {
|
||||
panic(fmt.Sprintf("expected height %v, got %v", height, blockEvent.Block.Height))
|
||||
t.Fatalf("expected height %v, got %v", height, blockEvent.Block.Height)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ensureNewBlockHeader(blockCh <-chan tmpubsub.Message, height int64, blockHash tmbytes.HexBytes) {
|
||||
func ensureNewBlockHeader(t *testing.T, blockCh <-chan tmpubsub.Message, height int64, blockHash tmbytes.HexBytes) {
|
||||
t.Helper()
|
||||
select {
|
||||
case <-time.After(ensureTimeout):
|
||||
panic("Timeout expired while waiting for NewBlockHeader event")
|
||||
t.Fatalf("Timeout expired while waiting for NewBlockHeader event")
|
||||
case msg := <-blockCh:
|
||||
blockHeaderEvent, ok := msg.Data().(types.EventDataNewBlockHeader)
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("expected a EventDataNewBlockHeader, got %T. Wrong subscription channel?",
|
||||
msg.Data()))
|
||||
t.Fatalf("expected a EventDataNewBlockHeader, got %T. Wrong subscription channel?",
|
||||
msg.Data())
|
||||
}
|
||||
if blockHeaderEvent.Header.Height != height {
|
||||
panic(fmt.Sprintf("expected height %v, got %v", height, blockHeaderEvent.Header.Height))
|
||||
t.Fatalf("expected height %v, got %v", height, blockHeaderEvent.Header.Height)
|
||||
}
|
||||
if !bytes.Equal(blockHeaderEvent.Header.Hash(), blockHash) {
|
||||
panic(fmt.Sprintf("expected header %X, got %X", blockHash, blockHeaderEvent.Header.Hash()))
|
||||
t.Fatalf("expected header %X, got %X", blockHash, blockHeaderEvent.Header.Hash())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ensureNewUnlock(unlockCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
ensureNewEvent(unlockCh, height, round, ensureTimeout,
|
||||
func ensureNewUnlock(t *testing.T, unlockCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
t.Helper()
|
||||
ensureNewEvent(t, unlockCh, height, round, ensureTimeout,
|
||||
"Timeout expired while waiting for NewUnlock event")
|
||||
}
|
||||
|
||||
func ensureProposal(proposalCh <-chan tmpubsub.Message, height int64, round int32, propID types.BlockID) {
|
||||
func ensureProposal(t *testing.T, proposalCh <-chan tmpubsub.Message, height int64, round int32, propID types.BlockID) {
|
||||
t.Helper()
|
||||
select {
|
||||
case <-time.After(ensureTimeout):
|
||||
panic("Timeout expired while waiting for NewProposal event")
|
||||
t.Fatalf("Timeout expired while waiting for NewProposal event")
|
||||
case msg := <-proposalCh:
|
||||
proposalEvent, ok := msg.Data().(types.EventDataCompleteProposal)
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("expected a EventDataCompleteProposal, got %T. Wrong subscription channel?",
|
||||
msg.Data()))
|
||||
t.Fatalf("expected a EventDataCompleteProposal, got %T. Wrong subscription channel?",
|
||||
msg.Data())
|
||||
}
|
||||
if proposalEvent.Height != height {
|
||||
panic(fmt.Sprintf("expected height %v, got %v", height, proposalEvent.Height))
|
||||
t.Fatalf("expected height %v, got %v", height, proposalEvent.Height)
|
||||
}
|
||||
if proposalEvent.Round != round {
|
||||
panic(fmt.Sprintf("expected round %v, got %v", round, proposalEvent.Round))
|
||||
t.Fatalf("expected round %v, got %v", round, proposalEvent.Round)
|
||||
}
|
||||
if !proposalEvent.BlockID.Equals(propID) {
|
||||
panic(fmt.Sprintf("Proposed block does not match expected block (%v != %v)", proposalEvent.BlockID, propID))
|
||||
t.Fatalf("Proposed block does not match expected block (%v != %v)", proposalEvent.BlockID, propID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ensurePrecommit(voteCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
ensureVote(voteCh, height, round, tmproto.PrecommitType)
|
||||
func ensurePrecommit(t *testing.T, voteCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
t.Helper()
|
||||
ensureVote(t, voteCh, height, round, tmproto.PrecommitType)
|
||||
}
|
||||
|
||||
func ensurePrevote(voteCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
ensureVote(voteCh, height, round, tmproto.PrevoteType)
|
||||
func ensurePrevote(t *testing.T, voteCh <-chan tmpubsub.Message, height int64, round int32) {
|
||||
t.Helper()
|
||||
ensureVote(t, voteCh, height, round, tmproto.PrevoteType)
|
||||
}
|
||||
|
||||
func ensureVote(voteCh <-chan tmpubsub.Message, height int64, round int32,
|
||||
func ensureVote(t *testing.T, voteCh <-chan tmpubsub.Message, height int64, round int32,
|
||||
voteType tmproto.SignedMsgType) {
|
||||
t.Helper()
|
||||
select {
|
||||
case <-time.After(ensureTimeout):
|
||||
panic("Timeout expired while waiting for NewVote event")
|
||||
t.Fatalf("Timeout expired while waiting for NewVote event")
|
||||
case msg := <-voteCh:
|
||||
voteEvent, ok := msg.Data().(types.EventDataVote)
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("expected a EventDataVote, got %T. Wrong subscription channel?",
|
||||
msg.Data()))
|
||||
t.Fatalf("expected a EventDataVote, got %T. Wrong subscription channel?",
|
||||
msg.Data())
|
||||
}
|
||||
vote := voteEvent.Vote
|
||||
if vote.Height != height {
|
||||
panic(fmt.Sprintf("expected height %v, got %v", height, vote.Height))
|
||||
t.Fatalf("expected height %v, got %v", height, vote.Height)
|
||||
}
|
||||
if vote.Round != round {
|
||||
panic(fmt.Sprintf("expected round %v, got %v", round, vote.Round))
|
||||
t.Fatalf("expected round %v, got %v", round, vote.Round)
|
||||
}
|
||||
if vote.Type != voteType {
|
||||
panic(fmt.Sprintf("expected type %v, got %v", voteType, vote.Type))
|
||||
t.Fatalf("expected type %v, got %v", voteType, vote.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ensurePrecommitTimeout(ch <-chan tmpubsub.Message) {
|
||||
func ensurePrecommitTimeout(t *testing.T, ch <-chan tmpubsub.Message) {
|
||||
t.Helper()
|
||||
select {
|
||||
case <-time.After(ensureTimeout):
|
||||
panic("Timeout expired while waiting for the Precommit to Timeout")
|
||||
t.Fatalf("Timeout expired while waiting for the Precommit to Timeout")
|
||||
case <-ch:
|
||||
}
|
||||
}
|
||||
|
||||
func ensureNewEventOnChannel(ch <-chan tmpubsub.Message) {
|
||||
func ensureNewEventOnChannel(t *testing.T, ch <-chan tmpubsub.Message) {
|
||||
t.Helper()
|
||||
select {
|
||||
case <-time.After(ensureTimeout):
|
||||
panic("Timeout expired while waiting for new activity on the channel")
|
||||
t.Fatalf("Timeout expired while waiting for new activity on the channel")
|
||||
case <-ch:
|
||||
}
|
||||
}
|
||||
@@ -711,6 +740,7 @@ func randConsensusState(
|
||||
appFunc func() abci.Application,
|
||||
configOpts ...func(*cfg.Config),
|
||||
) ([]*State, cleanupFunc) {
|
||||
t.Helper()
|
||||
|
||||
genDoc, privVals := factory.RandGenesisDoc(config, nValidators, false, 30)
|
||||
css := make([]*State, nValidators)
|
||||
@@ -731,7 +761,7 @@ func randConsensusState(
|
||||
opt(thisConfig)
|
||||
}
|
||||
|
||||
ensureDir(filepath.Dir(thisConfig.Consensus.WalFile()), 0700) // dir for wal
|
||||
ensureDir(t, filepath.Dir(thisConfig.Consensus.WalFile()), 0700) // dir for wal
|
||||
|
||||
app := appFunc()
|
||||
|
||||
@@ -759,6 +789,7 @@ func randConsensusState(
|
||||
|
||||
// nPeers = nValidators + nNotValidator
|
||||
func randConsensusNetWithPeers(
|
||||
t *testing.T,
|
||||
config *cfg.Config,
|
||||
nValidators,
|
||||
nPeers int,
|
||||
@@ -768,6 +799,7 @@ func randConsensusNetWithPeers(
|
||||
) ([]*State, *types.GenesisDoc, *cfg.Config, cleanupFunc) {
|
||||
genDoc, privVals := factory.RandGenesisDoc(config, nValidators, false, testMinPower)
|
||||
css := make([]*State, nPeers)
|
||||
t.Helper()
|
||||
logger := consensusLogger()
|
||||
|
||||
var peer0Config *cfg.Config
|
||||
@@ -776,7 +808,7 @@ func randConsensusNetWithPeers(
|
||||
state, _ := sm.MakeGenesisState(genDoc)
|
||||
thisConfig := ResetConfig(fmt.Sprintf("%s_%d", testName, i))
|
||||
configRootDirs = append(configRootDirs, thisConfig.RootDir)
|
||||
ensureDir(filepath.Dir(thisConfig.Consensus.WalFile()), 0700) // dir for wal
|
||||
ensureDir(t, filepath.Dir(thisConfig.Consensus.WalFile()), 0700) // dir for wal
|
||||
if i == 0 {
|
||||
peer0Config = thisConfig
|
||||
}
|
||||
@@ -786,16 +818,16 @@ func randConsensusNetWithPeers(
|
||||
} else {
|
||||
tempKeyFile, err := ioutil.TempFile("", "priv_validator_key_")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
t.Fatalf("error creating temp file for validator key: %s", err)
|
||||
}
|
||||
tempStateFile, err := ioutil.TempFile("", "priv_validator_state_")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
t.Fatalf("error loading validator state: %s", err)
|
||||
}
|
||||
|
||||
privVal, err = privval.GenFilePV(tempKeyFile.Name(), tempStateFile.Name(), "")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
t.Fatalf("error generating validator key: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -40,12 +40,12 @@ func TestMempoolNoProgressUntilTxsAvailable(t *testing.T) {
|
||||
newBlockCh := subscribe(cs.eventBus, types.EventQueryNewBlock)
|
||||
startTestRound(cs, height, round)
|
||||
|
||||
ensureNewEventOnChannel(newBlockCh) // first block gets committed
|
||||
ensureNoNewEventOnChannel(newBlockCh)
|
||||
ensureNewEventOnChannel(t, newBlockCh) // first block gets committed
|
||||
ensureNoNewEventOnChannel(t, newBlockCh)
|
||||
deliverTxsRange(cs, 0, 1)
|
||||
ensureNewEventOnChannel(newBlockCh) // commit txs
|
||||
ensureNewEventOnChannel(newBlockCh) // commit updated app hash
|
||||
ensureNoNewEventOnChannel(newBlockCh)
|
||||
ensureNewEventOnChannel(t, newBlockCh) // commit txs
|
||||
ensureNewEventOnChannel(t, newBlockCh) // commit updated app hash
|
||||
ensureNoNewEventOnChannel(t, newBlockCh)
|
||||
}
|
||||
|
||||
func TestMempoolProgressAfterCreateEmptyBlocksInterval(t *testing.T) {
|
||||
@@ -63,9 +63,9 @@ func TestMempoolProgressAfterCreateEmptyBlocksInterval(t *testing.T) {
|
||||
newBlockCh := subscribe(cs.eventBus, types.EventQueryNewBlock)
|
||||
startTestRound(cs, cs.Height, cs.Round)
|
||||
|
||||
ensureNewEventOnChannel(newBlockCh) // first block gets committed
|
||||
ensureNoNewEventOnChannel(newBlockCh) // then we dont make a block ...
|
||||
ensureNewEventOnChannel(newBlockCh) // until the CreateEmptyBlocksInterval has passed
|
||||
ensureNewEventOnChannel(t, newBlockCh) // first block gets committed
|
||||
ensureNoNewEventOnChannel(t, newBlockCh) // then we dont make a block ...
|
||||
ensureNewEventOnChannel(t, newBlockCh) // until the CreateEmptyBlocksInterval has passed
|
||||
}
|
||||
|
||||
func TestMempoolProgressInHigherRound(t *testing.T) {
|
||||
@@ -93,19 +93,19 @@ func TestMempoolProgressInHigherRound(t *testing.T) {
|
||||
}
|
||||
startTestRound(cs, height, round)
|
||||
|
||||
ensureNewRound(newRoundCh, height, round) // first round at first height
|
||||
ensureNewEventOnChannel(newBlockCh) // first block gets committed
|
||||
ensureNewRound(t, newRoundCh, height, round) // first round at first height
|
||||
ensureNewEventOnChannel(t, newBlockCh) // first block gets committed
|
||||
|
||||
height++ // moving to the next height
|
||||
round = 0
|
||||
|
||||
ensureNewRound(newRoundCh, height, round) // first round at next height
|
||||
deliverTxsRange(cs, 0, 1) // we deliver txs, but dont set a proposal so we get the next round
|
||||
ensureNewTimeout(timeoutCh, height, round, cs.config.TimeoutPropose.Nanoseconds())
|
||||
ensureNewRound(t, newRoundCh, height, round) // first round at next height
|
||||
deliverTxsRange(cs, 0, 1) // we deliver txs, but dont set a proposal so we get the next round
|
||||
ensureNewTimeout(t, timeoutCh, height, round, cs.config.TimeoutPropose.Nanoseconds())
|
||||
|
||||
round++ // moving to the next round
|
||||
ensureNewRound(newRoundCh, height, round) // wait for the next round
|
||||
ensureNewEventOnChannel(newBlockCh) // now we can commit the block
|
||||
round++ // moving to the next round
|
||||
ensureNewRound(t, newRoundCh, height, round) // wait for the next round
|
||||
ensureNewEventOnChannel(t, newBlockCh) // now we can commit the block
|
||||
}
|
||||
|
||||
func deliverTxsRange(cs *State, start, end int) {
|
||||
|
||||
@@ -336,7 +336,7 @@ func TestReactorWithEvidence(t *testing.T) {
|
||||
|
||||
defer os.RemoveAll(thisConfig.RootDir)
|
||||
|
||||
ensureDir(path.Dir(thisConfig.Consensus.WalFile()), 0700) // dir for wal
|
||||
ensureDir(t, path.Dir(thisConfig.Consensus.WalFile()), 0700) // dir for wal
|
||||
app := appFunc()
|
||||
vals := types.TM2PB.ValidatorUpdates(state.Validators)
|
||||
app.InitChain(abci.RequestInitChain{Validators: vals})
|
||||
@@ -627,6 +627,7 @@ func TestReactorValidatorSetChanges(t *testing.T) {
|
||||
nPeers := 7
|
||||
nVals := 4
|
||||
states, _, _, cleanup := randConsensusNetWithPeers(
|
||||
t,
|
||||
config,
|
||||
nVals,
|
||||
nPeers,
|
||||
|
||||
@@ -58,7 +58,7 @@ func startNewStateAndWaitForBlock(t *testing.T, consensusReplayConfig *cfg.Confi
|
||||
logger := log.TestingLogger()
|
||||
state, err := sm.MakeGenesisStateFromFile(consensusReplayConfig.GenesisFile())
|
||||
require.NoError(t, err)
|
||||
privValidator := loadPrivValidator(consensusReplayConfig)
|
||||
privValidator := loadPrivValidator(t, consensusReplayConfig)
|
||||
blockStore := store.NewBlockStore(dbm.NewMemDB())
|
||||
cs := newStateWithConfigAndBlockStore(
|
||||
consensusReplayConfig,
|
||||
@@ -154,7 +154,7 @@ LOOP:
|
||||
blockStore := store.NewBlockStore(blockDB)
|
||||
state, err := sm.MakeGenesisStateFromFile(consensusReplayConfig.GenesisFile())
|
||||
require.NoError(t, err)
|
||||
privValidator := loadPrivValidator(consensusReplayConfig)
|
||||
privValidator := loadPrivValidator(t, consensusReplayConfig)
|
||||
cs := newStateWithConfigAndBlockStore(
|
||||
consensusReplayConfig,
|
||||
state,
|
||||
@@ -321,6 +321,7 @@ func setupSimulator(t *testing.T) *simulatorTestSuite {
|
||||
nVals := 4
|
||||
|
||||
css, genDoc, config, cleanup := randConsensusNetWithPeers(
|
||||
t,
|
||||
config,
|
||||
nVals,
|
||||
nPeers,
|
||||
@@ -345,15 +346,15 @@ func setupSimulator(t *testing.T) *simulatorTestSuite {
|
||||
// start the machine
|
||||
startTestRound(css[0], height, round)
|
||||
incrementHeight(vss...)
|
||||
ensureNewRound(newRoundCh, height, 0)
|
||||
ensureNewProposal(proposalCh, height, round)
|
||||
ensureNewRound(t, newRoundCh, height, 0)
|
||||
ensureNewProposal(t, proposalCh, height, round)
|
||||
rs := css[0].GetRoundState()
|
||||
|
||||
signAddVotes(sim.Config, css[0], tmproto.PrecommitType,
|
||||
rs.ProposalBlock.Hash(), rs.ProposalBlockParts.Header(),
|
||||
vss[1:nVals]...)
|
||||
|
||||
ensureNewRound(newRoundCh, height+1, 0)
|
||||
ensureNewRound(t, newRoundCh, height+1, 0)
|
||||
|
||||
// HEIGHT 2
|
||||
height++
|
||||
@@ -380,12 +381,12 @@ func setupSimulator(t *testing.T) *simulatorTestSuite {
|
||||
if err := css[0].SetProposalAndBlock(proposal, propBlock, propBlockParts, "some peer"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ensureNewProposal(proposalCh, height, round)
|
||||
ensureNewProposal(t, proposalCh, height, round)
|
||||
rs = css[0].GetRoundState()
|
||||
signAddVotes(sim.Config, css[0], tmproto.PrecommitType,
|
||||
rs.ProposalBlock.Hash(), rs.ProposalBlockParts.Header(),
|
||||
vss[1:nVals]...)
|
||||
ensureNewRound(newRoundCh, height+1, 0)
|
||||
ensureNewRound(t, newRoundCh, height+1, 0)
|
||||
|
||||
// HEIGHT 3
|
||||
height++
|
||||
@@ -412,12 +413,12 @@ func setupSimulator(t *testing.T) *simulatorTestSuite {
|
||||
if err := css[0].SetProposalAndBlock(proposal, propBlock, propBlockParts, "some peer"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ensureNewProposal(proposalCh, height, round)
|
||||
ensureNewProposal(t, proposalCh, height, round)
|
||||
rs = css[0].GetRoundState()
|
||||
signAddVotes(sim.Config, css[0], tmproto.PrecommitType,
|
||||
rs.ProposalBlock.Hash(), rs.ProposalBlockParts.Header(),
|
||||
vss[1:nVals]...)
|
||||
ensureNewRound(newRoundCh, height+1, 0)
|
||||
ensureNewRound(t, newRoundCh, height+1, 0)
|
||||
|
||||
// HEIGHT 4
|
||||
height++
|
||||
@@ -471,7 +472,7 @@ func setupSimulator(t *testing.T) *simulatorTestSuite {
|
||||
if err := css[0].SetProposalAndBlock(proposal, propBlock, propBlockParts, "some peer"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ensureNewProposal(proposalCh, height, round)
|
||||
ensureNewProposal(t, proposalCh, height, round)
|
||||
|
||||
removeValidatorTx2 := kvstore.MakeValSetChangeTx(newVal2ABCI, 0)
|
||||
err = assertMempool(css[0].txNotifier).CheckTx(context.Background(), removeValidatorTx2, nil, mempl.TxInfo{})
|
||||
@@ -487,7 +488,7 @@ func setupSimulator(t *testing.T) *simulatorTestSuite {
|
||||
rs.ProposalBlockParts.Header(), newVss[i])
|
||||
}
|
||||
|
||||
ensureNewRound(newRoundCh, height+1, 0)
|
||||
ensureNewRound(t, newRoundCh, height+1, 0)
|
||||
|
||||
// HEIGHT 5
|
||||
height++
|
||||
@@ -497,7 +498,7 @@ func setupSimulator(t *testing.T) *simulatorTestSuite {
|
||||
newVss[newVssIdx].VotingPower = 25
|
||||
sort.Sort(ValidatorStubsByPower(newVss))
|
||||
selfIndex = valIndexFn(0)
|
||||
ensureNewProposal(proposalCh, height, round)
|
||||
ensureNewProposal(t, proposalCh, height, round)
|
||||
rs = css[0].GetRoundState()
|
||||
for i := 0; i < nVals+1; i++ {
|
||||
if i == selfIndex {
|
||||
@@ -507,7 +508,7 @@ func setupSimulator(t *testing.T) *simulatorTestSuite {
|
||||
tmproto.PrecommitType, rs.ProposalBlock.Hash(),
|
||||
rs.ProposalBlockParts.Header(), newVss[i])
|
||||
}
|
||||
ensureNewRound(newRoundCh, height+1, 0)
|
||||
ensureNewRound(t, newRoundCh, height+1, 0)
|
||||
|
||||
// HEIGHT 6
|
||||
height++
|
||||
@@ -534,7 +535,7 @@ func setupSimulator(t *testing.T) *simulatorTestSuite {
|
||||
if err := css[0].SetProposalAndBlock(proposal, propBlock, propBlockParts, "some peer"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ensureNewProposal(proposalCh, height, round)
|
||||
ensureNewProposal(t, proposalCh, height, round)
|
||||
rs = css[0].GetRoundState()
|
||||
for i := 0; i < nVals+3; i++ {
|
||||
if i == selfIndex {
|
||||
@@ -544,7 +545,7 @@ func setupSimulator(t *testing.T) *simulatorTestSuite {
|
||||
tmproto.PrecommitType, rs.ProposalBlock.Hash(),
|
||||
rs.ProposalBlockParts.Header(), newVss[i])
|
||||
}
|
||||
ensureNewRound(newRoundCh, height+1, 0)
|
||||
ensureNewRound(t, newRoundCh, height+1, 0)
|
||||
|
||||
sim.Chain = make([]*types.Block, 0)
|
||||
sim.Commits = make([]*types.Commit, 0)
|
||||
|
||||
@@ -137,7 +137,7 @@ type State struct {
|
||||
done chan struct{}
|
||||
|
||||
// synchronous pubsub between consensus state and reactor.
|
||||
// state only emits EventNewRoundStep and EventVote
|
||||
// state only emits EventNewRoundStep, EventValidBlock, and EventVote
|
||||
evsw tmevents.EventSwitch
|
||||
|
||||
// for reporting metrics
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -379,6 +379,7 @@ func (c *Client) Update(ctx context.Context, now time.Time) (*types.LightBlock,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a new light block then verify it
|
||||
if latestBlock.Height > lastTrustedHeight {
|
||||
err = c.verifyLightBlock(ctx, latestBlock, now)
|
||||
if err != nil {
|
||||
@@ -388,7 +389,8 @@ func (c *Client) Update(ctx context.Context, now time.Time) (*types.LightBlock,
|
||||
return latestBlock, nil
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
// else return the latestTrustedBlock
|
||||
return c.latestTrustedBlock, nil
|
||||
}
|
||||
|
||||
// VerifyLightBlockAtHeight fetches the light block at the given height
|
||||
|
||||
@@ -644,7 +644,7 @@ func TestClientReplacesPrimaryWithWitnessIfPrimaryIsUnavailable(t *testing.T) {
|
||||
chainID,
|
||||
trustOptions,
|
||||
mockDeadNode,
|
||||
[]provider.Provider{mockFullNode, mockFullNode},
|
||||
[]provider.Provider{mockDeadNode, mockFullNode},
|
||||
dbs.New(dbm.NewMemDB()),
|
||||
light.Logger(log.TestingLogger()),
|
||||
)
|
||||
@@ -663,6 +663,32 @@ func TestClientReplacesPrimaryWithWitnessIfPrimaryIsUnavailable(t *testing.T) {
|
||||
mockFullNode.AssertExpectations(t)
|
||||
}
|
||||
|
||||
func TestClientReplacesPrimaryWithWitnessIfPrimaryDoesntHaveBlock(t *testing.T) {
|
||||
mockFullNode := &provider_mocks.Provider{}
|
||||
mockFullNode.On("LightBlock", mock.Anything, mock.Anything).Return(l1, nil)
|
||||
|
||||
mockDeadNode := &provider_mocks.Provider{}
|
||||
mockDeadNode.On("LightBlock", mock.Anything, mock.Anything).Return(nil, provider.ErrLightBlockNotFound)
|
||||
c, err := light.NewClient(
|
||||
ctx,
|
||||
chainID,
|
||||
trustOptions,
|
||||
mockDeadNode,
|
||||
[]provider.Provider{mockDeadNode, mockFullNode},
|
||||
dbs.New(dbm.NewMemDB()),
|
||||
light.Logger(log.TestingLogger()),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
_, err = c.Update(ctx, bTime.Add(2*time.Hour))
|
||||
require.NoError(t, err)
|
||||
|
||||
// we should still have the dead node as a witness because it
|
||||
// hasn't repeatedly been unresponsive yet
|
||||
assert.Equal(t, 2, len(c.Witnesses()))
|
||||
mockDeadNode.AssertExpectations(t)
|
||||
mockFullNode.AssertExpectations(t)
|
||||
}
|
||||
|
||||
func TestClient_BackwardsVerification(t *testing.T) {
|
||||
{
|
||||
headers, vals, _ := genLightBlocksWithKeys(chainID, 9, 3, 0, bTime)
|
||||
|
||||
@@ -341,7 +341,7 @@ func (c *Client) Block(ctx context.Context, height *int64) (*ctypes.ResultBlock,
|
||||
}
|
||||
|
||||
// BlockByHash calls rpcclient#BlockByHash and then verifies the result.
|
||||
func (c *Client) BlockByHash(ctx context.Context, hash []byte) (*ctypes.ResultBlock, error) {
|
||||
func (c *Client) BlockByHash(ctx context.Context, hash tmbytes.HexBytes) (*ctypes.ResultBlock, error) {
|
||||
res, err := c.next.BlockByHash(ctx, hash)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -454,7 +454,7 @@ func (c *Client) Commit(ctx context.Context, height *int64) (*ctypes.ResultCommi
|
||||
|
||||
// Tx calls rpcclient#Tx method and then verifies the proof if such was
|
||||
// requested.
|
||||
func (c *Client) Tx(ctx context.Context, hash []byte, prove bool) (*ctypes.ResultTx, error) {
|
||||
func (c *Client) Tx(ctx context.Context, hash tmbytes.HexBytes, prove bool) (*ctypes.ResultTx, error) {
|
||||
res, err := c.next.Tx(ctx, hash, prove)
|
||||
if err != nil || !prove {
|
||||
return res, err
|
||||
|
||||
@@ -702,7 +702,11 @@ func (n *nodeImpl) OnStart() error {
|
||||
n.Logger.Info("starting state sync")
|
||||
state, err := n.stateSyncReactor.Sync(context.TODO())
|
||||
if err != nil {
|
||||
n.Logger.Error("state sync failed", "err", err)
|
||||
n.Logger.Error("state sync failed; shutting down this node", "err", err)
|
||||
// stop the node
|
||||
if err := n.Stop(); err != nil {
|
||||
n.Logger.Error("failed to shut down node", "err", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -419,7 +419,7 @@ func (c *baseRPCClient) Block(ctx context.Context, height *int64) (*ctypes.Resul
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c *baseRPCClient) BlockByHash(ctx context.Context, hash []byte) (*ctypes.ResultBlock, error) {
|
||||
func (c *baseRPCClient) BlockByHash(ctx context.Context, hash bytes.HexBytes) (*ctypes.ResultBlock, error) {
|
||||
result := new(ctypes.ResultBlock)
|
||||
params := map[string]interface{}{
|
||||
"hash": hash,
|
||||
@@ -460,7 +460,7 @@ func (c *baseRPCClient) Commit(ctx context.Context, height *int64) (*ctypes.Resu
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c *baseRPCClient) Tx(ctx context.Context, hash []byte, prove bool) (*ctypes.ResultTx, error) {
|
||||
func (c *baseRPCClient) Tx(ctx context.Context, hash bytes.HexBytes, prove bool) (*ctypes.ResultTx, error) {
|
||||
result := new(ctypes.ResultTx)
|
||||
params := map[string]interface{}{
|
||||
"hash": hash,
|
||||
|
||||
@@ -67,11 +67,11 @@ type ABCIClient interface {
|
||||
// and prove anything about the chain.
|
||||
type SignClient interface {
|
||||
Block(ctx context.Context, height *int64) (*ctypes.ResultBlock, error)
|
||||
BlockByHash(ctx context.Context, hash []byte) (*ctypes.ResultBlock, error)
|
||||
BlockByHash(ctx context.Context, hash bytes.HexBytes) (*ctypes.ResultBlock, error)
|
||||
BlockResults(ctx context.Context, height *int64) (*ctypes.ResultBlockResults, error)
|
||||
Commit(ctx context.Context, height *int64) (*ctypes.ResultCommit, error)
|
||||
Validators(ctx context.Context, height *int64, page, perPage *int) (*ctypes.ResultValidators, error)
|
||||
Tx(ctx context.Context, hash []byte, prove bool) (*ctypes.ResultTx, error)
|
||||
Tx(ctx context.Context, hash bytes.HexBytes, prove bool) (*ctypes.ResultTx, error)
|
||||
|
||||
// TxSearch defines a method to search for a paginated set of transactions by
|
||||
// DeliverTx event search criteria.
|
||||
|
||||
@@ -166,7 +166,7 @@ func (c *Local) Block(ctx context.Context, height *int64) (*ctypes.ResultBlock,
|
||||
return c.env.Block(c.ctx, height)
|
||||
}
|
||||
|
||||
func (c *Local) BlockByHash(ctx context.Context, hash []byte) (*ctypes.ResultBlock, error) {
|
||||
func (c *Local) BlockByHash(ctx context.Context, hash bytes.HexBytes) (*ctypes.ResultBlock, error) {
|
||||
return c.env.BlockByHash(c.ctx, hash)
|
||||
}
|
||||
|
||||
@@ -182,7 +182,7 @@ func (c *Local) Validators(ctx context.Context, height *int64, page, perPage *in
|
||||
return c.env.Validators(c.ctx, height, page, perPage)
|
||||
}
|
||||
|
||||
func (c *Local) Tx(ctx context.Context, hash []byte, prove bool) (*ctypes.ResultTx, error) {
|
||||
func (c *Local) Tx(ctx context.Context, hash bytes.HexBytes, prove bool) (*ctypes.ResultTx, error) {
|
||||
return c.env.Tx(c.ctx, hash, prove)
|
||||
}
|
||||
|
||||
|
||||
@@ -166,7 +166,7 @@ func (c Client) Block(ctx context.Context, height *int64) (*ctypes.ResultBlock,
|
||||
return c.env.Block(&rpctypes.Context{}, height)
|
||||
}
|
||||
|
||||
func (c Client) BlockByHash(ctx context.Context, hash []byte) (*ctypes.ResultBlock, error) {
|
||||
func (c Client) BlockByHash(ctx context.Context, hash bytes.HexBytes) (*ctypes.ResultBlock, error) {
|
||||
return c.env.BlockByHash(&rpctypes.Context{}, hash)
|
||||
}
|
||||
|
||||
|
||||
@@ -115,11 +115,11 @@ func (_m *Client) Block(ctx context.Context, height *int64) (*coretypes.ResultBl
|
||||
}
|
||||
|
||||
// BlockByHash provides a mock function with given fields: ctx, hash
|
||||
func (_m *Client) BlockByHash(ctx context.Context, hash []byte) (*coretypes.ResultBlock, error) {
|
||||
func (_m *Client) BlockByHash(ctx context.Context, hash bytes.HexBytes) (*coretypes.ResultBlock, error) {
|
||||
ret := _m.Called(ctx, hash)
|
||||
|
||||
var r0 *coretypes.ResultBlock
|
||||
if rf, ok := ret.Get(0).(func(context.Context, []byte) *coretypes.ResultBlock); ok {
|
||||
if rf, ok := ret.Get(0).(func(context.Context, bytes.HexBytes) *coretypes.ResultBlock); ok {
|
||||
r0 = rf(ctx, hash)
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
@@ -128,7 +128,7 @@ func (_m *Client) BlockByHash(ctx context.Context, hash []byte) (*coretypes.Resu
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func(context.Context, []byte) error); ok {
|
||||
if rf, ok := ret.Get(1).(func(context.Context, bytes.HexBytes) error); ok {
|
||||
r1 = rf(ctx, hash)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
@@ -706,11 +706,11 @@ func (_m *Client) Subscribe(ctx context.Context, subscriber string, query string
|
||||
}
|
||||
|
||||
// Tx provides a mock function with given fields: ctx, hash, prove
|
||||
func (_m *Client) Tx(ctx context.Context, hash []byte, prove bool) (*coretypes.ResultTx, error) {
|
||||
func (_m *Client) Tx(ctx context.Context, hash bytes.HexBytes, prove bool) (*coretypes.ResultTx, error) {
|
||||
ret := _m.Called(ctx, hash, prove)
|
||||
|
||||
var r0 *coretypes.ResultTx
|
||||
if rf, ok := ret.Get(0).(func(context.Context, []byte, bool) *coretypes.ResultTx); ok {
|
||||
if rf, ok := ret.Get(0).(func(context.Context, bytes.HexBytes, bool) *coretypes.ResultTx); ok {
|
||||
r0 = rf(ctx, hash, prove)
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
@@ -719,7 +719,7 @@ func (_m *Client) Tx(ctx context.Context, hash []byte, prove bool) (*coretypes.R
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func(context.Context, []byte, bool) error); ok {
|
||||
if rf, ok := ret.Get(1).(func(context.Context, bytes.HexBytes, bool) error); ok {
|
||||
r1 = rf(ctx, hash, prove)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/tendermint/tendermint/libs/bytes"
|
||||
tmmath "github.com/tendermint/tendermint/libs/math"
|
||||
tmquery "github.com/tendermint/tendermint/libs/pubsub/query"
|
||||
ctypes "github.com/tendermint/tendermint/rpc/core/types"
|
||||
@@ -107,7 +108,11 @@ func (env *Environment) Block(ctx *rpctypes.Context, heightPtr *int64) (*ctypes.
|
||||
|
||||
// BlockByHash gets block by hash.
|
||||
// More: https://docs.tendermint.com/master/rpc/#/Info/block_by_hash
|
||||
func (env *Environment) BlockByHash(ctx *rpctypes.Context, hash []byte) (*ctypes.ResultBlock, error) {
|
||||
func (env *Environment) BlockByHash(ctx *rpctypes.Context, hash bytes.HexBytes) (*ctypes.ResultBlock, error) {
|
||||
// N.B. The hash parameter is HexBytes so that the reflective parameter
|
||||
// decoding logic in the HTTP service will correctly translate from JSON.
|
||||
// See https://github.com/tendermint/tendermint/issues/6802 for context.
|
||||
|
||||
block := env.BlockStore.LoadBlockByHash(hash)
|
||||
if block == nil {
|
||||
return &ctypes.ResultBlock{BlockID: types.BlockID{}, Block: nil}, nil
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/tendermint/tendermint/libs/bytes"
|
||||
tmmath "github.com/tendermint/tendermint/libs/math"
|
||||
tmquery "github.com/tendermint/tendermint/libs/pubsub/query"
|
||||
ctypes "github.com/tendermint/tendermint/rpc/core/types"
|
||||
@@ -17,9 +18,13 @@ import (
|
||||
// transaction is in the mempool, invalidated, or was not sent in the first
|
||||
// place.
|
||||
// More: https://docs.tendermint.com/master/rpc/#/Info/tx
|
||||
func (env *Environment) Tx(ctx *rpctypes.Context, hash []byte, prove bool) (*ctypes.ResultTx, error) {
|
||||
func (env *Environment) Tx(ctx *rpctypes.Context, hash bytes.HexBytes, prove bool) (*ctypes.ResultTx, error) {
|
||||
// if index is disabled, return error
|
||||
|
||||
// N.B. The hash parameter is HexBytes so that the reflective parameter
|
||||
// decoding logic in the HTTP service will correctly translate from JSON.
|
||||
// See https://github.com/tendermint/tendermint/issues/6802 for context.
|
||||
|
||||
if !indexer.KVSinkEnabled(env.EventSinks) {
|
||||
return nil, errors.New("transaction querying is disabled due to no kvEventSink")
|
||||
}
|
||||
|
||||
@@ -601,6 +601,32 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ErrorResponse"
|
||||
/unsafe_flush_mempool:
|
||||
get:
|
||||
summary: Flush mempool of all unconfirmed transactions
|
||||
operationId: unsafe_flush_mempool
|
||||
tags:
|
||||
- Unsafe
|
||||
description: |
|
||||
Flush flushes out the mempool. It acquires a read-lock, fetches all the
|
||||
transactions currently in the transaction store and removes each transaction
|
||||
from the store and all indexes and finally resets the cache.
|
||||
|
||||
Note, flushing the mempool may leave the mempool in an inconsistent state.
|
||||
responses:
|
||||
"200":
|
||||
description: empty answer
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/EmptyResponse"
|
||||
"500":
|
||||
description: empty error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ErrorResponse"
|
||||
|
||||
/blockchain:
|
||||
get:
|
||||
summary: "Get block headers (max: 20) for minHeight <= height <= maxHeight."
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
all: docker generator runner
|
||||
all: docker generator runner tests
|
||||
|
||||
docker:
|
||||
docker build --tag tendermint/e2e-node -f docker/Dockerfile ../..
|
||||
@@ -15,4 +15,7 @@ generator:
|
||||
runner:
|
||||
go build -o build/runner ./runner
|
||||
|
||||
.PHONY: all app docker generator runner
|
||||
tests:
|
||||
go test -o build/tests ./tests
|
||||
|
||||
.PHONY: all app docker generator runner tests
|
||||
|
||||
@@ -51,7 +51,7 @@ var (
|
||||
nodeStateSyncs = uniformChoice{e2e.StateSyncDisabled, e2e.StateSyncP2P, e2e.StateSyncRPC}
|
||||
nodePersistIntervals = uniformChoice{0, 1, 5}
|
||||
nodeSnapshotIntervals = uniformChoice{0, 3}
|
||||
nodeRetainBlocks = uniformChoice{0, int(e2e.EvidenceAgeHeight), int(e2e.EvidenceAgeHeight) + 5}
|
||||
nodeRetainBlocks = uniformChoice{0, 2 * int(e2e.EvidenceAgeHeight), 4 * int(e2e.EvidenceAgeHeight)}
|
||||
nodePerturbations = probSetChoice{
|
||||
"disconnect": 0.1,
|
||||
"pause": 0.1,
|
||||
@@ -87,11 +87,19 @@ func Generate(r *rand.Rand, opts Options) ([]e2e.Manifest, error) {
|
||||
}
|
||||
manifests = append(manifests, manifest)
|
||||
}
|
||||
|
||||
if opts.Sorted {
|
||||
// When the sorted flag is set (generally, as long as
|
||||
// groups aren't set), sort the manifests from least to most complex.
|
||||
e2e.SortManifests(manifests)
|
||||
}
|
||||
|
||||
return manifests, nil
|
||||
}
|
||||
|
||||
type Options struct {
|
||||
P2P P2PMode
|
||||
P2P P2PMode
|
||||
Sorted bool
|
||||
}
|
||||
|
||||
type P2PMode string
|
||||
@@ -119,18 +127,11 @@ func generateTestnet(r *rand.Rand, opt map[string]interface{}) (e2e.Manifest, er
|
||||
TxSize: int64(txSize.Choose(r).(int)),
|
||||
}
|
||||
|
||||
var p2pNodeFactor int
|
||||
|
||||
switch opt["p2p"].(P2PMode) {
|
||||
case NewP2PMode:
|
||||
manifest.UseLegacyP2P = true
|
||||
case LegacyP2PMode:
|
||||
manifest.UseLegacyP2P = false
|
||||
case HybridP2PMode:
|
||||
manifest.UseLegacyP2P = true
|
||||
p2pNodeFactor = 2
|
||||
p2pMode := opt["p2p"].(P2PMode)
|
||||
switch p2pMode {
|
||||
case NewP2PMode, LegacyP2PMode, HybridP2PMode:
|
||||
default:
|
||||
return manifest, fmt.Errorf("unknown p2p mode %s", opt["p2p"])
|
||||
return manifest, fmt.Errorf("unknown p2p mode %s", p2pMode)
|
||||
}
|
||||
|
||||
var numSeeds, numValidators, numFulls, numLightClients int
|
||||
@@ -153,10 +154,11 @@ func generateTestnet(r *rand.Rand, opt map[string]interface{}) (e2e.Manifest, er
|
||||
for i := 1; i <= numSeeds; i++ {
|
||||
node := generateNode(r, e2e.ModeSeed, 0, manifest.InitialHeight, false)
|
||||
|
||||
if p2pNodeFactor == 0 {
|
||||
node.UseLegacyP2P = manifest.UseLegacyP2P
|
||||
} else if p2pNodeFactor%i == 0 {
|
||||
node.UseLegacyP2P = !manifest.UseLegacyP2P
|
||||
switch p2pMode {
|
||||
case LegacyP2PMode:
|
||||
node.UseLegacyP2P = true
|
||||
case HybridP2PMode:
|
||||
node.UseLegacyP2P = r.Intn(2) == 1
|
||||
}
|
||||
|
||||
manifest.Nodes[fmt.Sprintf("seed%02d", i)] = node
|
||||
@@ -177,10 +179,11 @@ func generateTestnet(r *rand.Rand, opt map[string]interface{}) (e2e.Manifest, er
|
||||
node := generateNode(
|
||||
r, e2e.ModeValidator, startAt, manifest.InitialHeight, i <= 2)
|
||||
|
||||
if p2pNodeFactor == 0 {
|
||||
node.UseLegacyP2P = manifest.UseLegacyP2P
|
||||
} else if p2pNodeFactor%i == 0 {
|
||||
node.UseLegacyP2P = !manifest.UseLegacyP2P
|
||||
switch p2pMode {
|
||||
case LegacyP2PMode:
|
||||
node.UseLegacyP2P = true
|
||||
case HybridP2PMode:
|
||||
node.UseLegacyP2P = r.Intn(2) == 1
|
||||
}
|
||||
|
||||
manifest.Nodes[name] = node
|
||||
@@ -213,11 +216,13 @@ func generateTestnet(r *rand.Rand, opt map[string]interface{}) (e2e.Manifest, er
|
||||
}
|
||||
node := generateNode(r, e2e.ModeFull, startAt, manifest.InitialHeight, false)
|
||||
|
||||
if p2pNodeFactor == 0 {
|
||||
node.UseLegacyP2P = manifest.UseLegacyP2P
|
||||
} else if p2pNodeFactor%i == 0 {
|
||||
node.UseLegacyP2P = !manifest.UseLegacyP2P
|
||||
switch p2pMode {
|
||||
case LegacyP2PMode:
|
||||
node.UseLegacyP2P = true
|
||||
case HybridP2PMode:
|
||||
node.UseLegacyP2P = r.Intn(2) == 1
|
||||
}
|
||||
|
||||
manifest.Nodes[fmt.Sprintf("full%02d", i)] = node
|
||||
}
|
||||
|
||||
|
||||
@@ -57,6 +57,10 @@ func NewCLI() *CLI {
|
||||
return fmt.Errorf("p2p mode must be either new, legacy, hybrid or mixed got %s", p2pMode)
|
||||
}
|
||||
|
||||
if groups == 0 {
|
||||
opts.Sorted = true
|
||||
}
|
||||
|
||||
return cli.generate(dir, groups, opts)
|
||||
},
|
||||
}
|
||||
|
||||
@@ -43,6 +43,7 @@ persist_interval = 0
|
||||
perturb = ["restart"]
|
||||
privval_protocol = "tcp"
|
||||
seeds = ["seed01"]
|
||||
block_sync = "v0"
|
||||
|
||||
[node.validator03]
|
||||
database = "badgerdb"
|
||||
@@ -51,7 +52,8 @@ abci_protocol = "grpc"
|
||||
persist_interval = 3
|
||||
perturb = ["kill"]
|
||||
privval_protocol = "grpc"
|
||||
retain_blocks = 7
|
||||
block_sync = "v0"
|
||||
retain_blocks = 10
|
||||
|
||||
[node.validator04]
|
||||
abci_protocol = "builtin"
|
||||
@@ -59,12 +61,13 @@ snapshot_interval = 5
|
||||
database = "rocksdb"
|
||||
persistent_peers = ["validator01"]
|
||||
perturb = ["pause"]
|
||||
block_sync = "v0"
|
||||
|
||||
[node.validator05]
|
||||
database = "cleveldb"
|
||||
block_sync = "v0"
|
||||
database = "cleveldb"
|
||||
block_sync = "v0"
|
||||
state_sync = "p2p"
|
||||
seeds = ["seed01"]
|
||||
seeds = ["seed01"]
|
||||
start_at = 1005 # Becomes part of the validator set at 1010
|
||||
abci_protocol = "grpc"
|
||||
perturb = ["pause", "disconnect", "restart"]
|
||||
@@ -73,11 +76,10 @@ privval_protocol = "tcp"
|
||||
[node.full01]
|
||||
mode = "full"
|
||||
start_at = 1010
|
||||
# FIXME: should be v2, disabled due to flake
|
||||
block_sync = "v0"
|
||||
persistent_peers = ["validator01", "validator02", "validator03", "validator04"]
|
||||
perturb = ["restart"]
|
||||
retain_blocks = 7
|
||||
retain_blocks = 10
|
||||
state_sync = "rpc"
|
||||
|
||||
[node.light01]
|
||||
|
||||
@@ -3,6 +3,7 @@ package e2e
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
)
|
||||
@@ -59,9 +60,6 @@ type Manifest struct {
|
||||
// by individual nodes.
|
||||
LogLevel string `toml:"log_level"`
|
||||
|
||||
// UseLegacyP2P uses the legacy p2p layer for all nodes in a test.
|
||||
UseLegacyP2P bool `toml:"use_legacy_p2p"`
|
||||
|
||||
// QueueType describes the type of queue that the system uses internally
|
||||
QueueType string `toml:"queue_type"`
|
||||
|
||||
@@ -170,3 +168,43 @@ func LoadManifest(file string) (Manifest, error) {
|
||||
}
|
||||
return manifest, nil
|
||||
}
|
||||
|
||||
// SortManifests orders (in-place) a list of manifests such that the
|
||||
// manifests will be ordered (vaguely) from least complex to most
|
||||
// complex.
|
||||
func SortManifests(manifests []Manifest) {
|
||||
sort.SliceStable(manifests, func(i, j int) bool {
|
||||
left, right := manifests[i], manifests[j]
|
||||
|
||||
if len(left.Nodes) < len(right.Nodes) {
|
||||
return true
|
||||
}
|
||||
|
||||
if left.InitialHeight < right.InitialHeight {
|
||||
return true
|
||||
}
|
||||
|
||||
if left.TxSize < right.TxSize {
|
||||
return true
|
||||
}
|
||||
|
||||
if left.Evidence < right.Evidence {
|
||||
return true
|
||||
}
|
||||
|
||||
var (
|
||||
leftPerturb int
|
||||
rightPerturb int
|
||||
)
|
||||
|
||||
for _, n := range left.Nodes {
|
||||
leftPerturb += len(n.Perturb)
|
||||
}
|
||||
for _, n := range right.Nodes {
|
||||
rightPerturb += len(n.Perturb)
|
||||
}
|
||||
|
||||
return leftPerturb < rightPerturb
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
@@ -182,7 +182,7 @@ func LoadTestnet(file string) (*Testnet, error) {
|
||||
Perturbations: []Perturbation{},
|
||||
LogLevel: manifest.LogLevel,
|
||||
QueueType: manifest.QueueType,
|
||||
UseLegacyP2P: manifest.UseLegacyP2P && nodeManifest.UseLegacyP2P,
|
||||
UseLegacyP2P: nodeManifest.UseLegacyP2P,
|
||||
}
|
||||
|
||||
if node.StartAt == testnet.InitialHeight {
|
||||
|
||||
@@ -19,7 +19,7 @@ FAILED=()
|
||||
|
||||
for MANIFEST in "$@"; do
|
||||
START=$SECONDS
|
||||
echo "==> Running testnet $MANIFEST..."
|
||||
echo "==> Running testnet: $MANIFEST"
|
||||
|
||||
if ! ./build/runner -f "$MANIFEST"; then
|
||||
echo "==> Testnet $MANIFEST failed, dumping manifest..."
|
||||
|
||||
@@ -21,8 +21,8 @@ import (
|
||||
//
|
||||
// Metrics are based on the `benchmarkLength`, the number of consecutive blocks
|
||||
// sampled from in the testnet
|
||||
func Benchmark(testnet *e2e.Testnet, benchmarkLength int64) error {
|
||||
block, _, err := waitForHeight(testnet, 0)
|
||||
func Benchmark(ctx context.Context, testnet *e2e.Testnet, benchmarkLength int64) error {
|
||||
block, err := getLatestBlock(ctx, testnet)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -32,13 +32,15 @@ func Benchmark(testnet *e2e.Testnet, benchmarkLength int64) error {
|
||||
// wait for the length of the benchmark period in blocks to pass. We allow 5 seconds for each block
|
||||
// which should be sufficient.
|
||||
waitingTime := time.Duration(benchmarkLength*5) * time.Second
|
||||
endHeight, err := waitForAllNodes(testnet, block.Height+benchmarkLength, waitingTime)
|
||||
ctx, cancel := context.WithTimeout(ctx, waitingTime)
|
||||
defer cancel()
|
||||
block, _, err = waitForHeight(ctx, testnet, block.Height+benchmarkLength)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dur := time.Since(startAt)
|
||||
|
||||
logger.Info("Ending benchmark period", "height", endHeight)
|
||||
logger.Info("Ending benchmark period", "height", block.Height)
|
||||
|
||||
// fetch a sample of blocks
|
||||
blocks, err := fetchBlockChainSample(testnet, benchmarkLength)
|
||||
|
||||
@@ -28,7 +28,7 @@ const lightClientEvidenceRatio = 4
|
||||
// evidence and broadcasts it to a random node through the rpc endpoint `/broadcast_evidence`.
|
||||
// Evidence is random and can be a mixture of LightClientAttackEvidence and
|
||||
// DuplicateVoteEvidence.
|
||||
func InjectEvidence(testnet *e2e.Testnet, amount int) error {
|
||||
func InjectEvidence(ctx context.Context, testnet *e2e.Testnet, amount int) error {
|
||||
// select a random node
|
||||
var targetNode *e2e.Node
|
||||
|
||||
@@ -79,9 +79,12 @@ func InjectEvidence(testnet *e2e.Testnet, amount int) error {
|
||||
return err
|
||||
}
|
||||
|
||||
wctx, cancel := context.WithTimeout(ctx, time.Minute)
|
||||
defer cancel()
|
||||
|
||||
// wait for the node to reach the height above the forged height so that
|
||||
// it is able to validate the evidence
|
||||
_, err = waitForNode(targetNode, waitHeight, 30*time.Second)
|
||||
_, err = waitForNode(wctx, targetNode, waitHeight)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -107,9 +110,12 @@ func InjectEvidence(testnet *e2e.Testnet, amount int) error {
|
||||
}
|
||||
}
|
||||
|
||||
wctx, cancel = context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// wait for the node to reach the height above the forged height so that
|
||||
// it is able to validate the evidence
|
||||
_, err = waitForNode(targetNode, blockRes.Block.Height+2, 10*time.Second)
|
||||
_, err = waitForNode(wctx, targetNode, blockRes.Block.Height+2)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -3,10 +3,9 @@ package main
|
||||
import (
|
||||
"container/ring"
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
rpchttp "github.com/tendermint/tendermint/rpc/client/http"
|
||||
@@ -15,9 +14,8 @@ import (
|
||||
)
|
||||
|
||||
// Load generates transactions against the network until the given context is
|
||||
// canceled. A multiplier of greater than one can be supplied if load needs to
|
||||
// be generated beyond a minimum amount.
|
||||
func Load(ctx context.Context, testnet *e2e.Testnet, multiplier int) error {
|
||||
// canceled.
|
||||
func Load(ctx context.Context, testnet *e2e.Testnet) error {
|
||||
// Since transactions are executed across all nodes in the network, we need
|
||||
// to reduce transaction load for larger networks to avoid using too much
|
||||
// CPU. This gives high-throughput small networks and low-throughput large ones.
|
||||
@@ -27,11 +25,9 @@ func Load(ctx context.Context, testnet *e2e.Testnet, multiplier int) error {
|
||||
if concurrency == 0 {
|
||||
concurrency = 1
|
||||
}
|
||||
initialTimeout := 1 * time.Minute
|
||||
stallTimeout := 30 * time.Second
|
||||
|
||||
chTx := make(chan types.Tx)
|
||||
chSuccess := make(chan types.Tx)
|
||||
chSuccess := make(chan int) // success counts per iteration
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
@@ -39,61 +35,115 @@ func Load(ctx context.Context, testnet *e2e.Testnet, multiplier int) error {
|
||||
logger.Info(fmt.Sprintf("Starting transaction load (%v workers)...", concurrency))
|
||||
started := time.Now()
|
||||
|
||||
go loadGenerate(ctx, chTx, multiplier, testnet.TxSize)
|
||||
go loadGenerate(ctx, chTx, testnet.TxSize)
|
||||
|
||||
for w := 0; w < concurrency; w++ {
|
||||
go loadProcess(ctx, testnet, chTx, chSuccess)
|
||||
}
|
||||
|
||||
// Monitor successful transactions, and abort on stalls.
|
||||
// Monitor transactions to ensure load propagates to the network
|
||||
//
|
||||
// This loop doesn't check or time out for stalls, since a stall here just
|
||||
// aborts the load generator sooner and could obscure backpressure
|
||||
// from the test harness, and there are other checks for
|
||||
// stalls in the framework. Ideally we should monitor latency as a guide
|
||||
// for when to give up, but we don't have a good way to track that yet.
|
||||
success := 0
|
||||
timeout := initialTimeout
|
||||
for {
|
||||
select {
|
||||
case <-chSuccess:
|
||||
success++
|
||||
timeout = stallTimeout
|
||||
case <-time.After(timeout):
|
||||
return fmt.Errorf("unable to submit transactions for %v", timeout)
|
||||
case numSeen := <-chSuccess:
|
||||
success += numSeen
|
||||
case <-ctx.Done():
|
||||
if success == 0 {
|
||||
// if we couldn't submit any transactions,
|
||||
// that's probably a problem and the test
|
||||
// should error; however, for very short tests
|
||||
// we shouldn't abort.
|
||||
//
|
||||
// The 2s cutoff is a rough guess based on
|
||||
// the expected value of
|
||||
// loadGenerateWaitTime. If the implementation
|
||||
// of that function changes, then this might
|
||||
// also need to change without more
|
||||
// refactoring.
|
||||
if success == 0 && time.Since(started) > 2*time.Second {
|
||||
return errors.New("failed to submit any transactions")
|
||||
}
|
||||
logger.Info(fmt.Sprintf("Ending transaction load after %v txs (%.1f tx/s)...",
|
||||
success, float64(success)/time.Since(started).Seconds()))
|
||||
|
||||
// TODO perhaps allow test networks to
|
||||
// declare required transaction rates, which
|
||||
// might allow us to avoid the special case
|
||||
// around 0 txs above.
|
||||
rate := float64(success) / time.Since(started).Seconds()
|
||||
|
||||
logger.Info("ending transaction load",
|
||||
"dur_secs", time.Since(started).Seconds(),
|
||||
"txns", success,
|
||||
"rate", rate,
|
||||
"slow", rate < 1)
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// loadGenerate generates jobs until the context is canceled
|
||||
func loadGenerate(ctx context.Context, chTx chan<- types.Tx, multiplier int, size int64) {
|
||||
for i := 0; i < math.MaxInt64; i++ {
|
||||
// loadGenerate generates jobs until the context is canceled.
|
||||
//
|
||||
// The chTx has multiple consumers, thus the rate limiting of the load
|
||||
// generation is primarily the result of backpressure from the
|
||||
// broadcast transaction, though there is still some timer-based
|
||||
// limiting.
|
||||
func loadGenerate(ctx context.Context, chTx chan<- types.Tx, size int64) {
|
||||
timer := time.NewTimer(0)
|
||||
defer timer.Stop()
|
||||
defer close(chTx)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-timer.C:
|
||||
}
|
||||
|
||||
// We keep generating the same 100 keys over and over, with different values.
|
||||
// This gives a reasonable load without putting too much data in the app.
|
||||
id := i % 100
|
||||
id := rand.Int63() % 100 // nolint: gosec
|
||||
|
||||
bz := make([]byte, size)
|
||||
_, err := rand.Read(bz)
|
||||
_, err := rand.Read(bz) // nolint: gosec
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Failed to read random bytes: %v", err))
|
||||
}
|
||||
tx := types.Tx(fmt.Sprintf("load-%X=%x", id, bz))
|
||||
|
||||
select {
|
||||
case chTx <- tx:
|
||||
sqrtSize := int(math.Sqrt(float64(size)))
|
||||
time.Sleep(10 * time.Millisecond * time.Duration(sqrtSize/multiplier))
|
||||
|
||||
case <-ctx.Done():
|
||||
close(chTx)
|
||||
return
|
||||
case chTx <- tx:
|
||||
// sleep for a bit before sending the
|
||||
// next transaction.
|
||||
timer.Reset(loadGenerateWaitTime(size))
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func loadGenerateWaitTime(size int64) time.Duration {
|
||||
const (
|
||||
min = int64(100 * time.Millisecond)
|
||||
max = int64(time.Second)
|
||||
)
|
||||
|
||||
var (
|
||||
baseJitter = rand.Int63n(max-min+1) + min // nolint: gosec
|
||||
sizeFactor = size * int64(time.Millisecond)
|
||||
sizeJitter = rand.Int63n(sizeFactor-min+1) + min // nolint: gosec
|
||||
)
|
||||
|
||||
return time.Duration(baseJitter + sizeJitter)
|
||||
}
|
||||
|
||||
// loadProcess processes transactions
|
||||
func loadProcess(ctx context.Context, testnet *e2e.Testnet, chTx <-chan types.Tx, chSuccess chan<- types.Tx) {
|
||||
func loadProcess(ctx context.Context, testnet *e2e.Testnet, chTx <-chan types.Tx, chSuccess chan<- int) {
|
||||
// Each worker gets its own client to each usable node, which
|
||||
// allows for some concurrency while still bounding it.
|
||||
clients := make([]*rpchttp.HTTP, 0, len(testnet.Nodes))
|
||||
@@ -127,8 +177,7 @@ func loadProcess(ctx context.Context, testnet *e2e.Testnet, chTx <-chan types.Tx
|
||||
clientRing = clientRing.Next()
|
||||
}
|
||||
|
||||
var err error
|
||||
|
||||
successes := 0
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@@ -137,19 +186,24 @@ func loadProcess(ctx context.Context, testnet *e2e.Testnet, chTx <-chan types.Tx
|
||||
clientRing = clientRing.Next()
|
||||
client := clientRing.Value.(*rpchttp.HTTP)
|
||||
|
||||
if _, err := client.Health(ctx); err != nil {
|
||||
if status, err := client.Status(ctx); err != nil {
|
||||
continue
|
||||
} else if status.SyncInfo.CatchingUp {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, err = client.BroadcastTxSync(ctx, tx); err != nil {
|
||||
if _, err := client.BroadcastTxSync(ctx, tx); err != nil {
|
||||
continue
|
||||
}
|
||||
successes++
|
||||
|
||||
select {
|
||||
case chSuccess <- tx:
|
||||
case chSuccess <- successes:
|
||||
successes = 0 // reset counter for the next iteration
|
||||
continue
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -57,44 +57,47 @@ func NewCLI() *CLI {
|
||||
}
|
||||
|
||||
chLoadResult := make(chan error)
|
||||
ctx, loadCancel := context.WithCancel(context.Background())
|
||||
ctx, cancel := context.WithCancel(cmd.Context())
|
||||
defer cancel()
|
||||
|
||||
lctx, loadCancel := context.WithCancel(ctx)
|
||||
defer loadCancel()
|
||||
go func() {
|
||||
err := Load(ctx, cli.testnet, 1)
|
||||
chLoadResult <- err
|
||||
chLoadResult <- Load(lctx, cli.testnet)
|
||||
}()
|
||||
|
||||
if err := Start(cli.testnet); err != nil {
|
||||
if err := Start(ctx, cli.testnet); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := Wait(cli.testnet, 5); err != nil { // allow some txs to go through
|
||||
if err := Wait(ctx, cli.testnet, 5); err != nil { // allow some txs to go through
|
||||
return err
|
||||
}
|
||||
|
||||
if cli.testnet.HasPerturbations() {
|
||||
if err := Perturb(cli.testnet); err != nil {
|
||||
if err := Perturb(ctx, cli.testnet); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Wait(cli.testnet, 5); err != nil { // allow some txs to go through
|
||||
if err := Wait(ctx, cli.testnet, 5); err != nil { // allow some txs to go through
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cli.testnet.Evidence > 0 {
|
||||
if err := InjectEvidence(cli.testnet, cli.testnet.Evidence); err != nil {
|
||||
if err := InjectEvidence(ctx, cli.testnet, cli.testnet.Evidence); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Wait(cli.testnet, 5); err != nil { // ensure chain progress
|
||||
if err := Wait(ctx, cli.testnet, 5); err != nil { // ensure chain progress
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
loadCancel()
|
||||
|
||||
if err := <-chLoadResult; err != nil {
|
||||
return fmt.Errorf("transaction load failed: %w", err)
|
||||
}
|
||||
if err := Wait(cli.testnet, 5); err != nil { // wait for network to settle before tests
|
||||
if err := Wait(ctx, cli.testnet, 5); err != nil { // wait for network to settle before tests
|
||||
return err
|
||||
}
|
||||
if err := Test(cli.testnet); err != nil {
|
||||
@@ -139,7 +142,7 @@ func NewCLI() *CLI {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return Start(cli.testnet)
|
||||
return Start(cmd.Context(), cli.testnet)
|
||||
},
|
||||
})
|
||||
|
||||
@@ -147,7 +150,7 @@ func NewCLI() *CLI {
|
||||
Use: "perturb",
|
||||
Short: "Perturbs the Docker testnet, e.g. by restarting or disconnecting nodes",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return Perturb(cli.testnet)
|
||||
return Perturb(cmd.Context(), cli.testnet)
|
||||
},
|
||||
})
|
||||
|
||||
@@ -155,7 +158,7 @@ func NewCLI() *CLI {
|
||||
Use: "wait",
|
||||
Short: "Waits for a few blocks to be produced and all nodes to catch up",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return Wait(cli.testnet, 5)
|
||||
return Wait(cmd.Context(), cli.testnet, 5)
|
||||
},
|
||||
})
|
||||
|
||||
@@ -187,20 +190,10 @@ func NewCLI() *CLI {
|
||||
})
|
||||
|
||||
cli.root.AddCommand(&cobra.Command{
|
||||
Use: "load [multiplier]",
|
||||
Args: cobra.MaximumNArgs(1),
|
||||
Use: "load",
|
||||
Short: "Generates transaction load until the command is canceled",
|
||||
RunE: func(cmd *cobra.Command, args []string) (err error) {
|
||||
m := 1
|
||||
|
||||
if len(args) == 1 {
|
||||
m, err = strconv.Atoi(args[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return Load(context.Background(), cli.testnet, m)
|
||||
return Load(context.Background(), cli.testnet)
|
||||
},
|
||||
})
|
||||
|
||||
@@ -218,7 +211,7 @@ func NewCLI() *CLI {
|
||||
}
|
||||
}
|
||||
|
||||
return InjectEvidence(cli.testnet, amount)
|
||||
return InjectEvidence(cmd.Context(), cli.testnet, amount)
|
||||
},
|
||||
})
|
||||
|
||||
@@ -281,23 +274,26 @@ Does not run any perbutations.
|
||||
}
|
||||
|
||||
chLoadResult := make(chan error)
|
||||
ctx, loadCancel := context.WithCancel(context.Background())
|
||||
ctx, cancel := context.WithCancel(cmd.Context())
|
||||
defer cancel()
|
||||
|
||||
lctx, loadCancel := context.WithCancel(ctx)
|
||||
defer loadCancel()
|
||||
go func() {
|
||||
err := Load(ctx, cli.testnet, 1)
|
||||
err := Load(lctx, cli.testnet)
|
||||
chLoadResult <- err
|
||||
}()
|
||||
|
||||
if err := Start(cli.testnet); err != nil {
|
||||
if err := Start(ctx, cli.testnet); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := Wait(cli.testnet, 5); err != nil { // allow some txs to go through
|
||||
if err := Wait(ctx, cli.testnet, 5); err != nil { // allow some txs to go through
|
||||
return err
|
||||
}
|
||||
|
||||
// we benchmark performance over the next 100 blocks
|
||||
if err := Benchmark(cli.testnet, 100); err != nil {
|
||||
if err := Benchmark(ctx, cli.testnet, 100); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
@@ -9,14 +10,24 @@ import (
|
||||
)
|
||||
|
||||
// Perturbs a running testnet.
|
||||
func Perturb(testnet *e2e.Testnet) error {
|
||||
func Perturb(ctx context.Context, testnet *e2e.Testnet) error {
|
||||
timer := time.NewTimer(0) // first tick fires immediately; reset below
|
||||
defer timer.Stop()
|
||||
|
||||
for _, node := range testnet.Nodes {
|
||||
for _, perturbation := range node.Perturbations {
|
||||
_, err := PerturbNode(node, perturbation)
|
||||
if err != nil {
|
||||
return err
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-timer.C:
|
||||
_, err := PerturbNode(ctx, node, perturbation)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// give network some time to recover between each
|
||||
timer.Reset(20 * time.Second)
|
||||
}
|
||||
time.Sleep(20 * time.Second) // give network some time to recover between each
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -24,7 +35,7 @@ func Perturb(testnet *e2e.Testnet) error {
|
||||
|
||||
// PerturbNode perturbs a node with a given perturbation, returning its status
|
||||
// after recovering.
|
||||
func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.ResultStatus, error) {
|
||||
func PerturbNode(ctx context.Context, node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.ResultStatus, error) {
|
||||
testnet := node.Testnet
|
||||
switch perturbation {
|
||||
case e2e.PerturbationDisconnect:
|
||||
@@ -77,7 +88,9 @@ func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.Resul
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
status, err := waitForNode(node, 0, 3*time.Minute)
|
||||
ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
|
||||
defer cancel()
|
||||
status, err := waitForNode(ctx, node, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -13,23 +13,24 @@ import (
|
||||
)
|
||||
|
||||
// waitForHeight waits for the network to reach a certain height (or above),
|
||||
// returning the highest height seen. Errors if the network is not making
|
||||
// returning the block at the height seen. Errors if the network is not making
|
||||
// progress at all.
|
||||
func waitForHeight(testnet *e2e.Testnet, height int64) (*types.Block, *types.BlockID, error) {
|
||||
// If height == 0, the initial height of the test network is used as the target.
|
||||
func waitForHeight(ctx context.Context, testnet *e2e.Testnet, height int64) (*types.Block, *types.BlockID, error) {
|
||||
var (
|
||||
err error
|
||||
maxResult *rpctypes.ResultBlock
|
||||
clients = map[string]*rpchttp.HTTP{}
|
||||
lastHeight int64
|
||||
lastIncrease = time.Now()
|
||||
nodesAtHeight = map[string]struct{}{}
|
||||
numRunningNodes int
|
||||
)
|
||||
for _, node := range testnet.Nodes {
|
||||
if node.Mode == e2e.ModeSeed {
|
||||
continue
|
||||
}
|
||||
if height == 0 {
|
||||
height = testnet.InitialHeight
|
||||
}
|
||||
|
||||
if node.Mode == e2e.ModeLight {
|
||||
for _, node := range testnet.Nodes {
|
||||
if node.Stateless() {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -38,86 +39,97 @@ func waitForHeight(testnet *e2e.Testnet, height int64) (*types.Block, *types.Blo
|
||||
}
|
||||
}
|
||||
|
||||
timer := time.NewTimer(0)
|
||||
defer timer.Stop()
|
||||
for {
|
||||
for _, node := range testnet.Nodes {
|
||||
// skip nodes that have reached the target height
|
||||
if _, ok := nodesAtHeight[node.Name]; ok {
|
||||
continue
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, nil, ctx.Err()
|
||||
case <-timer.C:
|
||||
for _, node := range testnet.Nodes {
|
||||
// skip nodes that have reached the target height
|
||||
if _, ok := nodesAtHeight[node.Name]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if node.Mode == e2e.ModeSeed {
|
||||
continue
|
||||
}
|
||||
// skip nodes that don't have state or haven't started yet
|
||||
if node.Stateless() {
|
||||
continue
|
||||
}
|
||||
if !node.HasStarted {
|
||||
continue
|
||||
}
|
||||
|
||||
if node.Mode == e2e.ModeLight {
|
||||
continue
|
||||
}
|
||||
// cache the clients
|
||||
client, ok := clients[node.Name]
|
||||
if !ok {
|
||||
client, err = node.Client()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
clients[node.Name] = client
|
||||
}
|
||||
|
||||
if !node.HasStarted {
|
||||
continue
|
||||
}
|
||||
|
||||
// cache the clients
|
||||
client, ok := clients[node.Name]
|
||||
if !ok {
|
||||
client, err = node.Client()
|
||||
wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
result, err := client.Status(wctx)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
clients[node.Name] = client
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
result, err := client.Block(ctx, nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if result.Block != nil && (maxResult == nil || result.Block.Height > maxResult.Block.Height) {
|
||||
maxResult = result
|
||||
lastIncrease = time.Now()
|
||||
}
|
||||
|
||||
if maxResult != nil && maxResult.Block.Height >= height {
|
||||
// the node has achieved the target height!
|
||||
|
||||
// add this node to the set of target
|
||||
// height nodes
|
||||
nodesAtHeight[node.Name] = struct{}{}
|
||||
|
||||
// if not all of the nodes that we
|
||||
// have clients for have reached the
|
||||
// target height, keep trying.
|
||||
if numRunningNodes > len(nodesAtHeight) {
|
||||
continue
|
||||
if result.SyncInfo.LatestBlockHeight > lastHeight {
|
||||
lastHeight = result.SyncInfo.LatestBlockHeight
|
||||
lastIncrease = time.Now()
|
||||
}
|
||||
|
||||
// return once all nodes have reached
|
||||
// the target height.
|
||||
return maxResult.Block, &maxResult.BlockID, nil
|
||||
}
|
||||
}
|
||||
if result.SyncInfo.LatestBlockHeight >= height {
|
||||
// the node has achieved the target height!
|
||||
|
||||
if len(clients) == 0 {
|
||||
return nil, nil, errors.New("unable to connect to any network nodes")
|
||||
}
|
||||
if time.Since(lastIncrease) >= time.Minute {
|
||||
if maxResult == nil {
|
||||
return nil, nil, errors.New("chain stalled at unknown height")
|
||||
// add this node to the set of target
|
||||
// height nodes
|
||||
nodesAtHeight[node.Name] = struct{}{}
|
||||
|
||||
// if not all of the nodes that we
|
||||
// have clients for have reached the
|
||||
// target height, keep trying.
|
||||
if numRunningNodes > len(nodesAtHeight) {
|
||||
continue
|
||||
}
|
||||
|
||||
// All nodes are at or above the target height. Now fetch the block for that target height
|
||||
// and return it. We loop again through all clients because some may have pruning set but
|
||||
// at least two of them should be archive nodes.
|
||||
for _, c := range clients {
|
||||
result, err := c.Block(ctx, &height)
|
||||
if err != nil || result == nil || result.Block == nil {
|
||||
continue
|
||||
}
|
||||
return result.Block, &result.BlockID, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil, fmt.Errorf("chain stalled at height %v [%d of %d nodes]",
|
||||
maxResult.Block.Height,
|
||||
len(nodesAtHeight),
|
||||
numRunningNodes)
|
||||
if len(clients) == 0 {
|
||||
return nil, nil, errors.New("unable to connect to any network nodes")
|
||||
}
|
||||
if time.Since(lastIncrease) >= time.Minute {
|
||||
if lastHeight == 0 {
|
||||
return nil, nil, errors.New("chain stalled at unknown height (most likely upon starting)")
|
||||
}
|
||||
|
||||
return nil, nil, fmt.Errorf("chain stalled at height %v [%d of %d nodes %+v]",
|
||||
lastHeight,
|
||||
len(nodesAtHeight),
|
||||
numRunningNodes,
|
||||
nodesAtHeight)
|
||||
|
||||
}
|
||||
timer.Reset(1 * time.Second)
|
||||
}
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
// waitForNode waits for a node to become available and catch up to the given block height.
|
||||
func waitForNode(node *e2e.Node, height int64, timeout time.Duration) (*rpctypes.ResultStatus, error) {
|
||||
func waitForNode(ctx context.Context, node *e2e.Node, height int64) (*rpctypes.ResultStatus, error) {
|
||||
if node.Mode == e2e.ModeSeed {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -126,42 +138,91 @@ func waitForNode(node *e2e.Node, height int64, timeout time.Duration) (*rpctypes
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
timer := time.NewTimer(0)
|
||||
defer timer.Stop()
|
||||
|
||||
var (
|
||||
lastFailed bool
|
||||
counter int
|
||||
)
|
||||
for {
|
||||
status, err := client.Status(ctx)
|
||||
switch {
|
||||
case errors.Is(err, context.DeadlineExceeded):
|
||||
return nil, fmt.Errorf("timed out waiting for %v to reach height %v", node.Name, height)
|
||||
case errors.Is(err, context.Canceled):
|
||||
return nil, err
|
||||
case err == nil && status.SyncInfo.LatestBlockHeight >= height:
|
||||
return status, nil
|
||||
counter++
|
||||
if lastFailed {
|
||||
lastFailed = false
|
||||
|
||||
// if there was a problem with the request in
|
||||
// the previous iteration, recreate the client to ensure
|
||||
// reconnection
|
||||
client, err = node.Client()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
time.Sleep(300 * time.Millisecond)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
case <-timer.C:
|
||||
status, err := client.Status(ctx)
|
||||
switch {
|
||||
case errors.Is(err, context.DeadlineExceeded):
|
||||
return nil, fmt.Errorf("timed out waiting for %v to reach height %v", node.Name, height)
|
||||
case errors.Is(err, context.Canceled):
|
||||
return nil, err
|
||||
case err == nil && status.SyncInfo.LatestBlockHeight >= height:
|
||||
return status, nil
|
||||
case counter%100 == 0:
|
||||
switch {
|
||||
case err != nil:
|
||||
lastFailed = true
|
||||
logger.Error("node not yet ready",
|
||||
"iter", counter,
|
||||
"node", node.Name,
|
||||
"err", err,
|
||||
"target", height,
|
||||
)
|
||||
case status != nil:
|
||||
logger.Error("node not yet ready",
|
||||
"iter", counter,
|
||||
"node", node.Name,
|
||||
"height", status.SyncInfo.LatestBlockHeight,
|
||||
"target", height,
|
||||
)
|
||||
}
|
||||
}
|
||||
timer.Reset(250 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// waitForAllNodes waits for all nodes to become available and catch up to the given block height.
|
||||
func waitForAllNodes(testnet *e2e.Testnet, height int64, timeout time.Duration) (int64, error) {
|
||||
var lastHeight int64
|
||||
|
||||
// getLatestBlock returns the last block that all active nodes in the network have
|
||||
// agreed upon, i.e. the earliest of each node's latest block
|
||||
func getLatestBlock(ctx context.Context, testnet *e2e.Testnet) (*types.Block, error) {
|
||||
var earliestBlock *types.Block
|
||||
for _, node := range testnet.Nodes {
|
||||
if node.Mode == e2e.ModeSeed {
|
||||
// skip nodes that don't have state or haven't started yet
|
||||
if node.Stateless() {
|
||||
continue
|
||||
}
|
||||
if !node.HasStarted {
|
||||
continue
|
||||
}
|
||||
|
||||
status, err := waitForNode(node, height, timeout)
|
||||
client, err := node.Client()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if status.SyncInfo.LatestBlockHeight > lastHeight {
|
||||
lastHeight = status.SyncInfo.LatestBlockHeight
|
||||
wctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
result, err := client.Block(wctx, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if result.Block != nil && (earliestBlock == nil || earliestBlock.Height > result.Block.Height) {
|
||||
earliestBlock = result.Block
|
||||
}
|
||||
}
|
||||
|
||||
return lastHeight, nil
|
||||
return earliestBlock, nil
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sort"
|
||||
"time"
|
||||
@@ -8,7 +9,7 @@ import (
|
||||
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
|
||||
)
|
||||
|
||||
func Start(testnet *e2e.Testnet) error {
|
||||
func Start(ctx context.Context, testnet *e2e.Testnet) error {
|
||||
if len(testnet.Nodes) == 0 {
|
||||
return fmt.Errorf("no nodes in testnet")
|
||||
}
|
||||
@@ -45,7 +46,14 @@ func Start(testnet *e2e.Testnet) error {
|
||||
if err := execCompose(testnet.Dir, "up", "-d", node.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := waitForNode(node, 0, time.Minute); err != nil {
|
||||
|
||||
if err := func() error {
|
||||
ctx, cancel := context.WithTimeout(ctx, time.Minute)
|
||||
defer cancel()
|
||||
|
||||
_, err := waitForNode(ctx, node, 0)
|
||||
return err
|
||||
}(); err != nil {
|
||||
return err
|
||||
}
|
||||
node.HasStarted = true
|
||||
@@ -60,7 +68,7 @@ func Start(testnet *e2e.Testnet) error {
|
||||
"nodes", len(testnet.Nodes)-len(nodeQueue),
|
||||
"pending", len(nodeQueue))
|
||||
|
||||
block, blockID, err := waitForHeight(testnet, networkHeight)
|
||||
block, blockID, err := waitForHeight(ctx, testnet, networkHeight)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -74,9 +82,16 @@ func Start(testnet *e2e.Testnet) error {
|
||||
// that this node will start at before we
|
||||
// start the node.
|
||||
|
||||
logger.Info("Waiting for network to advance to height",
|
||||
"node", node.Name,
|
||||
"last_height", networkHeight,
|
||||
"waiting_for", node.StartAt,
|
||||
"size", len(testnet.Nodes)-len(nodeQueue),
|
||||
"pending", len(nodeQueue))
|
||||
|
||||
networkHeight = node.StartAt
|
||||
|
||||
block, blockID, err = waitForHeight(testnet, networkHeight)
|
||||
block, blockID, err = waitForHeight(ctx, testnet, networkHeight)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -93,10 +108,15 @@ func Start(testnet *e2e.Testnet) error {
|
||||
if err := execCompose(testnet.Dir, "up", "-d", node.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
status, err := waitForNode(node, node.StartAt, 8*time.Minute)
|
||||
|
||||
wctx, wcancel := context.WithTimeout(ctx, 8*time.Minute)
|
||||
status, err := waitForNode(wctx, node, node.StartAt)
|
||||
if err != nil {
|
||||
wcancel()
|
||||
return err
|
||||
}
|
||||
wcancel()
|
||||
|
||||
node.HasStarted = true
|
||||
logger.Info(fmt.Sprintf("Node %v up on http://127.0.0.1:%v at height %v",
|
||||
node.Name, node.ProxyPort, status.SyncInfo.LatestBlockHeight))
|
||||
|
||||
@@ -15,5 +15,5 @@ func Test(testnet *e2e.Testnet) error {
|
||||
return err
|
||||
}
|
||||
|
||||
return execVerbose("go", "test", "-count", "1", "./tests/...")
|
||||
return execVerbose("./build/tests", "-test.count", "1")
|
||||
}
|
||||
|
||||
@@ -1,31 +1,27 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
|
||||
)
|
||||
|
||||
// Wait waits for a number of blocks to be produced, and for all nodes to catch
|
||||
// up with it.
|
||||
func Wait(testnet *e2e.Testnet, blocks int64) error {
|
||||
block, _, err := waitForHeight(testnet, 0)
|
||||
func Wait(ctx context.Context, testnet *e2e.Testnet, blocks int64) error {
|
||||
block, err := getLatestBlock(ctx, testnet)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return WaitUntil(testnet, block.Height+blocks)
|
||||
return WaitUntil(ctx, testnet, block.Height+blocks)
|
||||
}
|
||||
|
||||
// WaitUntil waits until a given height has been reached.
|
||||
func WaitUntil(testnet *e2e.Testnet, height int64) error {
|
||||
func WaitUntil(ctx context.Context, testnet *e2e.Testnet, height int64) error {
|
||||
logger.Info(fmt.Sprintf("Waiting for all nodes to reach height %v...", height))
|
||||
_, err := waitForAllNodes(testnet, height, waitingTime(len(testnet.Nodes)))
|
||||
|
||||
_, _, err := waitForHeight(ctx, testnet, height)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// waitingTime estimates how long it should take for a node to reach the height.
|
||||
// More nodes in a network implies we may expect a slower network and may have to wait longer.
|
||||
func waitingTime(nodes int) time.Duration {
|
||||
return time.Minute + (time.Duration(nodes) * (30 * time.Second))
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package e2e_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"testing"
|
||||
@@ -10,6 +11,7 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/tendermint/tendermint/rpc/client/http"
|
||||
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
|
||||
"github.com/tendermint/tendermint/types"
|
||||
)
|
||||
@@ -44,7 +46,7 @@ func TestApp_Hash(t *testing.T) {
|
||||
|
||||
block, err := client.Block(ctx, nil)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, info.Response.LastBlockAppHash, block.Block.AppHash,
|
||||
require.EqualValues(t, info.Response.LastBlockAppHash, block.Block.AppHash.Bytes(),
|
||||
"app hash does not match last block's app hash")
|
||||
|
||||
status, err := client.Status(ctx)
|
||||
@@ -56,42 +58,101 @@ func TestApp_Hash(t *testing.T) {
|
||||
|
||||
// Tests that we can set a value and retrieve it.
|
||||
func TestApp_Tx(t *testing.T) {
|
||||
testNode(t, func(t *testing.T, node e2e.Node) {
|
||||
client, err := node.Client()
|
||||
require.NoError(t, err)
|
||||
type broadcastFunc func(context.Context, types.Tx) error
|
||||
|
||||
// Generate a random value, to prevent duplicate tx errors when
|
||||
// manually running the test multiple times for a testnet.
|
||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
bz := make([]byte, 32)
|
||||
_, err = r.Read(bz)
|
||||
require.NoError(t, err)
|
||||
testCases := []struct {
|
||||
Name string
|
||||
WaitTime time.Duration
|
||||
BroadcastTx func(client *http.HTTP) broadcastFunc
|
||||
ShouldSkip bool
|
||||
}{
|
||||
{
|
||||
Name: "Sync",
|
||||
WaitTime: time.Minute,
|
||||
BroadcastTx: func(client *http.HTTP) broadcastFunc {
|
||||
return func(ctx context.Context, tx types.Tx) error {
|
||||
_, err := client.BroadcastTxSync(ctx, tx)
|
||||
return err
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Commit",
|
||||
WaitTime: 15 * time.Second,
|
||||
// TODO: turn this check back on if it can
|
||||
// return reliably. Currently these calls have
|
||||
// a hard timeout of 10s (server side
|
||||
// configured). The Sync check is probably
|
||||
// safe.
|
||||
ShouldSkip: true,
|
||||
BroadcastTx: func(client *http.HTTP) broadcastFunc {
|
||||
return func(ctx context.Context, tx types.Tx) error {
|
||||
_, err := client.BroadcastTxCommit(ctx, tx)
|
||||
return err
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Async",
|
||||
WaitTime: 90 * time.Second,
|
||||
// TODO: turn this check back on if there's a
|
||||
// way to avoid failures in the case that the
|
||||
// transaction doesn't make it into the
|
||||
// mempool. (retries?)
|
||||
ShouldSkip: true,
|
||||
BroadcastTx: func(client *http.HTTP) broadcastFunc {
|
||||
return func(ctx context.Context, tx types.Tx) error {
|
||||
_, err := client.BroadcastTxAsync(ctx, tx)
|
||||
return err
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
key := fmt.Sprintf("testapp-tx-%v", node.Name)
|
||||
value := fmt.Sprintf("%x", bz)
|
||||
tx := types.Tx(fmt.Sprintf("%v=%v", key, value))
|
||||
|
||||
_, err = client.BroadcastTxSync(ctx, tx)
|
||||
require.NoError(t, err)
|
||||
|
||||
hash := tx.Hash()
|
||||
waitTime := 20 * time.Second
|
||||
|
||||
require.Eventuallyf(t, func() bool {
|
||||
txResp, err := client.Tx(ctx, hash, false)
|
||||
return err == nil && bytes.Equal(txResp.Tx, tx)
|
||||
}, waitTime, time.Second,
|
||||
"submitted tx %X wasn't committed after %v", hash, waitTime,
|
||||
)
|
||||
|
||||
// NOTE: we don't test abci query of the light client
|
||||
if node.Mode == e2e.ModeLight {
|
||||
return
|
||||
for idx, test := range testCases {
|
||||
if test.ShouldSkip {
|
||||
continue
|
||||
}
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
// testNode calls t.Parallel as well, so we should
|
||||
// have a copy of the test case.
|
||||
test := testCases[idx]
|
||||
testNode(t, func(t *testing.T, node e2e.Node) {
|
||||
client, err := node.Client()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Generate a random value, to prevent duplicate tx errors when
|
||||
// manually running the test multiple times for a testnet.
|
||||
bz := make([]byte, 32)
|
||||
_, err = rand.Read(bz)
|
||||
require.NoError(t, err)
|
||||
|
||||
key := fmt.Sprintf("testapp-tx-%v", node.Name)
|
||||
value := fmt.Sprintf("%x", bz)
|
||||
tx := types.Tx(fmt.Sprintf("%v=%v", key, value))
|
||||
|
||||
require.NoError(t, test.BroadcastTx(client)(ctx, tx))
|
||||
|
||||
hash := tx.Hash()
|
||||
|
||||
require.Eventuallyf(t, func() bool {
|
||||
txResp, err := client.Tx(ctx, hash, false)
|
||||
return err == nil && bytes.Equal(txResp.Tx, tx)
|
||||
},
|
||||
test.WaitTime, // timeout
|
||||
time.Second, // interval
|
||||
"submitted tx %X wasn't committed after %v",
|
||||
hash, test.WaitTime,
|
||||
)
|
||||
|
||||
abciResp, err := client.ABCIQuery(ctx, "", []byte(key))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, key, string(abciResp.Response.Key))
|
||||
assert.Equal(t, value, string(abciResp.Response.Value))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
abciResp, err := client.ABCIQuery(ctx, "", []byte(key))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, key, string(abciResp.Response.Key))
|
||||
assert.Equal(t, value, string(abciResp.Response.Value))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ package e2e_test
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
@@ -72,9 +71,6 @@ func loadTestnet(t *testing.T) e2e.Testnet {
|
||||
if manifest == "" {
|
||||
t.Skip("E2E_MANIFEST not set, not an end-to-end test run")
|
||||
}
|
||||
if !filepath.IsAbs(manifest) {
|
||||
manifest = filepath.Join("..", manifest)
|
||||
}
|
||||
|
||||
testnetCacheMtx.Lock()
|
||||
defer testnetCacheMtx.Unlock()
|
||||
|
||||
Reference in New Issue
Block a user