From 09cf0bcb012d37de33b41425d82a77bbb31ed1b5 Mon Sep 17 00:00:00 2001 From: Marko Date: Wed, 6 Jan 2021 10:49:30 -0800 Subject: [PATCH 1/7] privval: add grpc (#5725) Co-authored-by: Anton Kaliaev --- CHANGELOG_PENDING.md | 1 + Makefile | 23 ++- cmd/priv_val_server/main.go | 133 ++++++++++++++--- config/config.go | 38 +++++ config/toml.go | 11 ++ docs/nodes/remote_signer.md | 70 +++++++++ go.mod | 2 + go.sum | 6 + libs/net/net_test.go | 5 + node/node.go | 74 ++++++--- node/utils.go | 26 ++++ privval/grpc/client.go | 108 ++++++++++++++ privval/grpc/client_test.go | 168 +++++++++++++++++++++ privval/grpc/server.go | 87 +++++++++++ privval/grpc/server_test.go | 187 +++++++++++++++++++++++ privval/grpc/util.go | 82 ++++++++++ proto/tendermint/privval/service.pb.go | 199 +++++++++++++++++++++++++ proto/tendermint/privval/service.proto | 14 ++ test/e2e/app/main.go | 23 +++ test/e2e/generator/generate.go | 2 +- test/e2e/networks/ci.toml | 2 +- test/e2e/pkg/manifest.go | 3 +- test/e2e/pkg/testnet.go | 2 +- test/e2e/runner/setup.go | 7 + types/priv_validator.go | 5 + 25 files changed, 1233 insertions(+), 45 deletions(-) create mode 100644 docs/nodes/remote_signer.md create mode 100644 node/utils.go create mode 100644 privval/grpc/client.go create mode 100644 privval/grpc/client_test.go create mode 100644 privval/grpc/server.go create mode 100644 privval/grpc/server_test.go create mode 100644 privval/grpc/util.go create mode 100644 proto/tendermint/privval/service.pb.go create mode 100644 proto/tendermint/privval/service.proto diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 7ee898323..a3126d1f5 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -47,6 +47,7 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi - [crypto/ed25519] \#5632 Adopt zip215 `ed25519` verification. 
(@marbar3778) - [privval] \#5603 Add `--key` to `init`, `gen_validator`, `testnet` & `unsafe_reset_priv_validator` for use in generating `secp256k1` keys. +- [privval] \#5725 add gRPC support to private validator. - [abci/client] \#5673 `Async` requests return an error if queue is full (@melekes) - [mempool] \#5673 Cancel `CheckTx` requests if RPC client disconnects or times out (@melekes) - [abci] \#5706 Added `AbciVersion` to `RequestInfo` allowing applications to check ABCI version when connecting to Tendermint. (@marbar3778) diff --git a/Makefile b/Makefile index 34d1f7da2..90d651dbf 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ include tools/Makefile include test/Makefile ############################################################################### -### Build Tendermint ### +### Build Tendermint ### ############################################################################### build: $(BUILDDIR)/ @@ -127,6 +127,27 @@ install_abci: @go install -mod=readonly ./abci/cmd/... .PHONY: install_abci +############################################################################### +### Privval Server ### +############################################################################### + +build_privval_server: + @go build -mod=readonly -o $(BUILDDIR)/ -i ./cmd/priv_val_server/... 
+.PHONY: build_privval_server + +generate_test_cert: + # generate self signing ceritificate authority + @certstrap init --common-name "root CA" --expires "20 years" + # generate server cerificate + @certstrap request-cert -cn server -ip 127.0.0.1 + # self-sign server cerificate with rootCA + @certstrap sign server --CA "root CA" + # generate client cerificate + @certstrap request-cert -cn client -ip 127.0.0.1 + # self-sign client cerificate with rootCA + @certstrap sign client --CA "root CA" +.PHONY: generate_test_cert + ############################################################################### ### Distribution ### ############################################################################### diff --git a/cmd/priv_val_server/main.go b/cmd/priv_val_server/main.go index 813ce6b14..03d375693 100644 --- a/cmd/priv_val_server/main.go +++ b/cmd/priv_val_server/main.go @@ -1,24 +1,50 @@ package main import ( + "context" + "crypto/tls" + "crypto/x509" "flag" + "fmt" + "io/ioutil" + "net" + "net/http" "os" "time" - "github.com/tendermint/tendermint/crypto/ed25519" + grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "github.com/tendermint/tendermint/libs/log" tmnet "github.com/tendermint/tendermint/libs/net" tmos "github.com/tendermint/tendermint/libs/os" - "github.com/tendermint/tendermint/privval" + grpcprivval "github.com/tendermint/tendermint/privval/grpc" + privvalproto "github.com/tendermint/tendermint/proto/tendermint/privval" +) + +var ( + // Create a metrics registry. + reg = prometheus.NewRegistry() + + // Create some standard server metrics. 
+ grpcMetrics = grpc_prometheus.NewServerMetrics() ) func main() { var ( - addr = flag.String("addr", ":26659", "Address of client to connect to") + addr = flag.String("addr", "127.0.0.1:26659", "Address to listen on (host:port)") chainID = flag.String("chain-id", "mychain", "chain id") privValKeyPath = flag.String("priv-key", "", "priv val key file path") privValStatePath = flag.String("priv-state", "", "priv val state file path") + insecure = flag.Bool("insecure", false, "allow server to run insecurely (no TLS)") + certFile = flag.String("certfile", "", "absolute path to server certificate") + keyFile = flag.String("keyfile", "", "absolute path to server key") + rootCA = flag.String("rootcafile", "", "absolute path to root CA") + prometheusAddr = flag.String("prometheus-addr", "", "address for prometheus endpoint (host:port)") logger = log.NewTMLogger( log.NewSyncWriter(os.Stdout), @@ -32,39 +58,106 @@ func main() { "chainID", *chainID, "privKeyPath", *privValKeyPath, "privStatePath", *privValStatePath, + "insecure", *insecure, + "certFile", *certFile, + "keyFile", *keyFile, + "rootCA", *rootCA, ) pv := privval.LoadFilePV(*privValKeyPath, *privValStatePath) - var dialer privval.SocketDialer + opts := []grpc.ServerOption{} + if !*insecure { + certificate, err := tls.LoadX509KeyPair(*certFile, *keyFile) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to load X509 key pair: %v", err) + os.Exit(1) + } + + certPool := x509.NewCertPool() + bs, err := ioutil.ReadFile(*rootCA) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to read client ca cert: %s", err) + os.Exit(1) + } + + if ok := certPool.AppendCertsFromPEM(bs); !ok { + fmt.Fprintf(os.Stderr, "failed to append client certs") + os.Exit(1) + } + + tlsConfig := &tls.Config{ + ClientAuth: tls.RequireAndVerifyClientCert, + Certificates: []tls.Certificate{certificate}, + ClientCAs: certPool, + MinVersion: tls.VersionTLS13, + } + + creds := grpc.Creds(credentials.NewTLS(tlsConfig)) + opts = append(opts, creds) + 
logger.Info("SignerServer: Creating security credentials") + } else { + logger.Info("SignerServer: You are using an insecure gRPC connection!") + } + + // add prometheus metrics for unary RPC calls + opts = append(opts, grpc.UnaryInterceptor(grpc_prometheus.UnaryServerInterceptor)) + + ss := grpcprivval.NewSignerServer(*chainID, pv, logger) + protocol, address := tmnet.ProtocolAndAddress(*addr) - switch protocol { - case "unix": - dialer = privval.DialUnixFn(address) - case "tcp": - connTimeout := 3 * time.Second // TODO - dialer = privval.DialTCPFn(address, connTimeout, ed25519.GenPrivKey()) - default: - logger.Error("Unknown protocol", "protocol", protocol) + + lis, err := net.Listen(protocol, address) + if err != nil { + fmt.Fprintf(os.Stderr, "SignerServer: Failed to listen %v", err) os.Exit(1) } - sd := privval.NewSignerDialerEndpoint(logger, dialer) - ss := privval.NewSignerServer(sd, *chainID, pv) + s := grpc.NewServer(opts...) - err := ss.Start() - if err != nil { - panic(err) + privvalproto.RegisterPrivValidatorAPIServer(s, ss) + + var httpSrv *http.Server + if *prometheusAddr != "" { + httpSrv = registerPrometheus(*prometheusAddr, s) + } + + logger.Info("SignerServer: Starting grpc server") + if err := s.Serve(lis); err != nil { + fmt.Fprintf(os.Stderr, "Unable to listen on port %s: %v", *addr, err) + os.Exit(1) } // Stop upon receiving SIGTERM or CTRL-C. tmos.TrapSignal(logger, func() { - err := ss.Stop() - if err != nil { - panic(err) + logger.Debug("SignerServer: calling Close") + if *prometheusAddr != "" { + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + if err := httpSrv.Shutdown(ctx); err != nil { + fmt.Fprintf(os.Stderr, "Unable to stop http server: %v", err) + os.Exit(1) + } } + s.GracefulStop() }) // Run forever. select {} } + +func registerPrometheus(addr string, s *grpc.Server) *http.Server { + // Initialize all metrics. 
+ grpcMetrics.InitializeMetrics(s) + // create http server to serve prometheus + httpServer := &http.Server{Handler: promhttp.HandlerFor(reg, promhttp.HandlerOpts{}), Addr: addr} + + go func() { + if err := httpServer.ListenAndServe(); err != nil { + fmt.Fprintf(os.Stderr, "Unable to start a http server: %v", err) + os.Exit(1) + } + }() + + return httpServer +} diff --git a/config/config.go b/config/config.go index fd8b6669d..f66312127 100644 --- a/config/config.go +++ b/config/config.go @@ -204,6 +204,16 @@ type BaseConfig struct { //nolint: maligned // connections from an external PrivValidator process PrivValidatorListenAddr string `mapstructure:"priv-validator-laddr"` + // Client certificate generated while creating needed files for secure connection. + // If a remote validator address is provided but no certificate, the connection will be insecure + PrivValidatorClientCertificate string `mapstructure:"priv-validator-client-certificate-file"` + + // Client key generated while creating certificates for secure connection + PrivValidatorClientKey string `mapstructure:"priv-validator-client-key-file"` + + // Path Root Certificate Authority used to sign both client and server certificates + PrivValidatorRootCA string `mapstructure:"priv-validator-root-ca-file"` + // A JSON file containing the private key to use for p2p authenticated encryption NodeKey string `mapstructure:"node-key-file"` @@ -253,6 +263,21 @@ func (cfg BaseConfig) GenesisFile() string { return rootify(cfg.Genesis, cfg.RootDir) } +// PrivValidatorClientKeyFile returns the full path to the priv_validator_key.json file +func (cfg BaseConfig) PrivValidatorClientKeyFile() string { + return rootify(cfg.PrivValidatorClientKey, cfg.RootDir) +} + +// PrivValidatorClientCertificateFile returns the full path to the priv_validator_key.json file +func (cfg BaseConfig) PrivValidatorClientCertificateFile() string { + return rootify(cfg.PrivValidatorClientCertificate, cfg.RootDir) +} + +// 
PrivValidatorCertificateAuthorityFile returns the full path to the priv_validator_key.json file +func (cfg BaseConfig) PrivValidatorRootCAFile() string { + return rootify(cfg.PrivValidatorRootCA, cfg.RootDir) +} + // PrivValidatorKeyFile returns the full path to the priv_validator_key.json file func (cfg BaseConfig) PrivValidatorKeyFile() string { return rootify(cfg.PrivValidatorKey, cfg.RootDir) @@ -273,6 +298,19 @@ func (cfg BaseConfig) DBDir() string { return rootify(cfg.DBPath, cfg.RootDir) } +func (cfg *BaseConfig) ArePrivValidatorClientSecurityOptionsPresent() bool { + switch { + case cfg.PrivValidatorRootCA == "": + return false + case cfg.PrivValidatorClientKey == "": + return false + case cfg.PrivValidatorClientCertificate == "": + return false + default: + return true + } +} + // ValidateBasic performs basic validation (checking param bounds, etc.) and // returns an error if any check fails. func (cfg BaseConfig) ValidateBasic() error { diff --git a/config/toml.go b/config/toml.go index ed29ab46c..34152acca 100644 --- a/config/toml.go +++ b/config/toml.go @@ -136,8 +136,19 @@ priv-validator-state-file = "{{ js .BaseConfig.PrivValidatorState }}" # TCP or UNIX socket address for Tendermint to listen on for # connections from an external PrivValidator process +# when the listenAddr is prefixed with grpc instead of tcp it will use the gRPC Client priv-validator-laddr = "{{ .BaseConfig.PrivValidatorListenAddr }}" +# Client certificate generated while creating needed files for secure connection. 
+# If a remote validator address is provided but no certificate, the connection will be insecure +priv-validator-client-certificate-file = "{{ js .BaseConfig.PrivValidatorClientCertificate }}" + +# Client key generated while creating certificates for secure connection +priv-validator-client-key-file = "{{ js .BaseConfig.PrivValidatorClientKey }}" + +# Path Root Certificate Authority used to sign both client and server certificates +priv-validator-certificate-authority = "{{ js .BaseConfig.PrivValidatorRootCA }}" + # Path to the JSON file containing the private key to use for node authentication in the p2p protocol node-key-file = "{{ js .BaseConfig.NodeKey }}" diff --git a/docs/nodes/remote_signer.md b/docs/nodes/remote_signer.md new file mode 100644 index 000000000..e7dfccacd --- /dev/null +++ b/docs/nodes/remote_signer.md @@ -0,0 +1,70 @@ +--- +order: 7 +--- + +# Remote signer + +Tendermint provides a remote signer option for validators. A remote signer enables the operator to store the validator key on a different machine minimizing the attack surface if a server were to be compromised. + +The remote signer protocol implements a [client and server architecture](https://en.wikipedia.org/wiki/Client%E2%80%93server_model). When Tendermint requires the public key or signature for a proposal or vote it requests it from the remote signer. + +To run a secure validator and remote signer system it is recommended to use a VPC (virtual private cloud) or a private connection. + +There are two different configurations that can be used: Raw or gRPC. + +## Raw + +While both options use tcp or unix sockets the raw option uses tcp or unix sockets without http. The raw protocol sets up Tendermint as the server and the remote signer as the client. This aids in not exposing the remote signer to public network. + +> Warning: Raw will be deprecated in a future major release, we recommend implementing your key management server against the gRPC configuration. 
+ +## gRPC + +[gRPC](https://grpc.io/) is an RPC framework built with [HTTP/2](https://en.wikipedia.org/wiki/HTTP/2), uses [Protocol Buffers](https://developers.google.com/protocol-buffers) to define services and has been standardized within the cloud infrastructure community. gRPC provides a language agnostic way to implement services. This aids developers in the writing key management servers in various different languages. + +GRPC utilizes [TLS](https://en.wikipedia.org/wiki/Transport_Layer_Security), another widely standardized protocol, to secure connections. There are two forms of TLS to secure a connection, one-way and two-way. One way is when the client identifies the server but the server allows anyone to connect to it. Two-way is when the client identifies the server and the server identifies the client, prohibiting connections from unknown parties. + +When using gRPC Tendermint is setup as the client. Tendermint will make calls to the remote signer. We recommend not exposing the remote signer to the public network with the use of virtual private cloud. + +Securing your remote signers connection is highly recommended, but we provide the option to run it with a insecure connection. + +### Generating Certificates + +To run a secure connection with gRPC we need to generate certificates and keys. We will walkthrough how to self sign certificates for two-way TLS. + +There are two ways to generate certificates, [openssl](https://www.openssl.org/) and [certstarp](https://github.com/square/certstrap). Both of these options can be used but we will be covering `certstrap` because it provides a simpler process then openssl. + +- Install `Certstrap`: + +```sh + go get github.com/square/certstrap@v1.2.0 +``` + +- Create certificate authority for self signing. + +```sh + # generate self signing ceritificate authority + certstrap init --common-name "" --expires "20 years" +``` + +- Request a certificate for the server. 
+ - For generalization purposes we set the ip to `127.0.0.1`, but for your node please use the servers IP. +- Sign the servers certificate with your certificate authority + +```sh + # generate server cerificate + certstrap request-cert -cn server -ip 127.0.0.1 + # self-sign server cerificate with rootCA + certstrap sign server --CA "" 127.0.0.1 + ``` + +- Request a certificate for the client. + - For generalization purposes we set the ip to `127.0.0.1`, but for your node please use the clients IP. +- Sign the clients certificate with your certificate authority + +```sh +# generate client cerificate + certstrap request-cert -cn client -ip 127.0.0.1 +# self-sign client cerificate with rootCA + certstrap sign client --CA "" 127.0.0.1 +``` diff --git a/go.mod b/go.mod index 08fc9efc4..90a93e7ab 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,8 @@ require ( github.com/golang/protobuf v1.4.3 github.com/google/orderedcode v0.0.1 github.com/gorilla/websocket v1.4.2 + github.com/grpc-ecosystem/go-grpc-middleware v1.2.2 + github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/gtank/merlin v0.1.1 github.com/hdevalence/ed25519consensus v0.0.0-20201207055737-7fde80a9d5ff github.com/libp2p/go-buffer-pool v0.0.2 diff --git a/go.sum b/go.sum index d110a35e2..baf3477ef 100644 --- a/go.sum +++ b/go.sum @@ -32,9 +32,11 @@ github.com/Workiva/go-datastructures v1.0.52/go.mod h1:Z+F2Rca0qCsVYDS8z7bAGm8f3 github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units 
v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= @@ -243,7 +245,9 @@ github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/ad github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.2.1/go.mod h1:EaizFBKfUKtMIF5iaDEhniwNedqGo9FuLFzppDr3uwI= +github.com/grpc-ecosystem/go-grpc-middleware v1.2.2 h1:FlFbCRLd5Jr4iYXZufAvgWN6Ao0JrI5chLINnUXDDr0= github.com/grpc-ecosystem/go-grpc-middleware v1.2.2/go.mod h1:EaizFBKfUKtMIF5iaDEhniwNedqGo9FuLFzppDr3uwI= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= @@ -481,6 +485,7 @@ github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= 
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= @@ -812,6 +817,7 @@ google.golang.org/protobuf v1.24.0 h1:UhZDfRO8JRQru4/+LlLE0BRKGF8L+PICnvYZmx/fEG google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/libs/net/net_test.go b/libs/net/net_test.go index 38cd58f6a..5ce5965c2 100644 --- a/libs/net/net_test.go +++ b/libs/net/net_test.go @@ -18,6 +18,11 @@ func TestProtocolAndAddress(t *testing.T) { "tcp", "mydomain:80", }, + { + "grpc://mydomain:80", + "grpc", + "mydomain:80", + }, { "mydomain:80", "tcp", diff --git a/node/node.go b/node/node.go index b79aa1d6c..d9e504758 100644 --- a/node/node.go +++ b/node/node.go @@ -8,12 +8,13 @@ import ( "net" "net/http" _ "net/http/pprof" // nolint: gosec // securely exposed on separate, optional port - "strings" "time" + grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" "github.com/prometheus/client_golang/prometheus" 
"github.com/prometheus/client_golang/prometheus/promhttp" "github.com/rs/cors" + "google.golang.org/grpc" dbm "github.com/tendermint/tm-db" @@ -26,6 +27,7 @@ import ( "github.com/tendermint/tendermint/evidence" tmjson "github.com/tendermint/tendermint/libs/json" "github.com/tendermint/tendermint/libs/log" + tmnet "github.com/tendermint/tendermint/libs/net" tmpubsub "github.com/tendermint/tendermint/libs/pubsub" "github.com/tendermint/tendermint/libs/service" "github.com/tendermint/tendermint/light" @@ -33,6 +35,7 @@ import ( "github.com/tendermint/tendermint/p2p" "github.com/tendermint/tendermint/p2p/pex" "github.com/tendermint/tendermint/privval" + tmgrpc "github.com/tendermint/tendermint/privval/grpc" "github.com/tendermint/tendermint/proxy" rpccore "github.com/tendermint/tendermint/rpc/core" grpccore "github.com/tendermint/tendermint/rpc/grpc" @@ -675,10 +678,19 @@ func NewNode(config *cfg.Config, // If an address is provided, listen on the socket for a connection from an // external signing process. 
if config.PrivValidatorListenAddr != "" { + protocol, address := tmnet.ProtocolAndAddress(config.PrivValidatorListenAddr) // FIXME: we should start services inside OnStart - privValidator, err = createAndStartPrivValidatorSocketClient(config.PrivValidatorListenAddr, genDoc.ChainID, logger) - if err != nil { - return nil, fmt.Errorf("error with private validator socket client: %w", err) + switch protocol { + case "grpc": + privValidator, err = createAndStartPrivValidatorGRPCClient(config, address, genDoc.ChainID, logger) + if err != nil { + return nil, fmt.Errorf("error with private validator grpc client: %w", err) + } + default: + privValidator, err = createAndStartPrivValidatorSocketClient(config.PrivValidatorListenAddr, genDoc.ChainID, logger) + if err != nil { + return nil, fmt.Errorf("error with private validator socket client: %w", err) + } } } @@ -1395,6 +1407,7 @@ func createAndStartPrivValidatorSocketClient( chainID string, logger log.Logger, ) (types.PrivValidator, error) { + pve, err := privval.NewSignerListener(listenAddr, logger) if err != nil { return nil, fmt.Errorf("failed to start private validator: %w", err) @@ -1420,23 +1433,44 @@ func createAndStartPrivValidatorSocketClient( return pvscWithRetries, nil } -// splitAndTrimEmpty slices s into all subslices separated by sep and returns a -// slice of the string s with all leading and trailing Unicode code points -// contained in cutset removed. If sep is empty, SplitAndTrim splits after each -// UTF-8 sequence. First part is equivalent to strings.SplitN with a count of -// -1. also filter out empty strings, only return non-empty strings. 
-func splitAndTrimEmpty(s, sep, cutset string) []string { - if s == "" { - return []string{} +func createAndStartPrivValidatorGRPCClient( + config *cfg.Config, + address, + chainID string, + logger log.Logger, +) (types.PrivValidator, error) { + var transportSecurity grpc.DialOption + if config.BaseConfig.ArePrivValidatorClientSecurityOptionsPresent() { + transportSecurity = tmgrpc.GenerateTLS(config.PrivValidatorClientCertificateFile(), + config.PrivValidatorClientKeyFile(), config.PrivValidatorRootCAFile(), logger) + } else { + transportSecurity = grpc.WithInsecure() + logger.Info("Using an insecure gRPC connection!") + } + dialOptions := tmgrpc.DefaultDialOptions() + if config.Instrumentation.Prometheus { + grpcMetrics := grpc_prometheus.DefaultClientMetrics + dialOptions = append(dialOptions, grpc.WithUnaryInterceptor(grpcMetrics.UnaryClientInterceptor())) } - spl := strings.Split(s, sep) - nonEmptyStrings := make([]string, 0, len(spl)) - for i := 0; i < len(spl); i++ { - element := strings.Trim(spl[i], cutset) - if element != "" { - nonEmptyStrings = append(nonEmptyStrings, element) - } + dialOptions = append(dialOptions, transportSecurity) + + ctx := context.Background() + + conn, err := grpc.DialContext(ctx, address, dialOptions...) 
+ if err != nil { + logger.Error("unable to connect to server", "target", address, "err", err) } - return nonEmptyStrings + pvsc, err := tmgrpc.NewSignerClient(conn, chainID, logger) + if err != nil { + return nil, fmt.Errorf("failed to start private validator: %w", err) + } + + // try to get a pubkey from private validate first time + _, err = pvsc.GetPubKey() + if err != nil { + return nil, fmt.Errorf("can't get pubkey: %w", err) + } + + return pvsc, nil } diff --git a/node/utils.go b/node/utils.go new file mode 100644 index 000000000..a96dd457f --- /dev/null +++ b/node/utils.go @@ -0,0 +1,26 @@ +package node + +import ( + "strings" +) + +// splitAndTrimEmpty slices s into all subslices separated by sep and returns a +// slice of the string s with all leading and trailing Unicode code points +// contained in cutset removed. If sep is empty, SplitAndTrim splits after each +// UTF-8 sequence. First part is equivalent to strings.SplitN with a count of +// -1. also filter out empty strings, only return non-empty strings. 
+func splitAndTrimEmpty(s, sep, cutset string) []string { + if s == "" { + return []string{} + } + + spl := strings.Split(s, sep) + nonEmptyStrings := make([]string, 0, len(spl)) + for i := 0; i < len(spl); i++ { + element := strings.Trim(spl[i], cutset) + if element != "" { + nonEmptyStrings = append(nonEmptyStrings, element) + } + } + return nonEmptyStrings +} diff --git a/privval/grpc/client.go b/privval/grpc/client.go new file mode 100644 index 000000000..5b44973e8 --- /dev/null +++ b/privval/grpc/client.go @@ -0,0 +1,108 @@ +package grpc + +import ( + "context" + "time" + + grpc "google.golang.org/grpc" + "google.golang.org/grpc/status" + + "github.com/tendermint/tendermint/crypto" + cryptoenc "github.com/tendermint/tendermint/crypto/encoding" + "github.com/tendermint/tendermint/libs/log" + privvalproto "github.com/tendermint/tendermint/proto/tendermint/privval" + tmproto "github.com/tendermint/tendermint/proto/tendermint/types" + "github.com/tendermint/tendermint/types" +) + +// SignerClient implements PrivValidator. +// Handles remote validator connections that provide signing services +type SignerClient struct { + logger log.Logger + + client privvalproto.PrivValidatorAPIClient + conn *grpc.ClientConn + chainID string +} + +var _ types.PrivValidator = (*SignerClient)(nil) + +// NewSignerClient returns an instance of SignerClient. 
+// it will start the endpoint (if not already started) +func NewSignerClient(conn *grpc.ClientConn, + chainID string, log log.Logger) (*SignerClient, error) { + + sc := &SignerClient{ + logger: log, + chainID: chainID, + client: privvalproto.NewPrivValidatorAPIClient(conn), // Create the Private Validator Client + } + + return sc, nil +} + +// Close closes the underlying connection +func (sc *SignerClient) Close() error { + sc.logger.Info("Stopping service") + if sc.conn != nil { + return sc.conn.Close() + } + return nil +} + +//-------------------------------------------------------- +// Implement PrivValidator + +// GetPubKey retrieves a public key from a remote signer +// returns an error if client is not able to provide the key +func (sc *SignerClient) GetPubKey() (crypto.PubKey, error) { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) // Todo: should this be configurable? + defer cancel() + resp, err := sc.client.GetPubKey(ctx, &privvalproto.PubKeyRequest{ChainId: sc.chainID}) + if err != nil { + errStatus, _ := status.FromError(err) + sc.logger.Error("SignerClient::GetPubKey", "err", errStatus.Message()) + return nil, errStatus.Err() + } + + pk, err := cryptoenc.PubKeyFromProto(resp.PubKey) + if err != nil { + return nil, err + } + + return pk, nil +} + +// SignVote requests a remote signer to sign a vote +func (sc *SignerClient) SignVote(chainID string, vote *tmproto.Vote) error { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + resp, err := sc.client.SignVote(ctx, &privvalproto.SignVoteRequest{ChainId: sc.chainID, Vote: vote}) + if err != nil { + errStatus, _ := status.FromError(err) + sc.logger.Error("Client SignVote", "err", errStatus.Message()) + return errStatus.Err() + } + + *vote = resp.Vote + + return nil +} + +// SignProposal requests a remote signer to sign a proposal +func (sc *SignerClient) SignProposal(chainID string, proposal *tmproto.Proposal) error { + ctx, cancel := 
context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + resp, err := sc.client.SignProposal( + ctx, &privvalproto.SignProposalRequest{ChainId: chainID, Proposal: proposal}) + + if err != nil { + errStatus, _ := status.FromError(err) + sc.logger.Error("SignerClient::SignProposal", "err", errStatus.Message()) + return errStatus.Err() + } + + *proposal = resp.Proposal + + return nil +} diff --git a/privval/grpc/client_test.go b/privval/grpc/client_test.go new file mode 100644 index 000000000..09feea731 --- /dev/null +++ b/privval/grpc/client_test.go @@ -0,0 +1,168 @@ +package grpc_test + +import ( + "context" + "net" + "testing" + "time" + + grpc "google.golang.org/grpc" + "google.golang.org/grpc/test/bufconn" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/tendermint/tendermint/crypto" + "github.com/tendermint/tendermint/crypto/tmhash" + "github.com/tendermint/tendermint/libs/log" + tmrand "github.com/tendermint/tendermint/libs/rand" + tmgrpc "github.com/tendermint/tendermint/privval/grpc" + privvalproto "github.com/tendermint/tendermint/proto/tendermint/privval" + tmproto "github.com/tendermint/tendermint/proto/tendermint/types" + "github.com/tendermint/tendermint/types" +) + +const chainID = "chain-id" + +func dialer(pv types.PrivValidator, logger log.Logger) (*grpc.Server, func(context.Context, string) (net.Conn, error)) { + listener := bufconn.Listen(1024 * 1024) + + server := grpc.NewServer() + + s := tmgrpc.NewSignerServer(chainID, pv, logger) + + privvalproto.RegisterPrivValidatorAPIServer(server, s) + + go func() { + if err := server.Serve(listener); err != nil { + panic(err) + } + }() + + return server, func(context.Context, string) (net.Conn, error) { + return listener.Dial() + } +} + +func TestSignerClient_GetPubKey(t *testing.T) { + + ctx := context.Background() + mockPV := types.NewMockPV() + logger := log.TestingLogger() + srv, dialer := dialer(mockPV, logger) + defer srv.Stop() + + 
conn, err := grpc.DialContext(ctx, "", grpc.WithInsecure(), grpc.WithContextDialer(dialer)) + if err != nil { + panic(err) + } + defer conn.Close() + + client, err := tmgrpc.NewSignerClient(conn, chainID, logger) + require.NoError(t, err) + + pk, err := client.GetPubKey() + require.NoError(t, err) + assert.Equal(t, mockPV.PrivKey.PubKey(), pk) +} + +func TestSignerClient_SignVote(t *testing.T) { + + ctx := context.Background() + mockPV := types.NewMockPV() + logger := log.TestingLogger() + srv, dialer := dialer(mockPV, logger) + defer srv.Stop() + + conn, err := grpc.DialContext(ctx, "", grpc.WithInsecure(), grpc.WithContextDialer(dialer)) + if err != nil { + panic(err) + } + defer conn.Close() + + client, err := tmgrpc.NewSignerClient(conn, chainID, logger) + require.NoError(t, err) + + ts := time.Now() + hash := tmrand.Bytes(tmhash.Size) + valAddr := tmrand.Bytes(crypto.AddressSize) + + want := &types.Vote{ + Type: tmproto.PrecommitType, + Height: 1, + Round: 2, + BlockID: types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}}, + Timestamp: ts, + ValidatorAddress: valAddr, + ValidatorIndex: 1, + } + + have := &types.Vote{ + Type: tmproto.PrecommitType, + Height: 1, + Round: 2, + BlockID: types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}}, + Timestamp: ts, + ValidatorAddress: valAddr, + ValidatorIndex: 1, + } + + pbHave := have.ToProto() + + err = client.SignVote(chainID, pbHave) + require.NoError(t, err) + + pbWant := want.ToProto() + + require.NoError(t, mockPV.SignVote(chainID, pbWant)) + + assert.Equal(t, pbWant.Signature, pbHave.Signature) +} + +func TestSignerClient_SignProposal(t *testing.T) { + + ctx := context.Background() + mockPV := types.NewMockPV() + logger := log.TestingLogger() + srv, dialer := dialer(mockPV, logger) + defer srv.Stop() + + conn, err := grpc.DialContext(ctx, "", grpc.WithInsecure(), grpc.WithContextDialer(dialer)) + if err != nil { + panic(err) + } + defer conn.Close() + + 
client, err := tmgrpc.NewSignerClient(conn, chainID, logger)
+	require.NoError(t, err)
+
+	ts := time.Now()
+	hash := tmrand.Bytes(tmhash.Size)
+
+	have := &types.Proposal{
+		Type:      tmproto.ProposalType,
+		Height:    1,
+		Round:     2,
+		POLRound:  2,
+		BlockID:   types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}},
+		Timestamp: ts,
+	}
+	want := &types.Proposal{
+		Type:      tmproto.ProposalType,
+		Height:    1,
+		Round:     2,
+		POLRound:  2,
+		BlockID:   types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}},
+		Timestamp: ts,
+	}
+
+	pbHave := have.ToProto()
+
+	err = client.SignProposal(chainID, pbHave)
+	require.NoError(t, err)
+
+	pbWant := want.ToProto()
+
+	require.NoError(t, mockPV.SignProposal(chainID, pbWant))
+
+	assert.Equal(t, pbWant.Signature, pbHave.Signature)
+}
diff --git a/privval/grpc/server.go b/privval/grpc/server.go
new file mode 100644
index 000000000..763e50aaa
--- /dev/null
+++ b/privval/grpc/server.go
@@ -0,0 +1,87 @@
+package grpc
+
+import (
+	context "context"
+
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+
+	"github.com/tendermint/tendermint/crypto"
+	cryptoenc "github.com/tendermint/tendermint/crypto/encoding"
+	"github.com/tendermint/tendermint/libs/log"
+	privvalproto "github.com/tendermint/tendermint/proto/tendermint/privval"
+	"github.com/tendermint/tendermint/types"
+)
+
+// SignerServer implements PrivValidatorAPIServer (generated via protobuf services)
+// Handles remote validator connections that provide signing services
+type SignerServer struct {
+	logger  log.Logger
+	chainID string
+	privVal types.PrivValidator
+}
+
+func NewSignerServer(chainID string,
+	privVal types.PrivValidator, log log.Logger) *SignerServer {
+
+	return &SignerServer{
+		logger:  log,
+		chainID: chainID,
+		privVal: privVal,
+	}
+}
+
+var _ privvalproto.PrivValidatorAPIServer = (*SignerServer)(nil)
+
+// PubKey receives a request for the pubkey
+// returns the pubkey on success and error on 
failure +func (ss *SignerServer) GetPubKey(ctx context.Context, req *privvalproto.PubKeyRequest) ( + *privvalproto.PubKeyResponse, error) { + var pubKey crypto.PubKey + + pubKey, err := ss.privVal.GetPubKey() + if err != nil { + return nil, status.Errorf(codes.NotFound, "error getting pubkey: %v", err) + } + + pk, err := cryptoenc.PubKeyToProto(pubKey) + if err != nil { + return nil, status.Errorf(codes.Internal, "error transitioning pubkey to proto: %v", err) + } + + ss.logger.Info("SignerServer: GetPubKey Success") + + return &privvalproto.PubKeyResponse{PubKey: pk}, nil +} + +// SignVote receives a vote sign requests, attempts to sign it +// returns SignedVoteResponse on success and error on failure +func (ss *SignerServer) SignVote(ctx context.Context, req *privvalproto.SignVoteRequest) ( + *privvalproto.SignedVoteResponse, error) { + vote := req.Vote + + err := ss.privVal.SignVote(req.ChainId, vote) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "error signing vote: %v", err) + } + + ss.logger.Info("SignerServer: SignVote Success") + + return &privvalproto.SignedVoteResponse{Vote: *vote}, nil +} + +// SignProposal receives a proposal sign requests, attempts to sign it +// returns SignedProposalResponse on success and error on failure +func (ss *SignerServer) SignProposal(ctx context.Context, req *privvalproto.SignProposalRequest) ( + *privvalproto.SignedProposalResponse, error) { + proposal := req.Proposal + + err := ss.privVal.SignProposal(req.ChainId, proposal) + if err != nil { + return nil, status.Errorf(codes.InvalidArgument, "error signing proposal: %v", err) + } + + ss.logger.Info("SignerServer: SignProposal Success") + + return &privvalproto.SignedProposalResponse{Proposal: *proposal}, nil +} diff --git a/privval/grpc/server_test.go b/privval/grpc/server_test.go new file mode 100644 index 000000000..ab728fdad --- /dev/null +++ b/privval/grpc/server_test.go @@ -0,0 +1,187 @@ +package grpc_test + +import ( + "context" + "testing" + 
"time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/tendermint/tendermint/crypto" + "github.com/tendermint/tendermint/crypto/tmhash" + "github.com/tendermint/tendermint/libs/log" + tmrand "github.com/tendermint/tendermint/libs/rand" + tmgrpc "github.com/tendermint/tendermint/privval/grpc" + privvalproto "github.com/tendermint/tendermint/proto/tendermint/privval" + tmproto "github.com/tendermint/tendermint/proto/tendermint/types" + "github.com/tendermint/tendermint/types" +) + +const ChainID = "123" + +func TestGetPubKey(t *testing.T) { + + testCases := []struct { + name string + pv types.PrivValidator + err bool + }{ + {name: "valid", pv: types.NewMockPV(), err: false}, + {name: "error on pubkey", pv: types.NewErroringMockPV(), err: true}, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + s := tmgrpc.NewSignerServer(ChainID, tc.pv, log.TestingLogger()) + + req := &privvalproto.PubKeyRequest{ChainId: ChainID} + resp, err := s.GetPubKey(context.Background(), req) + if tc.err { + require.Error(t, err) + } else { + pk, err := tc.pv.GetPubKey() + require.NoError(t, err) + assert.Equal(t, resp.PubKey.GetEd25519(), pk.Bytes()) + } + }) + } + +} + +func TestSignVote(t *testing.T) { + + ts := time.Now() + hash := tmrand.Bytes(tmhash.Size) + valAddr := tmrand.Bytes(crypto.AddressSize) + + testCases := []struct { + name string + pv types.PrivValidator + have, want *types.Vote + err bool + }{ + {name: "valid", pv: types.NewMockPV(), have: &types.Vote{ + Type: tmproto.PrecommitType, + Height: 1, + Round: 2, + BlockID: types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}}, + Timestamp: ts, + ValidatorAddress: valAddr, + ValidatorIndex: 1, + }, want: &types.Vote{ + Type: tmproto.PrecommitType, + Height: 1, + Round: 2, + BlockID: types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}}, + Timestamp: ts, + ValidatorAddress: valAddr, + 
ValidatorIndex: 1, + }, + err: false}, + {name: "invalid vote", pv: types.NewErroringMockPV(), have: &types.Vote{ + Type: tmproto.PrecommitType, + Height: 1, + Round: 2, + BlockID: types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}}, + Timestamp: ts, + ValidatorAddress: valAddr, + ValidatorIndex: 1, + Signature: []byte("signed"), + }, want: &types.Vote{ + Type: tmproto.PrecommitType, + Height: 1, + Round: 2, + BlockID: types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}}, + Timestamp: ts, + ValidatorAddress: valAddr, + ValidatorIndex: 1, + Signature: []byte("signed"), + }, + err: true}, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + s := tmgrpc.NewSignerServer(ChainID, tc.pv, log.TestingLogger()) + + req := &privvalproto.SignVoteRequest{ChainId: ChainID, Vote: tc.have.ToProto()} + resp, err := s.SignVote(context.Background(), req) + if tc.err { + require.Error(t, err) + } else { + pbVote := tc.want.ToProto() + + require.NoError(t, tc.pv.SignVote(ChainID, pbVote)) + assert.Equal(t, pbVote.Signature, resp.Vote.Signature) + } + }) + } +} + +func TestSignProposal(t *testing.T) { + + ts := time.Now() + hash := tmrand.Bytes(tmhash.Size) + + testCases := []struct { + name string + pv types.PrivValidator + have, want *types.Proposal + err bool + }{ + {name: "valid", pv: types.NewMockPV(), have: &types.Proposal{ + Type: tmproto.ProposalType, + Height: 1, + Round: 2, + POLRound: 2, + BlockID: types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}}, + Timestamp: ts, + }, want: &types.Proposal{ + Type: tmproto.ProposalType, + Height: 1, + Round: 2, + POLRound: 2, + BlockID: types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}}, + Timestamp: ts, + }, + err: false}, + {name: "invalid proposal", pv: types.NewErroringMockPV(), have: &types.Proposal{ + Type: tmproto.ProposalType, + Height: 1, + Round: 2, + POLRound: 2, + 
BlockID: types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}}, + Timestamp: ts, + Signature: []byte("signed"), + }, want: &types.Proposal{ + Type: tmproto.ProposalType, + Height: 1, + Round: 2, + POLRound: 2, + BlockID: types.BlockID{Hash: hash, PartSetHeader: types.PartSetHeader{Hash: hash, Total: 2}}, + Timestamp: ts, + Signature: []byte("signed"), + }, + err: true}, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + s := tmgrpc.NewSignerServer(ChainID, tc.pv, log.TestingLogger()) + + req := &privvalproto.SignProposalRequest{ChainId: ChainID, Proposal: tc.have.ToProto()} + resp, err := s.SignProposal(context.Background(), req) + if tc.err { + require.Error(t, err) + } else { + pbProposal := tc.want.ToProto() + require.NoError(t, tc.pv.SignProposal(ChainID, pbProposal)) + assert.Equal(t, pbProposal.Signature, resp.Proposal.Signature) + } + }) + } +} diff --git a/privval/grpc/util.go b/privval/grpc/util.go new file mode 100644 index 000000000..66b5397b6 --- /dev/null +++ b/privval/grpc/util.go @@ -0,0 +1,82 @@ +package grpc + +import ( + "crypto/tls" + "crypto/x509" + "io/ioutil" + "os" + "time" + + grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" + "github.com/tendermint/tendermint/libs/log" + grpc "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/keepalive" +) + +// DefaultDialOptions constructs a list of grpc dial options +func DefaultDialOptions( + extraOpts ...grpc.DialOption, +) []grpc.DialOption { + const ( + retries = 50 // 50 * 100ms = 5s total + timeout = 1 * time.Second + maxCallRecvMsgSize = 1 << 20 // Default 5Mb + ) + + var kacp = keepalive.ClientParameters{ + Time: 10 * time.Second, // send pings every 10 seconds if there is no activity + Timeout: 2 * time.Second, // wait 2 seconds for ping ack before considering the connection dead + } + + opts := []grpc_retry.CallOption{ + 
grpc_retry.WithBackoff(grpc_retry.BackoffExponential(timeout)), + } + + dialOpts := []grpc.DialOption{ + grpc.WithKeepaliveParams(kacp), + grpc.WithDefaultCallOptions( + grpc.MaxCallRecvMsgSize(maxCallRecvMsgSize), + grpc_retry.WithMax(retries), + ), + grpc.WithUnaryInterceptor( + grpc_retry.UnaryClientInterceptor(opts...), + ), + } + + dialOpts = append(dialOpts, extraOpts...) + + return dialOpts +} + +func GenerateTLS(certPath, keyPath, ca string, log log.Logger) grpc.DialOption { + certificate, err := tls.LoadX509KeyPair( + certPath, + keyPath, + ) + if err != nil { + log.Error("error", err) + os.Exit(1) + } + + certPool := x509.NewCertPool() + bs, err := ioutil.ReadFile(ca) + if err != nil { + log.Error("failed to read ca cert:", "error", err) + os.Exit(1) + } + + ok := certPool.AppendCertsFromPEM(bs) + if !ok { + log.Error("failed to append certs") + os.Exit(1) + } + + transportCreds := credentials.NewTLS(&tls.Config{ + Certificates: []tls.Certificate{certificate}, + RootCAs: certPool, + MinVersion: tls.VersionTLS13, + }) + + return grpc.WithTransportCredentials(transportCreds) +} diff --git a/proto/tendermint/privval/service.pb.go b/proto/tendermint/privval/service.pb.go new file mode 100644 index 000000000..a9ecd76ed --- /dev/null +++ b/proto/tendermint/privval/service.pb.go @@ -0,0 +1,199 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: tendermint/privval/service.proto + +package privval + +import ( + context "context" + fmt "fmt" + proto "github.com/gogo/protobuf/proto" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" + math "math" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. 
+// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package + +func init() { proto.RegisterFile("tendermint/privval/service.proto", fileDescriptor_7afe74f9f46d3dc9) } + +var fileDescriptor_7afe74f9f46d3dc9 = []byte{ + // 251 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x52, 0x28, 0x49, 0xcd, 0x4b, + 0x49, 0x2d, 0xca, 0xcd, 0xcc, 0x2b, 0xd1, 0x2f, 0x28, 0xca, 0x2c, 0x2b, 0x4b, 0xcc, 0xd1, 0x2f, + 0x4e, 0x2d, 0x2a, 0xcb, 0x4c, 0x4e, 0xd5, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x12, 0x42, 0xa8, + 0xd0, 0x83, 0xaa, 0x90, 0x92, 0xc3, 0xa2, 0xab, 0xa4, 0xb2, 0x20, 0xb5, 0x18, 0xa2, 0xc7, 0x68, + 0x09, 0x13, 0x97, 0x40, 0x40, 0x51, 0x66, 0x59, 0x58, 0x62, 0x4e, 0x66, 0x4a, 0x62, 0x49, 0x7e, + 0x91, 0x63, 0x80, 0xa7, 0x50, 0x10, 0x17, 0xa7, 0x7b, 0x6a, 0x49, 0x40, 0x69, 0x92, 0x77, 0x6a, + 0xa5, 0x90, 0xa2, 0x1e, 0xa6, 0xb1, 0x7a, 0x10, 0xb9, 0xa0, 0xd4, 0xc2, 0xd2, 0xd4, 0xe2, 0x12, + 0x29, 0x25, 0x7c, 0x4a, 0x8a, 0x0b, 0xf2, 0xf3, 0x8a, 0x53, 0x85, 0xc2, 0xb9, 0x38, 0x82, 0x33, + 0xd3, 0xf3, 0xc2, 0xf2, 0x4b, 0x52, 0x85, 0x94, 0xb1, 0xa9, 0x87, 0xc9, 0xc2, 0x0c, 0x55, 0xc3, + 0xa5, 0x28, 0x35, 0x05, 0xa2, 0x0c, 0x6a, 0x70, 0x32, 0x17, 0x0f, 0x48, 0x34, 0xa0, 0x28, 0xbf, + 0x20, 0xbf, 0x38, 0x31, 0x47, 0x48, 0x1d, 0x97, 0x3e, 0x98, 0x0a, 0x98, 0x05, 0x5a, 0xb8, 0x2d, + 0x40, 0x28, 0x85, 0x58, 0xe2, 0x14, 0x7c, 0xe2, 0x91, 0x1c, 0xe3, 0x85, 0x47, 0x72, 0x8c, 0x0f, + 0x1e, 0xc9, 0x31, 0x4e, 0x78, 0x2c, 0xc7, 0x70, 0xe1, 0xb1, 0x1c, 0xc3, 0x8d, 0xc7, 0x72, 0x0c, + 0x51, 0x96, 0xe9, 0x99, 0x25, 0x19, 0xa5, 0x49, 0x7a, 0xc9, 0xf9, 0xb9, 0xfa, 0x48, 0x61, 0x8d, + 0x12, 0xec, 0xf9, 0x25, 0xf9, 0xfa, 0x98, 0xf1, 0x90, 0xc4, 0x06, 0x96, 0x31, 0x06, 0x04, 0x00, + 0x00, 0xff, 0xff, 0x42, 0x60, 0x24, 0x48, 0xda, 0x01, 0x00, 0x00, +} + +// Reference imports to suppress errors if they are not otherwise 
used. +var _ context.Context +var _ grpc.ClientConn + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +const _ = grpc.SupportPackageIsVersion4 + +// PrivValidatorAPIClient is the client API for PrivValidatorAPI service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. +type PrivValidatorAPIClient interface { + GetPubKey(ctx context.Context, in *PubKeyRequest, opts ...grpc.CallOption) (*PubKeyResponse, error) + SignVote(ctx context.Context, in *SignVoteRequest, opts ...grpc.CallOption) (*SignedVoteResponse, error) + SignProposal(ctx context.Context, in *SignProposalRequest, opts ...grpc.CallOption) (*SignedProposalResponse, error) +} + +type privValidatorAPIClient struct { + cc *grpc.ClientConn +} + +func NewPrivValidatorAPIClient(cc *grpc.ClientConn) PrivValidatorAPIClient { + return &privValidatorAPIClient{cc} +} + +func (c *privValidatorAPIClient) GetPubKey(ctx context.Context, in *PubKeyRequest, opts ...grpc.CallOption) (*PubKeyResponse, error) { + out := new(PubKeyResponse) + err := c.cc.Invoke(ctx, "/tendermint.privval.PrivValidatorAPI/GetPubKey", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *privValidatorAPIClient) SignVote(ctx context.Context, in *SignVoteRequest, opts ...grpc.CallOption) (*SignedVoteResponse, error) { + out := new(SignedVoteResponse) + err := c.cc.Invoke(ctx, "/tendermint.privval.PrivValidatorAPI/SignVote", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *privValidatorAPIClient) SignProposal(ctx context.Context, in *SignProposalRequest, opts ...grpc.CallOption) (*SignedProposalResponse, error) { + out := new(SignedProposalResponse) + err := c.cc.Invoke(ctx, "/tendermint.privval.PrivValidatorAPI/SignProposal", in, out, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +// PrivValidatorAPIServer is the server API for PrivValidatorAPI service. +type PrivValidatorAPIServer interface { + GetPubKey(context.Context, *PubKeyRequest) (*PubKeyResponse, error) + SignVote(context.Context, *SignVoteRequest) (*SignedVoteResponse, error) + SignProposal(context.Context, *SignProposalRequest) (*SignedProposalResponse, error) +} + +// UnimplementedPrivValidatorAPIServer can be embedded to have forward compatible implementations. +type UnimplementedPrivValidatorAPIServer struct { +} + +func (*UnimplementedPrivValidatorAPIServer) GetPubKey(ctx context.Context, req *PubKeyRequest) (*PubKeyResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetPubKey not implemented") +} +func (*UnimplementedPrivValidatorAPIServer) SignVote(ctx context.Context, req *SignVoteRequest) (*SignedVoteResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method SignVote not implemented") +} +func (*UnimplementedPrivValidatorAPIServer) SignProposal(ctx context.Context, req *SignProposalRequest) (*SignedProposalResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method SignProposal not implemented") +} + +func RegisterPrivValidatorAPIServer(s *grpc.Server, srv PrivValidatorAPIServer) { + s.RegisterService(&_PrivValidatorAPI_serviceDesc, srv) +} + +func _PrivValidatorAPI_GetPubKey_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PubKeyRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(PrivValidatorAPIServer).GetPubKey(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/tendermint.privval.PrivValidatorAPI/GetPubKey", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(PrivValidatorAPIServer).GetPubKey(ctx, req.(*PubKeyRequest)) + } + 
return interceptor(ctx, in, info, handler) +} + +func _PrivValidatorAPI_SignVote_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(SignVoteRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(PrivValidatorAPIServer).SignVote(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/tendermint.privval.PrivValidatorAPI/SignVote", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(PrivValidatorAPIServer).SignVote(ctx, req.(*SignVoteRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _PrivValidatorAPI_SignProposal_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(SignProposalRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(PrivValidatorAPIServer).SignProposal(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/tendermint.privval.PrivValidatorAPI/SignProposal", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(PrivValidatorAPIServer).SignProposal(ctx, req.(*SignProposalRequest)) + } + return interceptor(ctx, in, info, handler) +} + +var _PrivValidatorAPI_serviceDesc = grpc.ServiceDesc{ + ServiceName: "tendermint.privval.PrivValidatorAPI", + HandlerType: (*PrivValidatorAPIServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "GetPubKey", + Handler: _PrivValidatorAPI_GetPubKey_Handler, + }, + { + MethodName: "SignVote", + Handler: _PrivValidatorAPI_SignVote_Handler, + }, + { + MethodName: "SignProposal", + Handler: _PrivValidatorAPI_SignProposal_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "tendermint/privval/service.proto", +} diff --git a/proto/tendermint/privval/service.proto 
b/proto/tendermint/privval/service.proto new file mode 100644 index 000000000..63e9afca7 --- /dev/null +++ b/proto/tendermint/privval/service.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; +package tendermint.privval; +option go_package = "github.com/tendermint/tendermint/proto/tendermint/privval"; + +import "tendermint/privval/types.proto"; + +//---------------------------------------- +// Service Definition + +service PrivValidatorAPI { + rpc GetPubKey(PubKeyRequest) returns (PubKeyResponse); + rpc SignVote(SignVoteRequest) returns (SignedVoteResponse); + rpc SignProposal(SignProposalRequest) returns (SignedProposalResponse); +} diff --git a/test/e2e/app/main.go b/test/e2e/app/main.go index 34d99b909..e641f0e8c 100644 --- a/test/e2e/app/main.go +++ b/test/e2e/app/main.go @@ -3,12 +3,14 @@ package main import ( "errors" "fmt" + "net" "os" "path/filepath" "strconv" "time" "github.com/spf13/viper" + "google.golang.org/grpc" "github.com/tendermint/tendermint/abci/server" "github.com/tendermint/tendermint/config" @@ -19,6 +21,8 @@ import ( "github.com/tendermint/tendermint/node" "github.com/tendermint/tendermint/p2p" "github.com/tendermint/tendermint/privval" + grpcprivval "github.com/tendermint/tendermint/privval/grpc" + privvalproto "github.com/tendermint/tendermint/proto/tendermint/privval" "github.com/tendermint/tendermint/proxy" mcs "github.com/tendermint/tendermint/test/maverick/consensus" maverick "github.com/tendermint/tendermint/test/maverick/node" @@ -182,6 +186,24 @@ func startSigner(cfg *Config) error { dialFn = privval.DialTCPFn(address, 3*time.Second, ed25519.GenPrivKey()) case "unix": dialFn = privval.DialUnixFn(address) + case "grpc": + lis, err := net.Listen("tcp", address) + if err != nil { + return err + } + ss := grpcprivval.NewSignerServer(cfg.ChainID, filePV, logger) + + s := grpc.NewServer() + + privvalproto.RegisterPrivValidatorAPIServer(s, ss) + + go func() { // no need to clean up since we remove docker containers + if err := s.Serve(lis); err != 
nil { + panic(err) + } + }() + + return nil default: return fmt.Errorf("invalid privval protocol %q", protocol) } @@ -193,6 +215,7 @@ func startSigner(cfg *Config) error { if err != nil { return err } + logger.Info(fmt.Sprintf("Remote signer connecting to %v", cfg.PrivValServer)) return nil } diff --git a/test/e2e/generator/generate.go b/test/e2e/generator/generate.go index de6aea731..42cb3d1a5 100644 --- a/test/e2e/generator/generate.go +++ b/test/e2e/generator/generate.go @@ -30,7 +30,7 @@ var ( nodeDatabases = uniformChoice{"goleveldb", "cleveldb", "rocksdb", "boltdb", "badgerdb"} // FIXME: grpc disabled due to https://github.com/tendermint/tendermint/issues/5439 nodeABCIProtocols = uniformChoice{"unix", "tcp", "builtin"} // "grpc" - nodePrivvalProtocols = uniformChoice{"file", "unix", "tcp"} + nodePrivvalProtocols = uniformChoice{"file", "unix", "tcp", "grpc"} // FIXME: v2 disabled due to flake nodeFastSyncs = uniformChoice{"", "v0"} // "v2" nodeStateSyncs = uniformChoice{false, true} diff --git a/test/e2e/networks/ci.toml b/test/e2e/networks/ci.toml index d0dc9db6d..e27092040 100644 --- a/test/e2e/networks/ci.toml +++ b/test/e2e/networks/ci.toml @@ -51,7 +51,7 @@ seeds = ["seed01"] database = "badgerdb" # FIXME: should be grpc, disabled due to https://github.com/tendermint/tendermint/issues/5439 #abci_protocol = "grpc" -privval_protocol = "unix" +privval_protocol = "grpc" persist_interval = 3 retain_blocks = 3 perturb = ["kill"] diff --git a/test/e2e/pkg/manifest.go b/test/e2e/pkg/manifest.go index e94fd0790..492e362d8 100644 --- a/test/e2e/pkg/manifest.go +++ b/test/e2e/pkg/manifest.go @@ -78,8 +78,9 @@ type ManifestNode struct { ABCIProtocol string `toml:"abci_protocol"` // PrivvalProtocol specifies the protocol used to sign consensus messages: - // "file", "unix", or "tcp". Defaults to "file". For unix and tcp, the ABCI + // "file", "unix", "tcp", or "grpc". Defaults to "file". 
For tcp and unix, the ABCI // application will launch a remote signer client in a separate goroutine. + // For grpc the ABCI application will launch a remote signer server. // Only nodes with mode=validator will actually make use of this. PrivvalProtocol string `toml:"privval_protocol"` diff --git a/test/e2e/pkg/testnet.go b/test/e2e/pkg/testnet.go index eaae577b1..da0890b00 100644 --- a/test/e2e/pkg/testnet.go +++ b/test/e2e/pkg/testnet.go @@ -322,7 +322,7 @@ func (n Node) Validate(testnet Testnet) error { return fmt.Errorf("invalid ABCI protocol setting %q", n.ABCIProtocol) } switch n.PrivvalProtocol { - case ProtocolFile, ProtocolUNIX, ProtocolTCP: + case ProtocolFile, ProtocolTCP, ProtocolGRPC, ProtocolUNIX: default: return fmt.Errorf("invalid privval protocol setting %q", n.PrivvalProtocol) } diff --git a/test/e2e/runner/setup.go b/test/e2e/runner/setup.go index c0dd893d7..ceb7f29d9 100644 --- a/test/e2e/runner/setup.go +++ b/test/e2e/runner/setup.go @@ -32,6 +32,7 @@ const ( AppAddressUNIX = "unix:///var/run/app.sock" PrivvalAddressTCP = "tcp://0.0.0.0:27559" + PrivvalAddressGRPC = "grpc://0.0.0.0:27559" PrivvalAddressUNIX = "unix:///var/run/privval.sock" PrivvalKeyFile = "config/priv_validator_key.json" PrivvalStateFile = "data/priv_validator_state.json" @@ -265,6 +266,8 @@ func MakeConfig(node *e2e.Node) (*config.Config, error) { cfg.PrivValidatorListenAddr = PrivvalAddressUNIX case e2e.ProtocolTCP: cfg.PrivValidatorListenAddr = PrivvalAddressTCP + case e2e.ProtocolGRPC: + cfg.PrivValidatorListenAddr = PrivvalAddressGRPC default: return nil, fmt.Errorf("invalid privval protocol setting %q", node.PrivvalProtocol) } @@ -351,6 +354,10 @@ func MakeAppConfig(node *e2e.Node) ([]byte, error) { cfg["privval_server"] = PrivvalAddressUNIX cfg["privval_key"] = PrivvalKeyFile cfg["privval_state"] = PrivvalStateFile + case e2e.ProtocolGRPC: + cfg["privval_server"] = PrivvalAddressGRPC + cfg["privval_key"] = PrivvalKeyFile + cfg["privval_state"] = PrivvalStateFile 
default: return nil, fmt.Errorf("unexpected privval protocol setting %q", node.PrivvalProtocol) } diff --git a/types/priv_validator.go b/types/priv_validator.go index 49211773a..3ce02511a 100644 --- a/types/priv_validator.go +++ b/types/priv_validator.go @@ -128,6 +128,11 @@ type ErroringMockPV struct { var ErroringMockPVErr = errors.New("erroringMockPV always returns an error") +// Implements PrivValidator. +func (pv *ErroringMockPV) GetPubKey() (crypto.PubKey, error) { + return nil, ErroringMockPVErr +} + // Implements PrivValidator. func (pv *ErroringMockPV) SignVote(chainID string, vote *tmproto.Vote) error { return ErroringMockPVErr From 66ba12d9bce6c0b8109e7100db6f39108f286359 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 8 Jan 2021 12:06:51 +0100 Subject: [PATCH 2/7] test/e2e: tolerate up to 2/3 missed signatures for a validator (#5878) E2E tests often fail due to fast sync stalls causing the validator to miss signing blocks. This increases the tolerance for missed signatures to 2/3 to allow validators to spend more time starting up. 
--- test/e2e/tests/validator_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e/tests/validator_test.go b/test/e2e/tests/validator_test.go index 8a36bb55d..f358296f2 100644 --- a/test/e2e/tests/validator_test.go +++ b/test/e2e/tests/validator_test.go @@ -115,8 +115,8 @@ func TestValidator_Sign(t *testing.T) { } require.False(t, signCount == 0 && expectCount > 0, - "node did not sign any blocks (expected %v)", expectCount) - require.Less(t, float64(expectCount-signCount)/float64(expectCount), 0.5, + "validator did not sign any blocks (expected %v)", expectCount) + require.Less(t, float64(expectCount-signCount)/float64(expectCount), 0.33, "validator missed signing too many blocks (signed %v out of %v)", signCount, expectCount) }) } From 385ea1db7d718e9c3be43055671f7397b78906be Mon Sep 17 00:00:00 2001 From: Callum Waters Date: Fri, 8 Jan 2021 13:12:54 +0100 Subject: [PATCH 3/7] store: use db iterators for pruning and range-based queries (#5848) --- CHANGELOG_PENDING.md | 3 +- cmd/tendermint/commands/light.go | 8 +- light/client_benchmark_test.go | 6 +- light/client_test.go | 42 ++-- light/detector_test.go | 10 +- light/example_test.go | 4 +- light/store/db/db.go | 66 +++--- light/store/db/db_test.go | 10 +- proto/tendermint/store/types.pb.go | 337 ---------------------------- proto/tendermint/store/types.proto | 9 - statesync/stateprovider.go | 2 +- store/store.go | 342 ++++++++++++++++------------- store/store_test.go | 77 +------ 13 files changed, 269 insertions(+), 647 deletions(-) delete mode 100644 proto/tendermint/store/types.pb.go delete mode 100644 proto/tendermint/store/types.proto diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index a3126d1f5..909dd8bdc 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -18,7 +18,7 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi - [config] \#5598 The `test_fuzz` and `test_fuzz_config` P2P settings have been removed. 
(@erikgrinaker) - [config] \#5728 `fast_sync = "v1"` is no longer supported (@melekes) - [cli] \#5772 `gen_node_key` prints JSON-encoded `NodeKey` rather than ID and does not save it to `node_key.json` (@melekes) - - [cli] \#5777 use hypen-case instead of snake_case for all cli comamnds and config parameters + - [cli] \#5777 use hyphen-case instead of snake_case for all cli commands and config parameters (@cmwaters) - Apps - [ABCI] \#5447 Remove `SetOption` method from `ABCI.Client` interface @@ -35,6 +35,7 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi - [proto/p2p] Rename `NodeInfo.default_node_id` to `node_id` (@erikgrinaker) - [libs/os] `EnsureDir` now propagates IO errors and checks the file type (@erikgrinaker) - [libs/os] Kill() and {Must,}{Read,Write}File() functions have been removed. (@alessio) + - [store] \#5848 Remove block store state in favor of using the db iterators directly (@cmwaters) - Blockchain Protocol diff --git a/cmd/tendermint/commands/light.go b/cmd/tendermint/commands/light.go index dcf89fc6d..80d8f0750 100644 --- a/cmd/tendermint/commands/light.go +++ b/cmd/tendermint/commands/light.go @@ -122,10 +122,12 @@ func runProxy(cmd *cobra.Command, args []string) error { witnessesAddrs = strings.Split(witnessAddrsJoined, ",") } - db, err := dbm.NewGoLevelDB("light-client-db", dir) + lightDB, err := dbm.NewGoLevelDB("light-client-db", dir) if err != nil { return fmt.Errorf("can't create a db: %w", err) } + // create a prefixed db on the chainID + db := dbm.NewPrefixDB(lightDB, []byte(chainID)) if primaryAddr == "" { // check to see if we can start from an existing state var err error @@ -187,7 +189,7 @@ func runProxy(cmd *cobra.Command, args []string) error { }, primaryAddr, witnessesAddrs, - dbs.New(db, chainID), + dbs.New(db), options..., ) } else { // continue from latest state @@ -196,7 +198,7 @@ func runProxy(cmd *cobra.Command, args []string) error { trustingPeriod, primaryAddr, witnessesAddrs, - 
dbs.New(db, chainID), + dbs.New(db), options..., ) } diff --git a/light/client_benchmark_test.go b/light/client_benchmark_test.go index eb02686b8..72930928d 100644 --- a/light/client_benchmark_test.go +++ b/light/client_benchmark_test.go @@ -37,7 +37,7 @@ func BenchmarkSequence(b *testing.B) { }, benchmarkFullNode, []provider.Provider{benchmarkFullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), light.SequentialVerification(), ) @@ -65,7 +65,7 @@ func BenchmarkBisection(b *testing.B) { }, benchmarkFullNode, []provider.Provider{benchmarkFullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), ) if err != nil { @@ -93,7 +93,7 @@ func BenchmarkBackwards(b *testing.B) { }, benchmarkFullNode, []provider.Provider{benchmarkFullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), ) if err != nil { diff --git a/light/client_test.go b/light/client_test.go index 13d550de7..7e375ade3 100644 --- a/light/client_test.go +++ b/light/client_test.go @@ -231,7 +231,7 @@ func TestClient_SequentialVerification(t *testing.T) { tc.otherHeaders, tc.vals, )}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.SequentialVerification(), light.Logger(log.TestingLogger()), ) @@ -356,7 +356,7 @@ func TestClient_SkippingVerification(t *testing.T) { tc.otherHeaders, tc.vals, )}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.SkippingVerification(light.DefaultTrustLevel), light.Logger(log.TestingLogger()), ) @@ -394,7 +394,7 @@ func TestClientLargeBisectionVerification(t *testing.T) { }, veryLargeFullNode, []provider.Provider{veryLargeFullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.SkippingVerification(light.DefaultTrustLevel), ) require.NoError(t, err) @@ -416,7 +416,7 @@ func TestClientBisectionBetweenTrustedHeaders(t *testing.T) { }, fullNode, 
[]provider.Provider{fullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.SkippingVerification(light.DefaultTrustLevel), ) require.NoError(t, err) @@ -440,7 +440,7 @@ func TestClient_Cleanup(t *testing.T) { trustOptions, fullNode, []provider.Provider{fullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), ) require.NoError(t, err) @@ -460,7 +460,7 @@ func TestClient_Cleanup(t *testing.T) { func TestClientRestoresTrustedHeaderAfterStartup1(t *testing.T) { // 1. options.Hash == trustedHeader.Hash { - trustedStore := dbs.New(dbm.NewMemDB(), chainID) + trustedStore := dbs.New(dbm.NewMemDB()) err := trustedStore.SaveLightBlock(l1) require.NoError(t, err) @@ -484,7 +484,7 @@ func TestClientRestoresTrustedHeaderAfterStartup1(t *testing.T) { // 2. options.Hash != trustedHeader.Hash { - trustedStore := dbs.New(dbm.NewMemDB(), chainID) + trustedStore := dbs.New(dbm.NewMemDB()) err := trustedStore.SaveLightBlock(l1) require.NoError(t, err) @@ -529,7 +529,7 @@ func TestClientRestoresTrustedHeaderAfterStartup1(t *testing.T) { func TestClientRestoresTrustedHeaderAfterStartup2(t *testing.T) { // 1. options.Hash == trustedHeader.Hash { - trustedStore := dbs.New(dbm.NewMemDB(), chainID) + trustedStore := dbs.New(dbm.NewMemDB()) err := trustedStore.SaveLightBlock(l1) require.NoError(t, err) @@ -559,7 +559,7 @@ func TestClientRestoresTrustedHeaderAfterStartup2(t *testing.T) { // 2. options.Hash != trustedHeader.Hash // This could happen if previous provider was lying to us. { - trustedStore := dbs.New(dbm.NewMemDB(), chainID) + trustedStore := dbs.New(dbm.NewMemDB()) err := trustedStore.SaveLightBlock(l1) require.NoError(t, err) @@ -606,7 +606,7 @@ func TestClientRestoresTrustedHeaderAfterStartup3(t *testing.T) { // 1. 
options.Hash == trustedHeader.Hash { // load the first three headers into the trusted store - trustedStore := dbs.New(dbm.NewMemDB(), chainID) + trustedStore := dbs.New(dbm.NewMemDB()) err := trustedStore.SaveLightBlock(l1) require.NoError(t, err) @@ -644,7 +644,7 @@ func TestClientRestoresTrustedHeaderAfterStartup3(t *testing.T) { // 2. options.Hash != trustedHeader.Hash // This could happen if previous provider was lying to us. { - trustedStore := dbs.New(dbm.NewMemDB(), chainID) + trustedStore := dbs.New(dbm.NewMemDB()) err := trustedStore.SaveLightBlock(l1) require.NoError(t, err) @@ -704,7 +704,7 @@ func TestClient_Update(t *testing.T) { trustOptions, fullNode, []provider.Provider{fullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), ) require.NoError(t, err) @@ -725,7 +725,7 @@ func TestClient_Concurrency(t *testing.T) { trustOptions, fullNode, []provider.Provider{fullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), ) require.NoError(t, err) @@ -766,7 +766,7 @@ func TestClientReplacesPrimaryWithWitnessIfPrimaryIsUnavailable(t *testing.T) { trustOptions, deadNode, []provider.Provider{fullNode, fullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), light.MaxRetryAttempts(1), ) @@ -792,7 +792,7 @@ func TestClient_BackwardsVerification(t *testing.T) { }, largeFullNode, []provider.Provider{largeFullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), ) require.NoError(t, err) @@ -874,7 +874,7 @@ func TestClient_BackwardsVerification(t *testing.T) { }, tc.provider, []provider.Provider{tc.provider}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), ) require.NoError(t, err, idx) @@ -887,7 +887,7 @@ func TestClient_BackwardsVerification(t *testing.T) { func TestClient_NewClientFromTrustedStore(t *testing.T) 
{ // 1) Initiate DB and fill with a "trusted" header - db := dbs.New(dbm.NewMemDB(), chainID) + db := dbs.New(dbm.NewMemDB()) err := db.SaveLightBlock(l1) require.NoError(t, err) @@ -944,7 +944,7 @@ func TestClientRemovesWitnessIfItSendsUsIncorrectHeader(t *testing.T) { trustOptions, fullNode, []provider.Provider{badProvider1, badProvider2}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), light.MaxRetryAttempts(1), ) @@ -994,7 +994,7 @@ func TestClient_TrustedValidatorSet(t *testing.T) { trustOptions, fullNode, []provider.Provider{badValSetNode, fullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), ) require.NoError(t, err) @@ -1012,7 +1012,7 @@ func TestClientPrunesHeadersAndValidatorSets(t *testing.T) { trustOptions, fullNode, []provider.Provider{fullNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), light.PruningSize(1), ) @@ -1085,7 +1085,7 @@ func TestClientEnsureValidHeadersAndValSets(t *testing.T) { trustOptions, badNode, []provider.Provider{badNode, badNode}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.MaxRetryAttempts(1), ) require.NoError(t, err) diff --git a/light/detector_test.go b/light/detector_test.go index bcf494159..04d3c3377 100644 --- a/light/detector_test.go +++ b/light/detector_test.go @@ -54,7 +54,7 @@ func TestLightClientAttackEvidence_Lunatic(t *testing.T) { }, primary, []provider.Provider{witness}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), light.MaxRetryAttempts(1), ) @@ -136,7 +136,7 @@ func TestLightClientAttackEvidence_Equivocation(t *testing.T) { }, primary, []provider.Provider{witness}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), light.MaxRetryAttempts(1), verificationOption, @@ -191,7 +191,7 @@ func TestClientDivergentTraces1(t *testing.T) { }, 
primary, []provider.Provider{witness}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), light.MaxRetryAttempts(1), ) @@ -215,7 +215,7 @@ func TestClientDivergentTraces2(t *testing.T) { }, primary, []provider.Provider{deadNode, deadNode, primary}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), light.MaxRetryAttempts(1), ) @@ -250,7 +250,7 @@ func TestClientDivergentTraces3(t *testing.T) { }, primary, []provider.Provider{witness}, - dbs.New(dbm.NewMemDB(), chainID), + dbs.New(dbm.NewMemDB()), light.Logger(log.TestingLogger()), light.MaxRetryAttempts(1), ) diff --git a/light/example_test.go b/light/example_test.go index b599778b8..1f67402fa 100644 --- a/light/example_test.go +++ b/light/example_test.go @@ -61,7 +61,7 @@ func ExampleClient_Update() { }, primary, []provider.Provider{primary}, // NOTE: primary should not be used here - dbs.New(db, chainID), + dbs.New(db), light.Logger(log.TestingLogger()), ) if err != nil { @@ -129,7 +129,7 @@ func ExampleClient_VerifyLightBlockAtHeight() { }, primary, []provider.Provider{primary}, // NOTE: primary should not be used here - dbs.New(db, chainID), + dbs.New(db), light.Logger(log.TestingLogger()), ) if err != nil { diff --git a/light/store/db/db.go b/light/store/db/db.go index 25991f476..fb9e04cce 100644 --- a/light/store/db/db.go +++ b/light/store/db/db.go @@ -19,18 +19,17 @@ const ( ) type dbs struct { - db dbm.DB - prefix string + db dbm.DB mtx tmsync.RWMutex size uint16 } -// New returns a Store that wraps any DB (with an optional prefix in case you -// want to use one DB with many light clients). 
-func New(db dbm.DB, prefix string) store.Store { +// New returns a Store that wraps any DB +// If you want to share one DB across many light clients consider using PrefixDB +func New(db dbm.DB) store.Store { - lightStore := &dbs{db: db, prefix: prefix} + lightStore := &dbs{db: db} // retrieve the size of the db size := uint16(0) @@ -197,11 +196,17 @@ func (s *dbs) LightBlockBefore(height int64) (*types.LightBlock, error) { defer itr.Close() if itr.Valid() { - existingHeight, err := s.decodeLbKey(itr.Key()) + var lbpb tmproto.LightBlock + err = lbpb.Unmarshal(itr.Value()) if err != nil { - return nil, err + return nil, fmt.Errorf("unmarshal error: %w", err) } - return s.LightBlock(existingHeight) + + lightBlock, err := types.LightBlockFromProto(&lbpb) + if err != nil { + return nil, fmt.Errorf("proto conversion error: %w", err) + } + return lightBlock, nil } if err = itr.Error(); err != nil { return nil, err @@ -238,39 +243,32 @@ func (s *dbs) Prune(size uint16) error { b := s.db.NewBatch() defer b.Close() - pruned := 0 for itr.Valid() && numToPrune > 0 { - key := itr.Key() - height, err := s.decodeLbKey(key) - if err != nil { - return err - } - if err = b.Delete(s.lbKey(height)); err != nil { + if err = b.Delete(itr.Key()); err != nil { return err } itr.Next() numToPrune-- - pruned++ } if err = itr.Error(); err != nil { return err } + // 3) // update size + s.mtx.Lock() + s.size = size + s.mtx.Unlock() + + if wErr := b.Set(s.sizeKey(), marshalSize(size)); wErr != nil { + return fmt.Errorf("failed to persist size: %w", wErr) + } + + // 4) write batch deletion to disk err = b.WriteSync() if err != nil { return err } - // 3) Update size. 
- s.mtx.Lock() - defer s.mtx.Unlock() - - s.size -= uint16(pruned) - - if wErr := s.db.SetSync(s.sizeKey(), marshalSize(size)); wErr != nil { - return fmt.Errorf("failed to persist size: %w", wErr) - } - return nil } @@ -284,7 +282,7 @@ func (s *dbs) Size() uint16 { } func (s *dbs) sizeKey() []byte { - key, err := orderedcode.Append(nil, s.prefix, prefixSize) + key, err := orderedcode.Append(nil, prefixSize) if err != nil { panic(err) } @@ -292,7 +290,7 @@ func (s *dbs) sizeKey() []byte { } func (s *dbs) lbKey(height int64) []byte { - key, err := orderedcode.Append(nil, s.prefix, prefixLightBlock, height) + key, err := orderedcode.Append(nil, prefixLightBlock, height) if err != nil { panic(err) } @@ -300,11 +298,8 @@ func (s *dbs) lbKey(height int64) []byte { } func (s *dbs) decodeLbKey(key []byte) (height int64, err error) { - var ( - dbPrefix string - lightBlockPrefix int64 - ) - remaining, err := orderedcode.Parse(string(key), &dbPrefix, &lightBlockPrefix, &height) + var lightBlockPrefix int64 + remaining, err := orderedcode.Parse(string(key), &lightBlockPrefix, &height) if err != nil { err = fmt.Errorf("failed to parse light block key: %w", err) } @@ -314,9 +309,6 @@ func (s *dbs) decodeLbKey(key []byte) (height int64, err error) { if lightBlockPrefix != prefixLightBlock { err = fmt.Errorf("expected light block prefix but got: %d", lightBlockPrefix) } - if dbPrefix != s.prefix { - err = fmt.Errorf("parsed key has a different prefix. 
Expected: %s, got: %s", s.prefix, dbPrefix) - } return } diff --git a/light/store/db/db_test.go b/light/store/db/db_test.go index 1912baca3..d0dbe2896 100644 --- a/light/store/db/db_test.go +++ b/light/store/db/db_test.go @@ -19,7 +19,7 @@ import ( ) func TestLast_FirstLightBlockHeight(t *testing.T) { - dbStore := New(dbm.NewMemDB(), "TestLast_FirstLightBlockHeight") + dbStore := New(dbm.NewMemDB()) // Empty store height, err := dbStore.LastLightBlockHeight() @@ -44,7 +44,7 @@ func TestLast_FirstLightBlockHeight(t *testing.T) { } func Test_SaveLightBlock(t *testing.T) { - dbStore := New(dbm.NewMemDB(), "Test_SaveLightBlockAndValidatorSet") + dbStore := New(dbm.NewMemDB()) // Empty store h, err := dbStore.LightBlock(1) @@ -74,7 +74,7 @@ func Test_SaveLightBlock(t *testing.T) { } func Test_LightBlockBefore(t *testing.T) { - dbStore := New(dbm.NewMemDB(), "Test_LightBlockBefore") + dbStore := New(dbm.NewMemDB()) assert.Panics(t, func() { _, _ = dbStore.LightBlockBefore(0) @@ -95,7 +95,7 @@ func Test_LightBlockBefore(t *testing.T) { } func Test_Prune(t *testing.T) { - dbStore := New(dbm.NewMemDB(), "Test_Prune") + dbStore := New(dbm.NewMemDB()) // Empty store assert.EqualValues(t, 0, dbStore.Size()) @@ -132,7 +132,7 @@ func Test_Prune(t *testing.T) { } func Test_Concurrency(t *testing.T) { - dbStore := New(dbm.NewMemDB(), "Test_Prune") + dbStore := New(dbm.NewMemDB()) var wg sync.WaitGroup for i := 1; i <= 100; i++ { diff --git a/proto/tendermint/store/types.pb.go b/proto/tendermint/store/types.pb.go deleted file mode 100644 index c18c88fa4..000000000 --- a/proto/tendermint/store/types.pb.go +++ /dev/null @@ -1,337 +0,0 @@ -// Code generated by protoc-gen-gogo. DO NOT EDIT. -// source: tendermint/store/types.proto - -package store - -import ( - fmt "fmt" - proto "github.com/gogo/protobuf/proto" - io "io" - math "math" - math_bits "math/bits" -) - -// Reference imports to suppress errors if they are not otherwise used. 
-var _ = proto.Marshal -var _ = fmt.Errorf -var _ = math.Inf - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the proto package it is being compiled against. -// A compilation error at this line likely means your copy of the -// proto package needs to be updated. -const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package - -type BlockStoreState struct { - Base int64 `protobuf:"varint,1,opt,name=base,proto3" json:"base,omitempty"` - Height int64 `protobuf:"varint,2,opt,name=height,proto3" json:"height,omitempty"` -} - -func (m *BlockStoreState) Reset() { *m = BlockStoreState{} } -func (m *BlockStoreState) String() string { return proto.CompactTextString(m) } -func (*BlockStoreState) ProtoMessage() {} -func (*BlockStoreState) Descriptor() ([]byte, []int) { - return fileDescriptor_ff9e53a0a74267f7, []int{0} -} -func (m *BlockStoreState) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *BlockStoreState) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_BlockStoreState.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalToSizedBuffer(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *BlockStoreState) XXX_Merge(src proto.Message) { - xxx_messageInfo_BlockStoreState.Merge(m, src) -} -func (m *BlockStoreState) XXX_Size() int { - return m.Size() -} -func (m *BlockStoreState) XXX_DiscardUnknown() { - xxx_messageInfo_BlockStoreState.DiscardUnknown(m) -} - -var xxx_messageInfo_BlockStoreState proto.InternalMessageInfo - -func (m *BlockStoreState) GetBase() int64 { - if m != nil { - return m.Base - } - return 0 -} - -func (m *BlockStoreState) GetHeight() int64 { - if m != nil { - return m.Height - } - return 0 -} - -func init() { - proto.RegisterType((*BlockStoreState)(nil), "tendermint.store.BlockStoreState") -} - -func init() { proto.RegisterFile("tendermint/store/types.proto", 
fileDescriptor_ff9e53a0a74267f7) } - -var fileDescriptor_ff9e53a0a74267f7 = []byte{ - // 165 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x92, 0x29, 0x49, 0xcd, 0x4b, - 0x49, 0x2d, 0xca, 0xcd, 0xcc, 0x2b, 0xd1, 0x2f, 0x2e, 0xc9, 0x2f, 0x4a, 0xd5, 0x2f, 0xa9, 0x2c, - 0x48, 0x2d, 0xd6, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x12, 0x40, 0xc8, 0xea, 0x81, 0x65, 0x95, - 0x6c, 0xb9, 0xf8, 0x9d, 0x72, 0xf2, 0x93, 0xb3, 0x83, 0x41, 0xbc, 0xe0, 0x92, 0xc4, 0x92, 0x54, - 0x21, 0x21, 0x2e, 0x96, 0xa4, 0xc4, 0xe2, 0x54, 0x09, 0x46, 0x05, 0x46, 0x0d, 0xe6, 0x20, 0x30, - 0x5b, 0x48, 0x8c, 0x8b, 0x2d, 0x23, 0x35, 0x33, 0x3d, 0xa3, 0x44, 0x82, 0x09, 0x2c, 0x0a, 0xe5, - 0x39, 0x05, 0x9e, 0x78, 0x24, 0xc7, 0x78, 0xe1, 0x91, 0x1c, 0xe3, 0x83, 0x47, 0x72, 0x8c, 0x13, - 0x1e, 0xcb, 0x31, 0x5c, 0x78, 0x2c, 0xc7, 0x70, 0xe3, 0xb1, 0x1c, 0x43, 0x94, 0x79, 0x7a, 0x66, - 0x49, 0x46, 0x69, 0x92, 0x5e, 0x72, 0x7e, 0xae, 0x3e, 0x92, 0x9b, 0x90, 0x98, 0x60, 0x27, 0xe9, - 0xa3, 0xbb, 0x37, 0x89, 0x0d, 0x2c, 0x6e, 0x0c, 0x08, 0x00, 0x00, 0xff, 0xff, 0xef, 0xa6, 0x30, - 0x63, 0xca, 0x00, 0x00, 0x00, -} - -func (m *BlockStoreState) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalToSizedBuffer(dAtA[:size]) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *BlockStoreState) MarshalTo(dAtA []byte) (int, error) { - size := m.Size() - return m.MarshalToSizedBuffer(dAtA[:size]) -} - -func (m *BlockStoreState) MarshalToSizedBuffer(dAtA []byte) (int, error) { - i := len(dAtA) - _ = i - var l int - _ = l - if m.Height != 0 { - i = encodeVarintTypes(dAtA, i, uint64(m.Height)) - i-- - dAtA[i] = 0x10 - } - if m.Base != 0 { - i = encodeVarintTypes(dAtA, i, uint64(m.Base)) - i-- - dAtA[i] = 0x8 - } - return len(dAtA) - i, nil -} - -func encodeVarintTypes(dAtA []byte, offset int, v uint64) int { - offset -= sovTypes(v) - base := offset - for v >= 1<<7 { - dAtA[offset] = 
uint8(v&0x7f | 0x80) - v >>= 7 - offset++ - } - dAtA[offset] = uint8(v) - return base -} -func (m *BlockStoreState) Size() (n int) { - if m == nil { - return 0 - } - var l int - _ = l - if m.Base != 0 { - n += 1 + sovTypes(uint64(m.Base)) - } - if m.Height != 0 { - n += 1 + sovTypes(uint64(m.Height)) - } - return n -} - -func sovTypes(x uint64) (n int) { - return (math_bits.Len64(x|1) + 6) / 7 -} -func sozTypes(x uint64) (n int) { - return sovTypes(uint64((x << 1) ^ uint64((int64(x) >> 63)))) -} -func (m *BlockStoreState) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowTypes - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: BlockStoreState: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: BlockStoreState: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Base", wireType) - } - m.Base = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowTypes - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Base |= int64(b&0x7F) << shift - if b < 0x80 { - break - } - } - case 2: - if wireType != 0 { - return fmt.Errorf("proto: wrong wireType = %d for field Height", wireType) - } - m.Height = 0 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowTypes - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - m.Height |= int64(b&0x7F) << shift - if b < 0x80 { - break - } - } - default: - iNdEx = preIndex - skippy, err := skipTypes(dAtA[iNdEx:]) - if err != 
nil { - return err - } - if skippy < 0 { - return ErrInvalidLengthTypes - } - if (iNdEx + skippy) < 0 { - return ErrInvalidLengthTypes - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} -func skipTypes(dAtA []byte) (n int, err error) { - l := len(dAtA) - iNdEx := 0 - depth := 0 - for iNdEx < l { - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowTypes - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - wireType := int(wire & 0x7) - switch wireType { - case 0: - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowTypes - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - iNdEx++ - if dAtA[iNdEx-1] < 0x80 { - break - } - } - case 1: - iNdEx += 8 - case 2: - var length int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowTypes - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - length |= (int(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - if length < 0 { - return 0, ErrInvalidLengthTypes - } - iNdEx += length - case 3: - depth++ - case 4: - if depth == 0 { - return 0, ErrUnexpectedEndOfGroupTypes - } - depth-- - case 5: - iNdEx += 4 - default: - return 0, fmt.Errorf("proto: illegal wireType %d", wireType) - } - if iNdEx < 0 { - return 0, ErrInvalidLengthTypes - } - if depth == 0 { - return iNdEx, nil - } - } - return 0, io.ErrUnexpectedEOF -} - -var ( - ErrInvalidLengthTypes = fmt.Errorf("proto: negative length found during unmarshaling") - ErrIntOverflowTypes = fmt.Errorf("proto: integer overflow") - ErrUnexpectedEndOfGroupTypes = fmt.Errorf("proto: unexpected end of group") -) diff --git a/proto/tendermint/store/types.proto b/proto/tendermint/store/types.proto deleted file mode 100644 index 
af2f97ad0..000000000 --- a/proto/tendermint/store/types.proto +++ /dev/null @@ -1,9 +0,0 @@ -syntax = "proto3"; -package tendermint.store; - -option go_package = "github.com/tendermint/tendermint/proto/tendermint/store"; - -message BlockStoreState { - int64 base = 1; - int64 height = 2; -} diff --git a/statesync/stateprovider.go b/statesync/stateprovider.go index 4b1c75e32..17aeed77c 100644 --- a/statesync/stateprovider.go +++ b/statesync/stateprovider.go @@ -72,7 +72,7 @@ func NewLightClientStateProvider( } lc, err := light.NewClient(ctx, chainID, trustOptions, providers[0], providers[1:], - lightdb.New(dbm.NewMemDB(), ""), light.Logger(logger), light.MaxRetryAttempts(5)) + lightdb.New(dbm.NewMemDB()), light.Logger(logger), light.MaxRetryAttempts(5)) if err != nil { return nil, err } diff --git a/store/store.go b/store/store.go index e19d2ecc8..6d1ac870f 100644 --- a/store/store.go +++ b/store/store.go @@ -8,8 +8,6 @@ import ( "github.com/google/orderedcode" dbm "github.com/tendermint/tm-db" - tmsync "github.com/tendermint/tendermint/libs/sync" - tmstore "github.com/tendermint/tendermint/proto/tendermint/store" tmproto "github.com/tendermint/tendermint/proto/tendermint/types" "github.com/tendermint/tendermint/types" ) @@ -33,60 +31,98 @@ The store can be assumed to contain all contiguous blocks between base and heigh */ type BlockStore struct { db dbm.DB - - // mtx guards access to the struct fields listed below it. We rely on the database to enforce - // fine-grained concurrency control for its data, and thus this mutex does not apply to - // database contents. The only reason for keeping these fields in the struct is that the data - // can't efficiently be queried from the database since the key encoding we use is not - // lexicographically ordered (see https://github.com/tendermint/tendermint/issues/4567). 
- mtx tmsync.RWMutex - base int64 - height int64 } // NewBlockStore returns a new BlockStore with the given DB, // initialized to the last height that was committed to the DB. func NewBlockStore(db dbm.DB) *BlockStore { - bs := LoadBlockStoreState(db) - return &BlockStore{ - base: bs.Base, - height: bs.Height, - db: db, - } + return &BlockStore{db} } // Base returns the first known contiguous block height, or 0 for empty block stores. func (bs *BlockStore) Base() int64 { - bs.mtx.RLock() - defer bs.mtx.RUnlock() - return bs.base + iter, err := bs.db.Iterator( + blockMetaKey(1), + blockMetaKey(1<<63-1), + ) + if err != nil { + panic(err) + } + defer iter.Close() + + if iter.Valid() { + height, err := decodeBlockMetaKey(iter.Key()) + if err == nil { + return height + } + } + if err := iter.Error(); err != nil { + panic(err) + } + + return 0 } // Height returns the last known contiguous block height, or 0 for empty block stores. func (bs *BlockStore) Height() int64 { - bs.mtx.RLock() - defer bs.mtx.RUnlock() - return bs.height + iter, err := bs.db.ReverseIterator( + blockMetaKey(1), + blockMetaKey(1<<63-1), + ) + if err != nil { + panic(err) + } + defer iter.Close() + + if iter.Valid() { + height, err := decodeBlockMetaKey(iter.Key()) + if err == nil { + return height + } + } + if err := iter.Error(); err != nil { + panic(err) + } + + return 0 } // Size returns the number of blocks in the block store. func (bs *BlockStore) Size() int64 { - bs.mtx.RLock() - defer bs.mtx.RUnlock() - if bs.height == 0 { + height := bs.Height() + if height == 0 { return 0 } - return bs.height - bs.base + 1 + return height + 1 - bs.Base() } // LoadBase atomically loads the base block meta, or returns nil if no base is found. 
func (bs *BlockStore) LoadBaseMeta() *types.BlockMeta { - bs.mtx.RLock() - defer bs.mtx.RUnlock() - if bs.base == 0 { + iter, err := bs.db.Iterator( + blockMetaKey(1), + blockMetaKey(1<<63-1), + ) + if err != nil { return nil } - return bs.LoadBlockMeta(bs.base) + defer iter.Close() + + if iter.Valid() { + var pbbm = new(tmproto.BlockMeta) + err = proto.Unmarshal(iter.Value(), pbbm) + if err != nil { + panic(fmt.Errorf("unmarshal to tmproto.BlockMeta: %w", err)) + } + + blockMeta, err := types.BlockMetaFromProto(pbbm) + if err != nil { + panic(fmt.Errorf("error from proto blockMeta: %w", err)) + } + + return blockMeta + } + + return nil } // LoadBlock returns the block with the given height. @@ -245,82 +281,133 @@ func (bs *BlockStore) LoadSeenCommit(height int64) *types.Commit { return commit } -// PruneBlocks removes block up to (but not including) a height. It returns number of blocks pruned. +// PruneBlocks removes block up to (but not including) a height. It returns the number of blocks pruned. func (bs *BlockStore) PruneBlocks(height int64) (uint64, error) { if height <= 0 { return 0, fmt.Errorf("height must be greater than 0") } - bs.mtx.RLock() - if height > bs.height { - bs.mtx.RUnlock() - return 0, fmt.Errorf("cannot prune beyond the latest height %v", bs.height) - } - base := bs.base - bs.mtx.RUnlock() - if height < base { - return 0, fmt.Errorf("cannot prune to height %v, it is lower than base height %v", - height, base) + + if height > bs.Height() { + return 0, fmt.Errorf("height must be equal to or less than the latest height %d", bs.Height()) } - pruned := uint64(0) - batch := bs.db.NewBatch() - defer batch.Close() - flush := func(batch dbm.Batch, base int64) error { - // We can't trust batches to be atomic, so update base first to make sure noone - // tries to access missing blocks. 
- bs.mtx.Lock() - bs.base = base - bs.mtx.Unlock() - bs.saveState() - - err := batch.WriteSync() + // when removing the block meta, use the hash to remove the hash key at the same time + removeBlockHash := func(key, value []byte, batch dbm.Batch) error { + // unmarshal block meta + var pbbm = new(tmproto.BlockMeta) + err := proto.Unmarshal(value, pbbm) if err != nil { - return fmt.Errorf("failed to prune up to height %v: %w", base, err) + return fmt.Errorf("unmarshal to tmproto.BlockMeta: %w", err) } - batch.Close() + + blockMeta, err := types.BlockMetaFromProto(pbbm) + if err != nil { + return fmt.Errorf("error from proto blockMeta: %w", err) + } + + // delete the hash key corresponding to the block meta's hash + if err := batch.Delete(blockHashKey(blockMeta.BlockID.Hash)); err != nil { + return fmt.Errorf("failed to delete hash key: %X: %w", blockHashKey(blockMeta.BlockID.Hash), err) + } + return nil } - for h := base; h < height; h++ { - meta := bs.LoadBlockMeta(h) - if meta == nil { // assume already deleted - continue - } - if err := batch.Delete(blockMetaKey(h)); err != nil { - return 0, err - } - if err := batch.Delete(blockHashKey(meta.BlockID.Hash)); err != nil { - return 0, err - } - if err := batch.Delete(blockCommitKey(h)); err != nil { - return 0, err - } - if err := batch.Delete(seenCommitKey(h)); err != nil { - return 0, err - } - for p := 0; p < int(meta.BlockID.PartSetHeader.Total); p++ { - if err := batch.Delete(blockPartKey(h, p)); err != nil { - return 0, err - } - } - pruned++ + // remove block meta first as this is used to indicate whether the block exists. 
+ // For this reason, we also use ony block meta as a measure of the amount of blocks pruned + pruned, err := bs.batchDelete(blockMetaKey(0), blockMetaKey(height), removeBlockHash) + if err != nil { + return pruned, err + } - // flush every 1000 blocks to avoid batches becoming too large - if pruned%1000 == 0 && pruned > 0 { - err := flush(batch, h) - if err != nil { - return 0, err + if _, err := bs.batchDelete(blockPartKey(0, 0), blockPartKey(height, 0), nil); err != nil { + return pruned, err + } + + if _, err := bs.batchDelete(blockCommitKey(0), blockCommitKey(height), nil); err != nil { + return pruned, err + } + + if _, err := bs.batchDelete(seenCommitKey(0), seenCommitKey(height), nil); err != nil { + return pruned, err + } + + return pruned, nil +} + +// batchDelete is a generic function for deleting a range of values based on the lowest +// height up to but excluding retainHeight. For each key/value pair, an optional hook can be +// executed before the deletion itself is made +func (bs *BlockStore) batchDelete( + start []byte, + end []byte, + preDeletionHook func(key, value []byte, batch dbm.Batch) error, +) (uint64, error) { + iter, err := bs.db.Iterator(start, end) + if err != nil { + panic(err) + } + defer iter.Close() + + batch := bs.db.NewBatch() + defer batch.Close() + + pruned := uint64(0) + flushed := pruned + for iter.Valid() { + key := iter.Key() + if preDeletionHook != nil { + if err := preDeletionHook(key, iter.Value(), batch); err != nil { + return flushed, err } + } + + if err := batch.Delete(key); err != nil { + return flushed, fmt.Errorf("pruning error at key %X: %w", iter.Key(), err) + } + + pruned++ + // avoid batches growing too large by flushing to database regularly + if pruned%1000 == 0 { + if err := iter.Error(); err != nil { + return flushed, err + } + if err := iter.Close(); err != nil { + return flushed, err + } + + err := batch.Write() + if err != nil { + return flushed, fmt.Errorf("pruning error at key %X: %w", iter.Key(), err) 
+ } + if err := batch.Close(); err != nil { + return flushed, err + } + flushed = pruned + + iter, err = bs.db.Iterator(start, end) + if err != nil { + panic(err) + } + defer iter.Close() + batch = bs.db.NewBatch() defer batch.Close() + } else { + iter.Next() } } - - err := flush(batch, height) - if err != nil { - return 0, err + flushed = pruned + if err := iter.Error(); err != nil { + return flushed, err } - return pruned, nil + + err = batch.WriteSync() + if err != nil { + return flushed, fmt.Errorf("pruning error at key %X: %w", iter.Key(), err) + } + + return flushed, nil } // SaveBlock persists the given block, blockParts, and seenCommit to the underlying db. @@ -378,20 +465,10 @@ func (bs *BlockStore) SaveBlock(block *types.Block, blockParts *types.PartSet, s // NOTE: we can delete this at a later height pbsc := seenCommit.ToProto() seenCommitBytes := mustEncode(pbsc) - if err := bs.db.Set(seenCommitKey(height), seenCommitBytes); err != nil { + if err := bs.db.SetSync(seenCommitKey(height), seenCommitBytes); err != nil { panic(err) } - // Done! - bs.mtx.Lock() - bs.height = height - if bs.base == 0 { - bs.base = height - } - bs.mtx.Unlock() - - // Save new BlockStoreState descriptor. This also flushes the database. - bs.saveState() } func (bs *BlockStore) saveBlockPart(height int64, index int, part *types.Part) { @@ -405,16 +482,6 @@ func (bs *BlockStore) saveBlockPart(height int64, index int, part *types.Part) { } } -func (bs *BlockStore) saveState() { - bs.mtx.RLock() - bss := tmstore.BlockStoreState{ - Base: bs.base, - Height: bs.height, - } - bs.mtx.RUnlock() - SaveBlockStoreState(&bss, bs.db) -} - // SaveSeenCommit saves a seen commit, used by e.g. the state sync reactor when bootstrapping node. 
func (bs *BlockStore) SaveSeenCommit(height int64, seenCommit *types.Commit) error { pbc := seenCommit.ToProto() @@ -445,6 +512,21 @@ func blockMetaKey(height int64) []byte { return key } +func decodeBlockMetaKey(key []byte) (height int64, err error) { + var prefix int64 + remaining, err := orderedcode.Parse(string(key), &prefix, &height) + if err != nil { + return + } + if len(remaining) != 0 { + return -1, fmt.Errorf("expected complete key but got remainder: %s", remaining) + } + if prefix != prefixBlockMeta { + return -1, fmt.Errorf("incorrect prefix. Expected %v, got %v", prefixBlockMeta, prefix) + } + return +} + func blockPartKey(height int64, partIndex int) []byte { key, err := orderedcode.Append(nil, prefixBlockPart, height, int64(partIndex)) if err != nil { @@ -479,46 +561,6 @@ func blockHashKey(hash []byte) []byte { //----------------------------------------------------------------------------- -var blockStoreKey = []byte("blockStore") - -// SaveBlockStoreState persists the blockStore state to the database. -func SaveBlockStoreState(bsj *tmstore.BlockStoreState, db dbm.DB) { - bytes, err := proto.Marshal(bsj) - if err != nil { - panic(fmt.Sprintf("Could not marshal state bytes: %v", err)) - } - if err := db.SetSync(blockStoreKey, bytes); err != nil { - panic(err) - } -} - -// LoadBlockStoreState returns the BlockStoreState as loaded from disk. -// If no BlockStoreState was previously persisted, it returns the zero value. -func LoadBlockStoreState(db dbm.DB) tmstore.BlockStoreState { - bytes, err := db.Get(blockStoreKey) - if err != nil { - panic(err) - } - - if len(bytes) == 0 { - return tmstore.BlockStoreState{ - Base: 0, - Height: 0, - } - } - - var bsj tmstore.BlockStoreState - if err := proto.Unmarshal(bytes, &bsj); err != nil { - panic(fmt.Sprintf("Could not unmarshal bytes: %X", bytes)) - } - - // Backwards compatibility with persisted data from before Base existed. 
- if bsj.Height > 0 && bsj.Base == 0 { - bsj.Base = 1 - } - return bsj -} - // mustEncode proto encodes a proto.message and panics if fails func mustEncode(pb proto.Message) []byte { bz, err := proto.Marshal(pb) diff --git a/store/store_test.go b/store/store_test.go index be87cad67..2438ec859 100644 --- a/store/store_test.go +++ b/store/store_test.go @@ -9,7 +9,6 @@ import ( "testing" "time" - "github.com/gogo/protobuf/proto" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" dbm "github.com/tendermint/tm-db" @@ -18,7 +17,6 @@ import ( "github.com/tendermint/tendermint/crypto" "github.com/tendermint/tendermint/libs/log" tmrand "github.com/tendermint/tendermint/libs/rand" - tmstore "github.com/tendermint/tendermint/proto/tendermint/store" tmversion "github.com/tendermint/tendermint/proto/tendermint/version" sm "github.com/tendermint/tendermint/state" "github.com/tendermint/tendermint/types" @@ -68,66 +66,6 @@ func makeStateAndBlockStore(logger log.Logger) (sm.State, *BlockStore, cleanupFu return state, NewBlockStore(blockDB), func() { os.RemoveAll(config.RootDir) } } -func TestLoadBlockStoreState(t *testing.T) { - - type blockStoreTest struct { - testName string - bss *tmstore.BlockStoreState - want tmstore.BlockStoreState - } - - testCases := []blockStoreTest{ - {"success", &tmstore.BlockStoreState{Base: 100, Height: 1000}, - tmstore.BlockStoreState{Base: 100, Height: 1000}}, - {"empty", &tmstore.BlockStoreState{}, tmstore.BlockStoreState{}}, - {"no base", &tmstore.BlockStoreState{Height: 1000}, tmstore.BlockStoreState{Base: 1, Height: 1000}}, - } - - for _, tc := range testCases { - db := dbm.NewMemDB() - SaveBlockStoreState(tc.bss, db) - retrBSJ := LoadBlockStoreState(db) - assert.Equal(t, tc.want, retrBSJ, "expected the retrieved DBs to match: %s", tc.testName) - } -} - -func TestNewBlockStore(t *testing.T) { - db := dbm.NewMemDB() - bss := tmstore.BlockStoreState{Base: 100, Height: 10000} - bz, _ := proto.Marshal(&bss) - err := 
db.Set(blockStoreKey, bz) - require.NoError(t, err) - bs := NewBlockStore(db) - require.Equal(t, int64(100), bs.Base(), "failed to properly parse blockstore") - require.Equal(t, int64(10000), bs.Height(), "failed to properly parse blockstore") - - panicCausers := []struct { - data []byte - wantErr string - }{ - {[]byte("artful-doger"), "not unmarshal bytes"}, - {[]byte(" "), "unmarshal bytes"}, - } - - for i, tt := range panicCausers { - tt := tt - // Expecting a panic here on trying to parse an invalid blockStore - _, _, panicErr := doFn(func() (interface{}, error) { - err := db.Set(blockStoreKey, tt.data) - require.NoError(t, err) - _ = NewBlockStore(db) - return nil, nil - }) - require.NotNil(t, panicErr, "#%d panicCauser: %q expected a panic", i, tt.data) - assert.Contains(t, fmt.Sprintf("%#v", panicErr), tt.wantErr, "#%d data: %q", i, tt.data) - } - - err = db.Set(blockStoreKey, []byte{}) - require.NoError(t, err) - bs = NewBlockStore(db) - assert.Equal(t, bs.Height(), int64(0), "expecting empty bytes to be unmarshaled alright") -} - func freshBlockStore() (*BlockStore, dbm.DB) { db := dbm.NewMemDB() return NewBlockStore(db), db @@ -379,8 +317,9 @@ func TestLoadBaseMeta(t *testing.T) { bs.SaveBlock(block, partSet, seenCommit) } - _, err = bs.PruneBlocks(4) + pruned, err := bs.PruneBlocks(4) require.NoError(t, err) + assert.EqualValues(t, 3, pruned) baseBlock := bs.LoadBaseMeta() assert.EqualValues(t, 4, baseBlock.Header.Height) @@ -432,10 +371,6 @@ func TestPruneBlocks(t *testing.T) { assert.EqualValues(t, 0, bs.Height()) assert.EqualValues(t, 0, bs.Size()) - // pruning an empty store should error, even when pruning to 0 - _, err = bs.PruneBlocks(1) - require.Error(t, err) - _, err = bs.PruneBlocks(0) require.Error(t, err) @@ -460,10 +395,6 @@ func TestPruneBlocks(t *testing.T) { assert.EqualValues(t, 1200, bs.Base()) assert.EqualValues(t, 1500, bs.Height()) assert.EqualValues(t, 301, bs.Size()) - assert.EqualValues(t, tmstore.BlockStoreState{ - Base: 1200, - 
Height: 1500, - }, LoadBlockStoreState(db)) require.NotNil(t, bs.LoadBlock(1200)) require.Nil(t, bs.LoadBlock(1199)) @@ -479,9 +410,9 @@ func TestPruneBlocks(t *testing.T) { require.NotNil(t, bs.LoadBlock(i)) } - // Pruning below the current base should error + // Pruning below the current base should not error _, err = bs.PruneBlocks(1199) - require.Error(t, err) + require.NoError(t, err) // Pruning to the current base should work pruned, err = bs.PruneBlocks(1200) From c61cd3fd05ebe6bc785f252b25ae6f4b051be64e Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 8 Jan 2021 16:32:11 +0100 Subject: [PATCH 4/7] p2p: add Router prototype (#5831) Early but functional prototype of the new `p2p.Router`, see its GoDoc comment for details on how it works. Expect much of this logic to change and improve as we evolve the new P2P stack. There is a simple test that sets up an in-memory network of four routers with reactors and passes messages between them, but otherwise no exhaustive tests since this is very much a work-in-progress. --- p2p/channel.go | 10 + p2p/peer.go | 230 +++++++++++++++++- p2p/queue.go | 59 +++++ p2p/router.go | 568 +++++++++++++++++++++++++++++++++++++++++++++ p2p/router_test.go | 117 ++++++++++ p2p/transport.go | 30 ++- 6 files changed, 1004 insertions(+), 10 deletions(-) create mode 100644 p2p/queue.go create mode 100644 p2p/router.go create mode 100644 p2p/router_test.go diff --git a/p2p/channel.go b/p2p/channel.go index 26808afc7..b2bcd156e 100644 --- a/p2p/channel.go +++ b/p2p/channel.go @@ -15,6 +15,16 @@ type Envelope struct { To NodeID // Message receiver, or empty for inbound messages. Broadcast bool // Send message to all connected peers, ignoring To. Message proto.Message // Payload. + + // For internal use in the Router. + channelID ChannelID +} + +// Strip strips internal information from the envelope. Primarily used for +// testing, such that returned envelopes can be compared with literals. 
+func (e Envelope) Strip() Envelope { + e.channelID = 0 + return e } // Channel is a bidirectional channel for Protobuf message exchange with peers. diff --git a/p2p/peer.go b/p2p/peer.go index 3d9772622..a9839ddfe 100644 --- a/p2p/peer.go +++ b/p2p/peer.go @@ -1,20 +1,128 @@ package p2p import ( + "context" + "errors" "fmt" "io" "net" + "net/url" "runtime/debug" + "strconv" "sync" "time" "github.com/tendermint/tendermint/libs/cmap" "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/libs/service" - tmconn "github.com/tendermint/tendermint/p2p/conn" ) +// PeerAddress is a peer address URL. +type PeerAddress struct { + *url.URL +} + +// ParsePeerAddress parses a peer address URL into a PeerAddress. +func ParsePeerAddress(address string) (PeerAddress, error) { + u, err := url.Parse(address) + if err != nil || u == nil { + return PeerAddress{}, fmt.Errorf("unable to parse peer address %q: %w", address, err) + } + if u.Scheme == "" { + u.Scheme = string(defaultProtocol) + } + pa := PeerAddress{URL: u} + if err = pa.Validate(); err != nil { + return PeerAddress{}, err + } + return pa, nil +} + +// NodeID returns the address node ID. +func (a PeerAddress) NodeID() NodeID { + return NodeID(a.User.Username()) +} + +// Resolve resolves a PeerAddress into a set of Endpoints, by expanding +// out a DNS name in Host to its IP addresses. Field mapping: +// +// Scheme → Endpoint.Protocol +// Host → Endpoint.IP +// User → Endpoint.PeerID +// Port → Endpoint.Port +// Path+Query+Fragment,Opaque → Endpoint.Path +// +func (a PeerAddress) Resolve(ctx context.Context) ([]Endpoint, error) { + ips, err := net.DefaultResolver.LookupIP(ctx, "ip", a.Host) + if err != nil { + return nil, err + } + port, err := a.parsePort() + if err != nil { + return nil, err + } + + path := a.Path + if a.RawPath != "" { + path = a.RawPath + } + if a.Opaque != "" { // used for e.g. "about:blank" style URLs + path = a.Opaque + } + if a.RawQuery != "" { + path += "?" 
+ a.RawQuery + } + if a.RawFragment != "" { + path += "#" + a.RawFragment + } + + endpoints := make([]Endpoint, len(ips)) + for i, ip := range ips { + endpoints[i] = Endpoint{ + PeerID: a.NodeID(), + Protocol: Protocol(a.Scheme), + IP: ip, + Port: port, + Path: path, + } + } + return endpoints, nil +} + +// Validate validates a PeerAddress. +func (a PeerAddress) Validate() error { + if a.Scheme == "" { + return errors.New("no protocol") + } + if id := a.User.Username(); id == "" { + return errors.New("no peer ID") + } else if err := NodeID(id).Validate(); err != nil { + return fmt.Errorf("invalid peer ID: %w", err) + } + if a.Hostname() == "" && len(a.Query()) == 0 && a.Opaque == "" { + return errors.New("no host or path given") + } + if port, err := a.parsePort(); err != nil { + return err + } else if port > 0 && a.Hostname() == "" { + return errors.New("cannot specify port without host") + } + return nil +} + +// parsePort returns the port number as a uint16. +func (a PeerAddress) parsePort() (uint16, error) { + if portString := a.Port(); portString != "" { + port64, err := strconv.ParseUint(portString, 10, 16) + if err != nil { + return 0, fmt.Errorf("invalid port %q: %w", portString, err) + } + return uint16(port64), nil + } + return 0, nil +} + // PeerStatus specifies peer statuses. type PeerStatus string @@ -106,6 +214,126 @@ type PeerUpdate struct { Status PeerStatus } +// peerStore manages information about peers. It is currently a bare-bones +// in-memory store of peer addresses, and will be fleshed out later. +// +// The main function of peerStore is currently to dispense peers to connect to +// (via peerStore.Dispense), giving the caller exclusive "ownership" of that +// peer until the peer is returned (via peerStore.Return). This is used to +// schedule and synchronize peer dialing and accepting in the Router, e.g. +// making sure we only have a single connection (in either direction) to peers. 
+type peerStore struct { + mtx sync.Mutex + peers map[NodeID]*peerInfo + claimed map[NodeID]bool +} + +// newPeerStore creates a new peer store. +func newPeerStore() *peerStore { + return &peerStore{ + peers: map[NodeID]*peerInfo{}, + claimed: map[NodeID]bool{}, + } +} + +// Add adds a peer to the store, given as an address. +func (s *peerStore) Add(address PeerAddress) error { + if err := address.Validate(); err != nil { + return err + } + peerID := address.NodeID() + + s.mtx.Lock() + defer s.mtx.Unlock() + + peer, ok := s.peers[peerID] + if !ok { + peer = newStorePeer(peerID) + s.peers[peerID] = peer + } else if s.claimed[peerID] { + // FIXME: We need to handle modifications of claimed peers somehow. + return fmt.Errorf("peer %q is claimed", peerID) + } + peer.AddAddress(address) + return nil +} + +// Claim claims a peer. The caller has exclusive ownership of the peer, and must +// return it by calling Return(). Returns nil if the peer could not be claimed. +// If the peer is not known to the store, it is registered and claimed. +func (s *peerStore) Claim(id NodeID) *peerInfo { + s.mtx.Lock() + defer s.mtx.Unlock() + if s.claimed[id] { + return nil + } + peer, ok := s.peers[id] + if !ok { + peer = newStorePeer(id) + s.peers[id] = peer + } + s.claimed[id] = true + return peer +} + +// Dispense finds an appropriate peer to contact and claims it. The caller has +// exclusive ownership of the peer, and must return it by calling Return(). The +// peer will not be dispensed again until returned. +// +// Returns nil if no appropriate peers are available. +func (s *peerStore) Dispense() *peerInfo { + s.mtx.Lock() + defer s.mtx.Unlock() + for key, peer := range s.peers { + switch { + case len(peer.Addresses) == 0: + case s.claimed[key]: + default: + s.claimed[key] = true + return peer + } + } + return nil +} + +// Return returns a claimed peer, making it available for other +// callers to claim. 
+func (s *peerStore) Return(id NodeID) { + s.mtx.Lock() + defer s.mtx.Unlock() + delete(s.claimed, id) +} + +// peerInfo is a peer stored in the peerStore. +// +// FIXME: This should be renamed peer or something else once the old peer is +// removed. +type peerInfo struct { + ID NodeID + Addresses []PeerAddress +} + +// newStorePeer creates a new peerInfo. +func newStorePeer(id NodeID) *peerInfo { + return &peerInfo{ + ID: id, + Addresses: []PeerAddress{}, + } +} + +// AddAddress adds an address to a peer, unless it already exists. It does not +// validate the address. +func (p *peerInfo) AddAddress(address PeerAddress) { + // We just do a linear search for now. + addressString := address.String() + for _, a := range p.Addresses { + if a.String() == addressString { + return + } + } + p.Addresses = append(p.Addresses, address) +} + // ============================================================================ // Types and business logic below may be deprecated. // diff --git a/p2p/queue.go b/p2p/queue.go new file mode 100644 index 000000000..873792f0e --- /dev/null +++ b/p2p/queue.go @@ -0,0 +1,59 @@ +package p2p + +import "sync" + +// queue does QoS scheduling for Envelopes, enqueueing and dequeueing according +// to some policy. Queues are used at contention points, i.e.: +// +// - Receiving inbound messages to a single channel from all peers. +// - Sending outbound messages to a single peer from all channels. +type queue interface { + // enqueue returns a channel for submitting envelopes. + enqueue() chan<- Envelope + + // dequeue returns a channel ordered according to some queueing policy. + dequeue() <-chan Envelope + + // close closes the queue. After this call enqueue() will block, so the + // caller must select on closed() as well to avoid blocking forever. The + // enqueue() and dequeue() channels will not be closed. + close() + + // closed returns a channel that's closed when the queue is closed. 
+ closed() <-chan struct{} +} + +// fifoQueue is a simple unbuffered lossless queue that passes messages through +// in the order they were received, and blocks until message is received. +type fifoQueue struct { + queueCh chan Envelope + closeCh chan struct{} + closeOnce sync.Once +} + +var _ queue = (*fifoQueue)(nil) + +func newFIFOQueue() *fifoQueue { + return &fifoQueue{ + queueCh: make(chan Envelope), + closeCh: make(chan struct{}), + } +} + +func (q *fifoQueue) enqueue() chan<- Envelope { + return q.queueCh +} + +func (q *fifoQueue) dequeue() <-chan Envelope { + return q.queueCh +} + +func (q *fifoQueue) close() { + q.closeOnce.Do(func() { + close(q.closeCh) + }) +} + +func (q *fifoQueue) closed() <-chan struct{} { + return q.closeCh +} diff --git a/p2p/router.go b/p2p/router.go new file mode 100644 index 000000000..ec63447f2 --- /dev/null +++ b/p2p/router.go @@ -0,0 +1,568 @@ +package p2p + +import ( + "context" + "errors" + "fmt" + "io" + "sync" + "time" + + "github.com/gogo/protobuf/proto" + "github.com/tendermint/tendermint/libs/log" + "github.com/tendermint/tendermint/libs/service" +) + +// Router manages peer connections and routes messages between peers and reactor +// channels. This is an early prototype. +// +// Channels are registered via OpenChannel(). When called, we register an input +// message queue for the channel in channelQueues and spawn off a goroutine for +// Router.routeChannel(). This goroutine reads off outbound messages and puts +// them in the appropriate peer message queue, and processes peer errors which +// will close (and thus disconnect) the appropriate peer queue. It runs until +// either the channel is closed by the caller or the router is stopped, at which +// point the input message queue is closed and removed. 
+// +// On startup, the router spawns off two primary goroutines that maintain +// connections to peers and run for the lifetime of the router: +// +// Router.dialPeers(): in a loop, asks the peerStore to dispense an +// eligible peer to connect to, and attempts to resolve and dial each +// address until successful. +// +// Router.acceptPeers(): in a loop, waits for the next inbound connection +// from a peer, and attempts to claim it in the peerStore. +// +// Once either an inbound or outbound connection has been made, an outbound +// message queue is registered in Router.peerQueues and a goroutine is spawned +// off for Router.routePeer() which will spawn off additional goroutines for +// Router.sendPeer() that sends outbound messages from the peer queue over the +// connection and for Router.receivePeer() that reads inbound messages from +// the connection and places them in the appropriate channel queue. When either +// goroutine exits, the connection and peer queue is closed, which will cause +// the other goroutines to close as well. +// +// The peerStore is used to coordinate peer connections, by only allowing a peer +// to be claimed (owned) by a single caller at a time (both for outbound and +// inbound connections). This is done either via peerStore.Dispense() which +// dispenses and claims an eligible peer to dial, or via peerStore.Claim() which +// attempts to claim a given peer for an inbound connection. Peers must be +// returned to the peerStore with peerStore.Return() to release the claim. Over +// time, the peerStore will also do peer scheduling and prioritization, e.g. +// ensuring we do exponential backoff on dial failures and connecting to +// more important peers first (such as persistent peers and validators). 
+// +// An additional goroutine Router.broadcastPeerUpdates() is also spawned off +// on startup, which consumes peer updates from Router.peerUpdatesCh (currently +// only connections and disconnections), and broadcasts them to all peer update +// subscriptions registered via SubscribePeerUpdates(). +// +// On router shutdown, we close Router.stopCh which will signal to all +// goroutines to terminate. This in turn will cause all pending channel/peer +// queues to close, and we wait for this as a signal that goroutines have ended. +// +// All message scheduling should be limited to the queue implementations used +// for channel queues and peer queues. All message sending throughout the router +// is blocking, and if any messages should be dropped or buffered this is the +// sole responsibility of the queue, such that we can limit this logic to a +// single place. There is currently only a FIFO queue implementation that always +// blocks and never drops messages, but this must be improved with other +// implementations. The only exception is that all message sending must also +// select on appropriate channel/queue/router closure signals, to avoid blocking +// forever on a channel that has no consumer. +type Router struct { + *service.BaseService + logger log.Logger + transports map[Protocol]Transport + store *peerStore + + // FIXME: Consider using sync.Map. + peerMtx sync.RWMutex + peerQueues map[NodeID]queue + + // FIXME: We don't strictly need to use a mutex for this if we seal the + // channels on router start. This depends on whether we want to allow + // dynamic channels in the future. + channelMtx sync.RWMutex + channelQueues map[ChannelID]queue + channelMessages map[ChannelID]proto.Message + + peerUpdatesCh chan PeerUpdate + peerUpdatesMtx sync.RWMutex + peerUpdatesSubs map[*PeerUpdatesCh]*PeerUpdatesCh // keyed by struct identity (address) + + // stopCh is used to signal router shutdown, by closing the channel. 
+ stopCh chan struct{} +} + +// NewRouter creates a new Router, dialing the given peers. +// +// FIXME: providing protocol/transport maps is cumbersome in tests, we should +// consider adding Protocols() to the Transport interface instead and register +// protocol/transport mappings automatically on a first-come basis. +func NewRouter(logger log.Logger, transports map[Protocol]Transport, peers []PeerAddress) *Router { + router := &Router{ + logger: logger, + transports: transports, + store: newPeerStore(), + stopCh: make(chan struct{}), + channelQueues: map[ChannelID]queue{}, + channelMessages: map[ChannelID]proto.Message{}, + peerQueues: map[NodeID]queue{}, + peerUpdatesCh: make(chan PeerUpdate), + peerUpdatesSubs: map[*PeerUpdatesCh]*PeerUpdatesCh{}, + } + router.BaseService = service.NewBaseService(logger, "router", router) + + for _, address := range peers { + if err := router.store.Add(address); err != nil { + logger.Error("failed to add peer", "address", address, "err", err) + } + } + + return router +} + +// OpenChannel opens a new channel for the given message type. The caller must +// close the channel when done, and this must happen before the router stops. +func (r *Router) OpenChannel(id ChannelID, messageType proto.Message) (*Channel, error) { + // FIXME: NewChannel should take directional channels so we can pass + // queue.dequeue() instead of reaching inside for queue.queueCh. 
+ queue := newFIFOQueue() + channel := NewChannel(id, messageType, queue.queueCh, make(chan Envelope), make(chan PeerError)) + + r.channelMtx.Lock() + defer r.channelMtx.Unlock() + + if _, ok := r.channelQueues[id]; ok { + return nil, fmt.Errorf("channel %v already exists", id) + } + r.channelQueues[id] = queue + r.channelMessages[id] = messageType + + go func() { + defer func() { + r.channelMtx.Lock() + delete(r.channelQueues, id) + delete(r.channelMessages, id) + r.channelMtx.Unlock() + queue.close() + }() + r.routeChannel(channel) + }() + + return channel, nil +} + +// routeChannel receives outbound messages and errors from a channel and routes +// them to the appropriate peer. It returns when either the channel is closed or +// the router is shutting down. +func (r *Router) routeChannel(channel *Channel) { + for { + select { + case envelope, ok := <-channel.outCh: + if !ok { + return + } + + // FIXME: This is a bit unergonomic, maybe it'd be better for Wrap() + // to return a wrapped copy. 
+ if _, ok := channel.messageType.(Wrapper); ok { + wrapper := proto.Clone(channel.messageType) + if err := wrapper.(Wrapper).Wrap(envelope.Message); err != nil { + r.Logger.Error("failed to wrap message", "err", err) + continue + } + envelope.Message = wrapper + } + envelope.channelID = channel.id + + if envelope.Broadcast { + r.peerMtx.RLock() + peerQueues := make(map[NodeID]queue, len(r.peerQueues)) + for peerID, peerQueue := range r.peerQueues { + peerQueues[peerID] = peerQueue + } + r.peerMtx.RUnlock() + + for peerID, peerQueue := range peerQueues { + e := envelope + e.Broadcast = false + e.To = peerID + select { + case peerQueue.enqueue() <- e: + case <-peerQueue.closed(): + case <-r.stopCh: + return + } + } + + } else { + r.peerMtx.RLock() + peerQueue, ok := r.peerQueues[envelope.To] + r.peerMtx.RUnlock() + if !ok { + r.logger.Error("dropping message for non-connected peer", + "peer", envelope.To, "channel", channel.id) + continue + } + + select { + case peerQueue.enqueue() <- envelope: + case <-peerQueue.closed(): + r.logger.Error("dropping message for non-connected peer", + "peer", envelope.To, "channel", channel.id) + case <-r.stopCh: + return + } + } + + case peerError, ok := <-channel.errCh: + if !ok { + return + } + // FIXME: We just disconnect the peer for now + r.logger.Error("peer error, disconnecting", "peer", peerError.PeerID, "err", peerError.Err) + r.peerMtx.RLock() + peerQueue, ok := r.peerQueues[peerError.PeerID] + r.peerMtx.RUnlock() + if ok { + peerQueue.close() + } + + case <-channel.Done(): + return + case <-r.stopCh: + return + } + } +} + +// acceptPeers accepts inbound connections from peers on the given transport. 
+func (r *Router) acceptPeers(transport Transport) { + for { + select { + case <-r.stopCh: + return + default: + } + + conn, err := transport.Accept(context.Background()) + switch err { + case nil: + case ErrTransportClosed{}, io.EOF: + r.logger.Info("transport closed; stopping accept routine", "transport", transport) + return + default: + r.logger.Error("failed to accept connection", "transport", transport, "err", err) + continue + } + + peerID := conn.NodeInfo().NodeID + if r.store.Claim(peerID) == nil { + r.logger.Error("already connected to peer, rejecting connection", "peer", peerID) + _ = conn.Close() + continue + } + + queue := newFIFOQueue() + r.peerMtx.Lock() + r.peerQueues[peerID] = queue + r.peerMtx.Unlock() + + go func() { + defer func() { + r.peerMtx.Lock() + delete(r.peerQueues, peerID) + r.peerMtx.Unlock() + queue.close() + _ = conn.Close() + r.store.Return(peerID) + }() + + r.routePeer(peerID, conn, queue) + }() + } +} + +// dialPeers maintains outbound connections to peers. +func (r *Router) dialPeers() { + for { + select { + case <-r.stopCh: + return + default: + } + + peer := r.store.Dispense() + if peer == nil { + r.logger.Debug("no eligible peers, sleeping") + select { + case <-time.After(time.Second): + continue + case <-r.stopCh: + return + } + } + + go func() { + defer r.store.Return(peer.ID) + conn, err := r.dialPeer(peer) + if err != nil { + r.logger.Error("failed to dial peer, will retry", "peer", peer.ID) + return + } + defer conn.Close() + + queue := newFIFOQueue() + defer queue.close() + r.peerMtx.Lock() + r.peerQueues[peer.ID] = queue + r.peerMtx.Unlock() + + defer func() { + r.peerMtx.Lock() + delete(r.peerQueues, peer.ID) + r.peerMtx.Unlock() + }() + + r.routePeer(peer.ID, conn, queue) + }() + } +} + +// dialPeer attempts to connect to a peer. 
+func (r *Router) dialPeer(peer *peerInfo) (Connection, error) { + ctx := context.Background() + + for _, address := range peer.Addresses { + resolveCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + r.logger.Info("resolving peer address", "peer", peer.ID, "address", address) + endpoints, err := address.Resolve(resolveCtx) + if err != nil { + r.logger.Error("failed to resolve address", "address", address, "err", err) + continue + } + + for _, endpoint := range endpoints { + t, ok := r.transports[endpoint.Protocol] + if !ok { + r.logger.Error("no transport found for protocol", "protocol", endpoint.Protocol) + continue + } + dialCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + conn, err := t.Dial(dialCtx, endpoint) + if err != nil { + r.logger.Error("failed to dial endpoint", "endpoint", endpoint) + } else { + r.logger.Info("connected to peer", "peer", peer.ID, "endpoint", endpoint) + return conn, nil + } + } + } + return nil, errors.New("failed to connect to peer") +} + +// routePeer routes inbound messages from a peer to channels, and also sends +// outbound queued messages to the peer. It will close the connection and send +// queue, using this as a signal to coordinate the internal receivePeer() and +// sendPeer() goroutines. It blocks until the peer is done, e.g. when the +// connection or queue is closed. +func (r *Router) routePeer(peerID NodeID, conn Connection, sendQueue queue) { + // FIXME: Peer updates should probably be handled by the peer store. 
+ r.peerUpdatesCh <- PeerUpdate{ + PeerID: peerID, + Status: PeerStatusUp, + } + defer func() { + r.peerUpdatesCh <- PeerUpdate{ + PeerID: peerID, + Status: PeerStatusDown, + } + }() + + resultsCh := make(chan error, 2) + go func() { + resultsCh <- r.receivePeer(peerID, conn) + }() + go func() { + resultsCh <- r.sendPeer(peerID, conn, sendQueue) + }() + + err := <-resultsCh + _ = conn.Close() + sendQueue.close() + if e := <-resultsCh; err == nil { + // The first err was nil, so we update it with the second result, + // which may or may not be nil. + err = e + } + switch err { + case nil, io.EOF, ErrTransportClosed{}: + r.logger.Info("peer disconnected", "peer", peerID) + default: + r.logger.Error("peer failure", "peer", peerID, "err", err) + } +} + +// receivePeer receives inbound messages from a peer, deserializes them and +// passes them on to the appropriate channel. +func (r *Router) receivePeer(peerID NodeID, conn Connection) error { + for { + chID, bz, err := conn.ReceiveMessage() + if err != nil { + return err + } + + r.channelMtx.RLock() + queue, ok := r.channelQueues[ChannelID(chID)] + messageType := r.channelMessages[ChannelID(chID)] + r.channelMtx.RUnlock() + if !ok { + r.logger.Error("dropping message for unknown channel", "peer", peerID, "channel", chID) + continue + } + + msg := proto.Clone(messageType) + if err := proto.Unmarshal(bz, msg); err != nil { + r.logger.Error("message decoding failed, dropping message", "peer", peerID, "err", err) + continue + } + if wrapper, ok := msg.(Wrapper); ok { + msg, err = wrapper.Unwrap() + if err != nil { + r.logger.Error("failed to unwrap message", "err", err) + continue + } + } + + select { + // FIXME: ReceiveMessage() should return ChannelID. 
+ case queue.enqueue() <- Envelope{channelID: ChannelID(chID), From: peerID, Message: msg}: + r.logger.Debug("received message", "peer", peerID, "message", msg) + case <-queue.closed(): + r.logger.Error("channel closed, dropping message", "peer", peerID, "channel", chID) + case <-r.stopCh: + return nil + } + } +} + +// sendPeer sends queued messages to a peer. +func (r *Router) sendPeer(peerID NodeID, conn Connection, queue queue) error { + for { + select { + case envelope := <-queue.dequeue(): + bz, err := proto.Marshal(envelope.Message) + if err != nil { + r.logger.Error("failed to marshal message", "peer", peerID, "err", err) + continue + } + + // FIXME: SendMessage() should take ChannelID. + _, err = conn.SendMessage(byte(envelope.channelID), bz) + if err != nil { + return err + } + r.logger.Debug("sent message", "peer", envelope.To, "message", envelope.Message) + + case <-queue.closed(): + return nil + + case <-r.stopCh: + return nil + } + } +} + +// SubscribePeerUpdates creates a new peer updates subscription. The caller must +// consume the peer updates in a timely fashion, since delivery is guaranteed and +// will block peer connection/disconnection otherwise. +func (r *Router) SubscribePeerUpdates() (*PeerUpdatesCh, error) { + // FIXME: We may want to use a size 1 buffer here. When the router + // broadcasts a peer update it has to loop over all of the + // subscriptions, and we want to avoid blocking and waiting for a + // context switch before continuing to the next subscription. This also + // prevents tail latencies from compounding across updates. We also want + // to make sure the subscribers are reasonably in sync, so it should be + // kept at 1. However, this should be benchmarked first. 
+ peerUpdates := NewPeerUpdates(make(chan PeerUpdate)) + r.peerUpdatesMtx.Lock() + r.peerUpdatesSubs[peerUpdates] = peerUpdates + r.peerUpdatesMtx.Unlock() + + go func() { + select { + case <-peerUpdates.Done(): + r.peerUpdatesMtx.Lock() + delete(r.peerUpdatesSubs, peerUpdates) + r.peerUpdatesMtx.Unlock() + case <-r.stopCh: + } + }() + return peerUpdates, nil +} + +// broadcastPeerUpdates broadcasts peer updates received from the router +// to all subscriptions. +func (r *Router) broadcastPeerUpdates() { + for { + select { + case peerUpdate := <-r.peerUpdatesCh: + subs := []*PeerUpdatesCh{} + r.peerUpdatesMtx.RLock() + for _, sub := range r.peerUpdatesSubs { + subs = append(subs, sub) + } + r.peerUpdatesMtx.RUnlock() + + for _, sub := range subs { + select { + case sub.updatesCh <- peerUpdate: + case <-sub.doneCh: + case <-r.stopCh: + return + } + } + + case <-r.stopCh: + return + } + } +} + +// OnStart implements service.Service. +func (r *Router) OnStart() error { + go r.broadcastPeerUpdates() + go r.dialPeers() + for _, transport := range r.transports { + go r.acceptPeers(transport) + } + return nil +} + +// OnStop implements service.Service. +func (r *Router) OnStop() { + // Collect all active queues, so we can wait for them to close. + queues := []queue{} + r.channelMtx.RLock() + for _, q := range r.channelQueues { + queues = append(queues, q) + } + r.channelMtx.RUnlock() + r.peerMtx.RLock() + for _, q := range r.peerQueues { + queues = append(queues, q) + } + r.peerMtx.RUnlock() + + // Signal router shutdown, and wait for queues (and thus goroutines) + // to complete. 
+ close(r.stopCh) + for _, q := range queues { + <-q.closed() + } +} diff --git a/p2p/router_test.go b/p2p/router_test.go new file mode 100644 index 000000000..042200909 --- /dev/null +++ b/p2p/router_test.go @@ -0,0 +1,117 @@ +package p2p_test + +import ( + "errors" + "testing" + + gogotypes "github.com/gogo/protobuf/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/tendermint/tendermint/libs/log" + "github.com/tendermint/tendermint/p2p" +) + +type TestMessage = gogotypes.StringValue + +func echoReactor(channel *p2p.Channel) { + for { + select { + case envelope := <-channel.In(): + channel.Out() <- p2p.Envelope{ + To: envelope.From, + Message: &TestMessage{Value: envelope.Message.(*TestMessage).Value}, + } + case <-channel.Done(): + return + } + } +} + +func TestRouter(t *testing.T) { + logger := log.TestingLogger() + network := p2p.NewMemoryNetwork(logger) + transport := network.GenerateTransport() + chID := p2p.ChannelID(1) + + // Start some other in-memory network nodes to communicate with, running + // a simple echo reactor that returns received messages. + peers := []p2p.PeerAddress{} + for i := 0; i < 3; i++ { + peerTransport := network.GenerateTransport() + peerRouter := p2p.NewRouter(logger.With("peerID", i), map[p2p.Protocol]p2p.Transport{ + p2p.MemoryProtocol: peerTransport, + }, nil) + peers = append(peers, peerTransport.Endpoints()[0].PeerAddress()) + + channel, err := peerRouter.OpenChannel(chID, &TestMessage{}) + require.NoError(t, err) + defer channel.Close() + go echoReactor(channel) + + err = peerRouter.Start() + require.NoError(t, err) + defer func() { require.NoError(t, peerRouter.Stop()) }() + } + + // Start the main router and connect it to the peers above. 
+ router := p2p.NewRouter(logger, map[p2p.Protocol]p2p.Transport{ + p2p.MemoryProtocol: transport, + }, peers) + channel, err := router.OpenChannel(chID, &TestMessage{}) + require.NoError(t, err) + peerUpdates, err := router.SubscribePeerUpdates() + require.NoError(t, err) + + err = router.Start() + require.NoError(t, err) + defer func() { + channel.Close() + peerUpdates.Close() + require.NoError(t, router.Stop()) + }() + + // Wait for peers to come online, and ping them as they do. + for i := 0; i < len(peers); i++ { + peerUpdate := <-peerUpdates.Updates() + peerID := peerUpdate.PeerID + require.Equal(t, p2p.PeerUpdate{ + PeerID: peerID, + Status: p2p.PeerStatusUp, + }, peerUpdate) + + channel.Out() <- p2p.Envelope{To: peerID, Message: &TestMessage{Value: "hi!"}} + assert.Equal(t, p2p.Envelope{ + From: peerID, + Message: &TestMessage{Value: "hi!"}, + }, (<-channel.In()).Strip()) + } + + // We then submit an error for a peer, and watch it get disconnected. + channel.Error() <- p2p.PeerError{ + PeerID: peers[0].NodeID(), + Err: errors.New("test error"), + Severity: p2p.PeerErrorSeverityCritical, + } + peerUpdate := <-peerUpdates.Updates() + require.Equal(t, p2p.PeerUpdate{ + PeerID: peers[0].NodeID(), + Status: p2p.PeerStatusDown, + }, peerUpdate) + + // We now broadcast a message, which we should receive back from only two peers. 
+ channel.Out() <- p2p.Envelope{ + Broadcast: true, + Message: &TestMessage{Value: "broadcast"}, + } + for i := 0; i < len(peers)-1; i++ { + envelope := <-channel.In() + require.NotEqual(t, peers[0].NodeID(), envelope.From) + require.Equal(t, &TestMessage{Value: "broadcast"}, envelope.Message) + } + select { + case envelope := <-channel.In(): + t.Errorf("unexpected message: %v", envelope) + default: + } +} diff --git a/p2p/transport.go b/p2p/transport.go index 8d49b9538..5e15444fd 100644 --- a/p2p/transport.go +++ b/p2p/transport.go @@ -11,9 +11,15 @@ import ( "github.com/tendermint/tendermint/p2p/conn" ) +const ( + defaultProtocol Protocol = MConnProtocol +) + // Transport is an arbitrary mechanism for exchanging bytes with a peer. type Transport interface { - // Accept waits for the next inbound connection on a listening endpoint. + // Accept waits for the next inbound connection on a listening endpoint. If + // this returns io.EOF or ErrTransportClosed the transport should be + // considered closed and further Accept() calls are futile. Accept(context.Context) (Connection, error) // Dial creates an outbound connection to an endpoint. @@ -60,21 +66,27 @@ type Endpoint struct { Port uint16 } -// String formats an endpoint as a URL string. -func (e Endpoint) String() string { - u := url.URL{Scheme: string(e.Protocol)} - if e.PeerID != "" { - u.User = url.User(string(e.PeerID)) +// PeerAddress converts the endpoint into a peer address URL. +func (e Endpoint) PeerAddress() PeerAddress { + u := &url.URL{ + Scheme: string(e.Protocol), + User: url.User(string(e.PeerID)), } - if len(e.IP) > 0 { + if e.IP != nil { u.Host = e.IP.String() if e.Port > 0 { u.Host = net.JoinHostPort(u.Host, fmt.Sprintf("%v", e.Port)) } - } else if e.Path != "" { + u.Path = e.Path + } else { u.Opaque = e.Path } - return u.String() + return PeerAddress{URL: u} +} + +// String formats an endpoint as a URL string. 
+func (e Endpoint) String() string { + return e.PeerAddress().URL.String() } // Validate validates an endpoint. From 03a6fb277702bf6d7773d6519e23fed1f1933930 Mon Sep 17 00:00:00 2001 From: Callum Waters Date: Fri, 8 Jan 2021 17:05:27 +0100 Subject: [PATCH 5/7] state: prune states using an iterator (#5864) --- CHANGELOG_PENDING.md | 1 + consensus/common_test.go | 2 +- consensus/replay_test.go | 2 +- consensus/state.go | 5 +- state/export_test.go | 11 -- state/helpers_test.go | 33 +++++ state/mocks/evidence_pool.go | 2 +- state/mocks/store.go | 12 +- state/store.go | 239 +++++++++++++++++-------------- state/store_test.go | 121 ++++++++++++---- test/maverick/consensus/state.go | 3 +- 11 files changed, 278 insertions(+), 153 deletions(-) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 909dd8bdc..d91ad4b76 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -36,6 +36,7 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi - [libs/os] `EnsureDir` now propagates IO errors and checks the file type (@erikgrinaker) - [libs/os] Kill() and {Must,}{Read,Write}File() functions have been removed. 
(@alessio) - [store] \#5848 Remove block store state in favor of using the db iterators directly (@cmwaters) + - [state] \#5864 Use an iterator when pruning state (@cmwaters) - Blockchain Protocol diff --git a/consensus/common_test.go b/consensus/common_test.go index 1fd5bb599..8502883e7 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -389,7 +389,7 @@ func newStateWithConfigAndBlockStore( evpool := sm.EmptyEvidencePool{} // Make State - stateDB := blockDB + stateDB := dbm.NewMemDB() stateStore := sm.NewStore(stateDB) if err := stateStore.Save(state); err != nil { // for save height 1's validators info panic(err) diff --git a/consensus/replay_test.go b/consensus/replay_test.go index ec9de5871..32436f014 100644 --- a/consensus/replay_test.go +++ b/consensus/replay_test.go @@ -158,7 +158,7 @@ LOOP: // create consensus state from a clean slate logger := log.NewNopLogger() blockDB := dbm.NewMemDB() - stateDB := blockDB + stateDB := dbm.NewMemDB() stateStore := sm.NewStore(stateDB) state, err := sm.MakeGenesisStateFromFile(consensusReplayConfig.GenesisFile()) require.NoError(t, err) diff --git a/consensus/state.go b/consensus/state.go index b542ae6c7..d57bbf42c 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1619,9 +1619,10 @@ func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) { if err != nil { return 0, fmt.Errorf("failed to prune block store: %w", err) } - err = cs.blockExec.Store().PruneStates(base, retainHeight) + + err = cs.blockExec.Store().PruneStates(retainHeight) if err != nil { - return 0, fmt.Errorf("failed to prune state database: %w", err) + return 0, fmt.Errorf("failed to prune state store: %w", err) } return pruned, nil } diff --git a/state/export_test.go b/state/export_test.go index 56c3d764c..075a230cb 100644 --- a/state/export_test.go +++ b/state/export_test.go @@ -1,8 +1,6 @@ package state import ( - dbm "github.com/tendermint/tm-db" - abci "github.com/tendermint/tendermint/abci/types" tmstate 
"github.com/tendermint/tendermint/proto/tendermint/state" tmproto "github.com/tendermint/tendermint/proto/tendermint/types" @@ -20,8 +18,6 @@ import ( // easily testable from outside of the package. // -const ValSetCheckpointInterval = valSetCheckpointInterval - // UpdateState is an alias for updateState exported from execution.go, // exclusively and explicitly for testing. func UpdateState( @@ -39,10 +35,3 @@ func UpdateState( func ValidateValidatorUpdates(abciUpdates []abci.ValidatorUpdate, params tmproto.ValidatorParams) error { return validateValidatorUpdates(abciUpdates, params) } - -// SaveValidatorsInfo is an alias for the private saveValidatorsInfo method in -// store.go, exported exclusively and explicitly for testing. -func SaveValidatorsInfo(db dbm.DB, height, lastHeightChanged int64, valSet *types.ValidatorSet) error { - stateStore := dbStore{db} - return stateStore.saveValidatorsInfo(height, lastHeightChanged, valSet) -} diff --git a/state/helpers_test.go b/state/helpers_test.go index 19549f160..0c7224b83 100644 --- a/state/helpers_test.go +++ b/state/helpers_test.go @@ -228,6 +228,39 @@ func randomGenesisDoc() *types.GenesisDoc { } } +// used for testing by state store +func makeRandomStateFromValidatorSet( + lastValSet *types.ValidatorSet, + height, lastHeightValidatorsChanged int64, +) sm.State { + return sm.State{ + LastBlockHeight: height - 1, + NextValidators: lastValSet.CopyIncrementProposerPriority(2), + Validators: lastValSet.CopyIncrementProposerPriority(1), + LastValidators: lastValSet.Copy(), + LastHeightConsensusParamsChanged: height, + ConsensusParams: *types.DefaultConsensusParams(), + LastHeightValidatorsChanged: lastHeightValidatorsChanged, + InitialHeight: 1, + } +} + +func makeRandomStateFromConsensusParams(consensusParams *tmproto.ConsensusParams, + height, lastHeightConsensusParamsChanged int64) sm.State { + val, _ := types.RandValidator(true, 10) + valSet := types.NewValidatorSet([]*types.Validator{val}) + return sm.State{ + 
LastBlockHeight: height - 1, + ConsensusParams: *consensusParams, + LastHeightConsensusParamsChanged: lastHeightConsensusParamsChanged, + NextValidators: valSet.CopyIncrementProposerPriority(2), + Validators: valSet.CopyIncrementProposerPriority(1), + LastValidators: valSet.Copy(), + LastHeightValidatorsChanged: height, + InitialHeight: 1, + } +} + //---------------------------------------------------------------------------- type testApp struct { diff --git a/state/mocks/evidence_pool.go b/state/mocks/evidence_pool.go index 7292991ca..a7e01b41c 100644 --- a/state/mocks/evidence_pool.go +++ b/state/mocks/evidence_pool.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.1.0. DO NOT EDIT. +// Code generated by mockery v1.0.0. DO NOT EDIT. package mocks diff --git a/state/mocks/store.go b/state/mocks/store.go index 17e1ef7b9..6d7316687 100644 --- a/state/mocks/store.go +++ b/state/mocks/store.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.1.0. DO NOT EDIT. +// Code generated by mockery v1.0.0. DO NOT EDIT. 
package mocks @@ -162,13 +162,13 @@ func (_m *Store) LoadValidators(_a0 int64) (*tenderminttypes.ValidatorSet, error return r0, r1 } -// PruneStates provides a mock function with given fields: _a0, _a1 -func (_m *Store) PruneStates(_a0 int64, _a1 int64) error { - ret := _m.Called(_a0, _a1) +// PruneStates provides a mock function with given fields: _a0 +func (_m *Store) PruneStates(_a0 int64) error { + ret := _m.Called(_a0) var r0 error - if rf, ok := ret.Get(0).(func(int64, int64) error); ok { - r0 = rf(_a0, _a1) + if rf, ok := ret.Get(0).(func(int64) error); ok { + r0 = rf(_a0) } else { r0 = ret.Error(0) } diff --git a/state/store.go b/state/store.go index 4868057ed..4718d8465 100644 --- a/state/store.go +++ b/state/store.go @@ -1,6 +1,7 @@ package state import ( + "bytes" "errors" "fmt" @@ -82,8 +83,8 @@ type Store interface { SaveABCIResponses(int64, *tmstate.ABCIResponses) error // Bootstrap is used for bootstrapping state when not starting from a initial height. Bootstrap(State) error - // PruneStates takes the height from which to start prning and which height stop at - PruneStates(int64, int64) error + // PruneStates takes the height from which to prune up to (exclusive) + PruneStates(int64) error } // dbStore wraps a db (github.com/tendermint/tm-db) @@ -228,132 +229,158 @@ func (store dbStore) Bootstrap(state State) error { return store.db.SetSync(stateKey, state.Bytes()) } -// PruneStates deletes states between the given heights (including from, excluding to). It is not +// PruneStates deletes states up to the height specified (exclusive). It is not // guaranteed to delete all states, since the last checkpointed state and states being pointed to by -// e.g. `LastHeightChanged` must remain. The state at to must also exist. 
-// -// The from parameter is necessary since we can't do a key scan in a performant way due to the key -// encoding not preserving ordering: https://github.com/tendermint/tendermint/issues/4567 -// This will cause some old states to be left behind when doing incremental partial prunes, -// specifically older checkpoints and LastHeightChanged targets. -func (store dbStore) PruneStates(from int64, to int64) error { - if from <= 0 || to <= 0 { - return fmt.Errorf("from height %v and to height %v must be greater than 0", from, to) - } - if from >= to { - return fmt.Errorf("from height %v must be lower than to height %v", from, to) - } - valInfo, err := loadValidatorsInfo(store.db, to) - if err != nil { - return fmt.Errorf("validators at height %v not found: %w", to, err) - } - paramsInfo, err := store.loadConsensusParamsInfo(to) - if err != nil { - return fmt.Errorf("consensus params at height %v not found: %w", to, err) +// e.g. `LastHeightChanged` must remain. The state at retain height must also exist. +// Pruning is done in descending order. +func (store dbStore) PruneStates(retainHeight int64) error { + if retainHeight <= 0 { + return fmt.Errorf("height %v must be greater than 0", retainHeight) } - keepVals := make(map[int64]bool) - if valInfo.ValidatorSet == nil { - keepVals[valInfo.LastHeightChanged] = true - keepVals[lastStoredHeightFor(to, valInfo.LastHeightChanged)] = true // keep last checkpoint too + if err := store.pruneValidatorSets(retainHeight); err != nil { + return err } - keepParams := make(map[int64]bool) + + if err := store.pruneConsensusParams(retainHeight); err != nil { + return err + } + + if err := store.pruneABCIResponses(retainHeight); err != nil { + return err + } + + return nil +} + +// pruneValidatorSets calls a reverse iterator from base height to retain height (exclusive), deleting +// all validator sets in between. 
Due to the fact that most validator sets stored reference an earlier
+// validator set, it is likely that there will remain one validator set left after pruning.
+func (store dbStore) pruneValidatorSets(height int64) error {
+	valInfo, err := loadValidatorsInfo(store.db, height)
+	if err != nil {
+		return fmt.Errorf("validators at height %v not found: %w", height, err)
+	}
+
+	// We will prune up to the validator set at the given "height". As we don't save validator sets every
+	// height but only when they change or at a check point, it is likely that the validator set at the height
+	// we prune to is empty and thus dependent on the validator set saved at a previous height. We must find
+	// that validator set and make sure it is not pruned.
+	lastRecordedValSetHeight := lastStoredHeightFor(height, valInfo.LastHeightChanged)
+	lastRecordedValSet, err := loadValidatorsInfo(store.db, lastRecordedValSetHeight)
+	if err != nil || lastRecordedValSet.ValidatorSet == nil {
+		return fmt.Errorf("couldn't find validators at height %d (height %d was originally requested): %w",
+			lastStoredHeightFor(height, valInfo.LastHeightChanged),
+			height,
+			err,
+		)
+	}
+
+	// batch delete all the validators sets up to height
+	return store.batchDelete(
+		validatorsKey(1),
+		validatorsKey(height),
+		validatorsKey(lastRecordedValSetHeight),
+	)
+}
+
+// pruneConsensusParams calls a reverse iterator from base height to retain height, batch deleting
+// all consensus params in between. If the consensus params at the new base height are dependent
+// on a prior height then this will keep that lower height too. 
+func (store dbStore) pruneConsensusParams(retainHeight int64) error { + paramsInfo, err := store.loadConsensusParamsInfo(retainHeight) + if err != nil { + return fmt.Errorf("consensus params at height %v not found: %w", retainHeight, err) + } + + // As we don't save the consensus params at every height, only when there is a consensus params change, + // we must not prune (or save) the last consensus params that the consensus params info at height + // is dependent on. if paramsInfo.ConsensusParams.Equal(&tmproto.ConsensusParams{}) { - keepParams[paramsInfo.LastHeightChanged] = true + lastRecordedConsensusParams, err := store.loadConsensusParamsInfo(paramsInfo.LastHeightChanged) + if err != nil || lastRecordedConsensusParams.ConsensusParams.Equal(&tmproto.ConsensusParams{}) { + return fmt.Errorf( + "couldn't find consensus params at height %d as last changed from height %d: %w", + paramsInfo.LastHeightChanged, + retainHeight, + err, + ) + } } + // batch delete all the consensus params up to the retain height + return store.batchDelete( + consensusParamsKey(1), + consensusParamsKey(retainHeight), + consensusParamsKey(paramsInfo.LastHeightChanged), + ) +} + +// pruneABCIResponses calls a reverse iterator from base height to retain height batch deleting +// all abci responses in between +func (store dbStore) pruneABCIResponses(height int64) error { + return store.batchDelete(abciResponsesKey(1), abciResponsesKey(height), nil) +} + +// batchDelete is a generic function for deleting a range of keys in reverse order. 
It will
+// skip any key equal to the exception argument (e.g. the last recorded validator set, which must be kept).
+func (store dbStore) batchDelete(start []byte, end []byte, exception []byte) error {
+	iter, err := store.db.ReverseIterator(start, end)
+	if err != nil {
+		return fmt.Errorf("iterator error: %w", err)
+	}
+	defer iter.Close()
+
 	batch := store.db.NewBatch()
 	defer batch.Close()
-	pruned := uint64(0)
-	// We have to delete in reverse order, to avoid deleting previous heights that have validator
-	// sets and consensus params that we may need to retrieve.
-	for h := to - 1; h >= from; h-- {
-		// For heights we keep, we must make sure they have the full validator set or consensus
-		// params, otherwise they will panic if they're retrieved directly (instead of
-		// indirectly via a LastHeightChanged pointer).
-		if keepVals[h] {
-			v, err := loadValidatorsInfo(store.db, h)
-			if err != nil || v.ValidatorSet == nil {
-				vip, err := store.LoadValidators(h)
-				if err != nil {
-					return err
-				}
-
-				pvi, err := vip.ToProto()
-				if err != nil {
-					return err
-				}
-
-				v.ValidatorSet = pvi
-				v.LastHeightChanged = h
-
-				bz, err := v.Marshal()
-				if err != nil {
-					return err
-				}
-				err = batch.Set(validatorsKey(h), bz)
-				if err != nil {
-					return err
-				}
-			}
-		} else {
-			err = batch.Delete(validatorsKey(h))
-			if err != nil {
-				return err
-			}
+	pruned := 0
+	for iter.Valid() {
+		key := iter.Key()
+		if bytes.Equal(key, exception) {
+			iter.Next()
+			continue
 		}
-		if keepParams[h] {
-			p, err := store.loadConsensusParamsInfo(h)
-			if err != nil {
-				return err
-			}
-
-			if p.ConsensusParams.Equal(&tmproto.ConsensusParams{}) {
-				p.ConsensusParams, err = store.LoadConsensusParams(h)
-				if err != nil {
-					return err
-				}
-
-				p.LastHeightChanged = h
-				bz, err := p.Marshal()
-				if err != nil {
-					return err
-				}
-
-				err = batch.Set(consensusParamsKey(h), bz)
-				if err != nil {
-					return err
-				}
-			}
-		} else {
-			err = batch.Delete(consensusParamsKey(h))
-			if err != nil {
-				return err
-			}
+		if err := batch.Delete(key); err != nil {
+			return fmt.Errorf("pruning error at key %X: 
%w", key, err) } - err = batch.Delete(abciResponsesKey(h)) - if err != nil { - return err - } pruned++ - - // avoid batches growing too large by flushing to database regularly - if pruned%1000 == 0 && pruned > 0 { - err := batch.Write() - if err != nil { + // avoid batches growing too large by flushing to disk regularly + if pruned%1000 == 0 { + if err := iter.Error(); err != nil { return err } - batch.Close() + if err := iter.Close(); err != nil { + return err + } + + if err := batch.Write(); err != nil { + return fmt.Errorf("pruning error at key %X: %w", key, err) + } + if err := batch.Close(); err != nil { + return err + } + + iter, err = store.db.ReverseIterator(start, end) + if err != nil { + return fmt.Errorf("iterator error: %w", err) + } + defer iter.Close() + batch = store.db.NewBatch() defer batch.Close() + } else { + iter.Next() } } - err = batch.WriteSync() - if err != nil { + if err := iter.Error(); err != nil { + return fmt.Errorf("iterator error: %w", err) + } + + if err := batch.WriteSync(); err != nil { return err } diff --git a/state/store_test.go b/state/store_test.go index e43921519..6170f728f 100644 --- a/state/store_test.go +++ b/state/store_test.go @@ -21,31 +21,84 @@ import ( "github.com/tendermint/tendermint/types" ) +const ( + // make sure this is the same as in state/store.go + valSetCheckpointInterval = 100000 +) + +func TestStoreBootstrap(t *testing.T) { + stateDB := dbm.NewMemDB() + stateStore := sm.NewStore(stateDB) + val, _ := types.RandValidator(true, 10) + val2, _ := types.RandValidator(true, 10) + val3, _ := types.RandValidator(true, 10) + vals := types.NewValidatorSet([]*types.Validator{val, val2, val3}) + bootstrapState := makeRandomStateFromValidatorSet(vals, 100, 100) + err := stateStore.Bootstrap(bootstrapState) + require.NoError(t, err) + + // bootstrap should also save the previous validator + _, err = stateStore.LoadValidators(99) + require.NoError(t, err) + + _, err = stateStore.LoadValidators(100) + require.NoError(t, 
err) + + _, err = stateStore.LoadValidators(101) + require.NoError(t, err) + + state, err := stateStore.Load() + require.NoError(t, err) + require.Equal(t, bootstrapState, state) +} + func TestStoreLoadValidators(t *testing.T) { stateDB := dbm.NewMemDB() stateStore := sm.NewStore(stateDB) val, _ := types.RandValidator(true, 10) - vals := types.NewValidatorSet([]*types.Validator{val}) + val2, _ := types.RandValidator(true, 10) + val3, _ := types.RandValidator(true, 10) + vals := types.NewValidatorSet([]*types.Validator{val, val2, val3}) // 1) LoadValidators loads validators using a height where they were last changed - err := sm.SaveValidatorsInfo(stateDB, 1, 1, vals) + // Note that only the next validators at height h + 1 are saved + err := stateStore.Save(makeRandomStateFromValidatorSet(vals, 1, 1)) require.NoError(t, err) - err = sm.SaveValidatorsInfo(stateDB, 2, 1, vals) + err = stateStore.Save(makeRandomStateFromValidatorSet(vals.CopyIncrementProposerPriority(1), 2, 1)) require.NoError(t, err) - loadedVals, err := stateStore.LoadValidators(2) + loadedVals, err := stateStore.LoadValidators(3) require.NoError(t, err) - assert.NotZero(t, loadedVals.Size()) + require.Equal(t, vals.CopyIncrementProposerPriority(3), loadedVals) // 2) LoadValidators loads validators using a checkpoint height - err = sm.SaveValidatorsInfo(stateDB, sm.ValSetCheckpointInterval, 1, vals) + // add a validator set at the checkpoint + err = stateStore.Save(makeRandomStateFromValidatorSet(vals, valSetCheckpointInterval, 1)) require.NoError(t, err) - loadedVals, err = stateStore.LoadValidators(sm.ValSetCheckpointInterval) + // check that a request will go back to the last checkpoint + _, err = stateStore.LoadValidators(valSetCheckpointInterval + 1) + require.Error(t, err) + require.Equal(t, fmt.Sprintf("couldn't find validators at height %d (height %d was originally requested): "+ + "value retrieved from db is empty", + valSetCheckpointInterval, valSetCheckpointInterval+1), err.Error()) + + // 
now save a validator set at that checkpoint + err = stateStore.Save(makeRandomStateFromValidatorSet(vals, valSetCheckpointInterval-1, 1)) require.NoError(t, err) - assert.NotZero(t, loadedVals.Size()) + + loadedVals, err = stateStore.LoadValidators(valSetCheckpointInterval) + require.NoError(t, err) + // validator set gets updated with the one given hence we expect it to equal next validators (with an increment of one) + // as opposed to being equal to an increment of 100000 - 1 (if we didn't save at the checkpoint) + require.Equal(t, vals.CopyIncrementProposerPriority(2), loadedVals) + require.NotEqual(t, vals.CopyIncrementProposerPriority(valSetCheckpointInterval), loadedVals) } +// This benchmarks the speed of loading validators from different heights if there is no validator set change. +// NOTE: This isn't too indicative of validator retrieval speed as the db is always (regardless of height) only +// performing two operations: 1) retrieve validator info at height x, which has a last validator set change of 1 +// and 2) retrieve the validator set at the aforementioned height 1. func BenchmarkLoadValidators(b *testing.B) { const valSetSize = 100 @@ -67,9 +120,10 @@ func BenchmarkLoadValidators(b *testing.B) { for i := 10; i < 10000000000; i *= 10 { // 10, 100, 1000, ... 
i := i - if err := sm.SaveValidatorsInfo(stateDB, - int64(i), state.LastHeightValidatorsChanged, state.NextValidators); err != nil { - b.Fatal(err) + err = stateStore.Save(makeRandomStateFromValidatorSet(state.NextValidators, + int64(i)-1, state.LastHeightValidatorsChanged)) + if err != nil { + b.Fatalf("error saving store: %v", err) } b.Run(fmt.Sprintf("height=%d", i), func(b *testing.B) { @@ -83,25 +137,44 @@ func BenchmarkLoadValidators(b *testing.B) { } } +func TestStoreLoadConsensusParams(t *testing.T) { + stateDB := dbm.NewMemDB() + stateStore := sm.NewStore(stateDB) + err := stateStore.Save(makeRandomStateFromConsensusParams(types.DefaultConsensusParams(), 1, 1)) + require.NoError(t, err) + params, err := stateStore.LoadConsensusParams(1) + require.NoError(t, err) + require.Equal(t, types.DefaultConsensusParams(), ¶ms) + + // we give the state store different params but say that the height hasn't changed, hence + // it should save a pointer to the params at height 1 + differentParams := types.DefaultConsensusParams() + differentParams.Block.MaxBytes = 20000 + err = stateStore.Save(makeRandomStateFromConsensusParams(differentParams, 10, 1)) + require.NoError(t, err) + res, err := stateStore.LoadConsensusParams(10) + require.NoError(t, err) + require.Equal(t, res, params) + require.NotEqual(t, res, differentParams) +} + func TestPruneStates(t *testing.T) { testcases := map[string]struct { makeHeights int64 - pruneFrom int64 - pruneTo int64 + pruneHeight int64 expectErr bool expectVals []int64 expectParams []int64 expectABCI []int64 }{ - "error on pruning from 0": {100, 0, 5, true, nil, nil, nil}, - "error when from > to": {100, 3, 2, true, nil, nil, nil}, - "error when from == to": {100, 3, 3, true, nil, nil, nil}, - "error when to does not exist": {100, 1, 101, true, nil, nil, nil}, - "prune all": {100, 1, 100, false, []int64{93, 100}, []int64{95, 100}, []int64{100}}, - "prune some": {10, 2, 8, false, []int64{1, 3, 8, 9, 10}, - []int64{1, 5, 8, 9, 10}, 
[]int64{1, 8, 9, 10}}, - "prune across checkpoint": {100001, 1, 100001, false, []int64{99993, 100000, 100001}, - []int64{99995, 100001}, []int64{100001}}, + "error when prune height is 0": {100, 0, true, nil, nil, nil}, + "error when prune height is negative": {100, -10, true, nil, nil, nil}, + "error when prune height does not exist": {100, 101, true, nil, nil, nil}, + "prune all": {100, 100, false, []int64{93, 100}, []int64{95, 100}, []int64{100}}, + "prune some": {10, 8, false, []int64{3, 8, 9, 10}, + []int64{5, 8, 9, 10}, []int64{8, 9, 10}}, + "prune across checkpoint": {100002, 100002, false, []int64{100000, 100002}, + []int64{99995, 100002}, []int64{100002}}, } for name, tc := range testcases { tc := tc @@ -158,7 +231,7 @@ func TestPruneStates(t *testing.T) { } // Test assertions - err := stateStore.PruneStates(tc.pruneFrom, tc.pruneTo) + err := stateStore.PruneStates(tc.pruneHeight) if tc.expectErr { require.Error(t, err) return @@ -182,7 +255,7 @@ func TestPruneStates(t *testing.T) { params, err := stateStore.LoadConsensusParams(h) if expectParams[h] { require.NoError(t, err, "params height %v", h) - require.False(t, params.Equal(&tmproto.ConsensusParams{})) + require.False(t, params.Equal(&tmproto.ConsensusParams{}), "params should not be empty") } else { require.Error(t, err, "params height %v", h) } diff --git a/test/maverick/consensus/state.go b/test/maverick/consensus/state.go index 13bc98954..852850425 100644 --- a/test/maverick/consensus/state.go +++ b/test/maverick/consensus/state.go @@ -1559,7 +1559,8 @@ func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) { if err != nil { return 0, fmt.Errorf("failed to prune block store: %w", err) } - err = cs.blockExec.Store().PruneStates(base, retainHeight) + + err = cs.blockExec.Store().PruneStates(retainHeight) if err != nil { return 0, fmt.Errorf("failed to prune state database: %w", err) } From 78e8169750758538cbbd2ecb7f74bae91f1ad139 Mon Sep 17 00:00:00 2001 From: Tess Rinearson Date: Sat, 9 
Jan 2021 15:05:34 +0100 Subject: [PATCH 6/7] docs: fix broken redirect links (#5881) --- docs/tendermint-core/metrics.md | 2 +- docs/tendermint-core/validators.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tendermint-core/metrics.md b/docs/tendermint-core/metrics.md index fd6ac354f..66a84a740 100644 --- a/docs/tendermint-core/metrics.md +++ b/docs/tendermint-core/metrics.md @@ -4,4 +4,4 @@ order: false # Metrics -This file has moved to the [node section](../node/metrics.md). +This file has moved to the [node section](../nodes/metrics.md). diff --git a/docs/tendermint-core/validators.md b/docs/tendermint-core/validators.md index b662b52d4..a1c8d6ff4 100644 --- a/docs/tendermint-core/validators.md +++ b/docs/tendermint-core/validators.md @@ -4,4 +4,4 @@ order: false # Validators -This file has moved to the [node section](../node/validators.md). +This file has moved to the [node section](../nodes/validators.md). From 1d16e39c0ea72a1e337662c35e4d50641a3df5fa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Jan 2021 14:46:30 +0100 Subject: [PATCH 7/7] build(deps): Bump gaurav-nelson/github-action-markdown-link-check (#5884) Bumps [gaurav-nelson/github-action-markdown-link-check](https://github.com/gaurav-nelson/github-action-markdown-link-check) from 1.0.11 to 1.0.12. 
- [Release notes](https://github.com/gaurav-nelson/github-action-markdown-link-check/releases) - [Commits](https://github.com/gaurav-nelson/github-action-markdown-link-check/compare/1.0.11...0fe4911067fa322422f325b002d2038ba5602170) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/linkchecker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linkchecker.yml b/.github/workflows/linkchecker.yml index d8a1e341d..fac1fde66 100644 --- a/.github/workflows/linkchecker.yml +++ b/.github/workflows/linkchecker.yml @@ -7,6 +7,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@master - - uses: gaurav-nelson/github-action-markdown-link-check@1.0.11 + - uses: gaurav-nelson/github-action-markdown-link-check@1.0.12 with: folder-path: "docs"