From 1bb7e31d63e72e1017ac68a61a498b22a137a028 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Tue, 4 Dec 2018 19:16:06 -0500 Subject: [PATCH] p2p: panic on transport error (#2968) * p2p: panic on transport error Addresses #2823. Currently, the acceptRoutine exits if the transport returns an error trying to accept a new connection. Once this happens, the node can't accept any new connections. So here, we panic instead. While we could potentially be more intelligent by rerunning the acceptRoutine, the error may indicate something more fundamental (eg. file desriptor limit) that requires a restart anyways. We can leave it to process managers to handle that restart, and notify operators about the panic. * changelog --- CHANGELOG.md | 2 ++ p2p/switch.go | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce6f2f6dd..47528bce9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,8 @@ key types that can be used by validators. - keep accums averaged near 0 - [types] [\#2941](https://github.com/tendermint/tendermint/issues/2941) Preserve val.Accum during ValidatorSet.Update to avoid it being reset to 0 every time a validator is updated +- [p2p] \#2968 Panic on transport error rather than continuing to run but not + accept new connections ## v0.26.4 diff --git a/p2p/switch.go b/p2p/switch.go index 4996ebd91..eece71316 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -505,6 +505,12 @@ func (sw *Switch) acceptRoutine() { "err", err, "numPeers", sw.peers.Size(), ) + // We could instead have a retry loop around the acceptRoutine, + // but that would need to stop and let the node shutdown eventually. + // So might as well panic and let process managers restart the node. + // There's no point in letting the node run without the acceptRoutine, + // since it won't be able to accept new connections. + panic(fmt.Errorf("accept routine exited: %v", err)) } break