p2p: fix file descriptor leaks (#3150)

* close peer's connection to avoid fd leak

Fixes #2967

* rename peer#Addr to RemoteAddr

* fix test

* fixes after Ethan's review

* bring back the check

* changelog entry

* write a test for switch#acceptRoutine

* increase timeouts? :(

* remove extra assertNPeersWithTimeout

* simplify test

* assert number of peers (just to be safe)

* Cleanup in OnStop

* run tests with verbose flag on CircleCI

* spawn a reading routine to prevent connection from closing

* get port from the listener

Using a random port is faster, but often results in an "address already in use" panic:

```
panic: listen tcp 127.0.0.1:44068: bind: address already in use [recovered]
        panic: listen tcp 127.0.0.1:44068: bind: address already in use

goroutine 79 [running]:
testing.tRunner.func1(0xc0001bd600)
        /usr/local/go/src/testing/testing.go:792 +0x387
panic(0x974d20, 0xc0001b0500)
        /usr/local/go/src/runtime/panic.go:513 +0x1b9
github.com/tendermint/tendermint/p2p.MakeSwitch(0xc0000f42a0, 0x0, 0x9fb9cc, 0x9, 0x9fc346, 0xb, 0xb42128, 0x0, 0x0, 0x0, ...)
        /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:182 +0xa28
github.com/tendermint/tendermint/p2p.MakeConnectedSwitches(0xc0000f42a0, 0x2, 0xb42128, 0xb41eb8, 0x4f1205, 0xc0001bed80, 0x4f16ed)
        /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/test_util.go:75 +0xf9
github.com/tendermint/tendermint/p2p.MakeSwitchPair(0xbb8d20, 0xc0001bd600, 0xb42128, 0x2f7, 0x4f16c0)
        /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:94 +0x4c
github.com/tendermint/tendermint/p2p.TestSwitches(0xc0001bd600)
        /home/vagrant/go/src/github.com/tendermint/tendermint/p2p/switch_test.go:117 +0x58
testing.tRunner(0xc0001bd600, 0xb42038)
        /usr/local/go/src/testing/testing.go:827 +0xbf
created by testing.(*T).Run
        /usr/local/go/src/testing/testing.go:878 +0x353
exit status 2
FAIL    github.com/tendermint/tendermint/p2p    0.350s
```
This commit is contained in:
Anton Kaliaev
2019-01-22 22:23:18 +04:00
committed by Ethan Buchman
parent 3362da0a69
commit 2449bf7300
12 changed files with 170 additions and 42 deletions

View File

@@ -210,6 +210,7 @@ func (sw *Switch) OnStart() error {
func (sw *Switch) OnStop() {
// Stop peers
for _, p := range sw.peers.List() {
sw.transport.Cleanup(p)
p.Stop()
if sw.peers.Remove(p) {
sw.metrics.Peers.Add(float64(-1))
@@ -304,6 +305,7 @@ func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
if sw.peers.Remove(peer) {
sw.metrics.Peers.Add(float64(-1))
}
sw.transport.Cleanup(peer)
peer.Stop()
for _, reactor := range sw.reactors {
reactor.RemovePeer(peer, reason)
@@ -529,13 +531,16 @@ func (sw *Switch) acceptRoutine() {
"max", sw.config.MaxNumInboundPeers,
)
_ = p.Stop()
sw.transport.Cleanup(p)
continue
}
if err := sw.addPeer(p); err != nil {
_ = p.Stop()
sw.transport.Cleanup(p)
if p.IsRunning() {
_ = p.Stop()
}
sw.Logger.Info(
"Ignoring inbound connection: error while adding peer",
"err", err,
@@ -593,7 +598,10 @@ func (sw *Switch) addOutboundPeerWithConfig(
}
if err := sw.addPeer(p); err != nil {
_ = p.Stop()
sw.transport.Cleanup(p)
if p.IsRunning() {
_ = p.Stop()
}
return err
}
@@ -628,7 +636,8 @@ func (sw *Switch) filterPeer(p Peer) error {
return nil
}
// addPeer starts up the Peer and adds it to the Switch.
// addPeer starts up the Peer and adds it to the Switch. Error is returned if
// the peer is filtered out or failed to start or can't be added.
func (sw *Switch) addPeer(p Peer) error {
if err := sw.filterPeer(p); err != nil {
return err
@@ -636,11 +645,15 @@ func (sw *Switch) addPeer(p Peer) error {
p.SetLogger(sw.Logger.With("peer", p.NodeInfo().NetAddress()))
// All good. Start peer
// Handle the shut down case where the switch has stopped but we're
// concurrently trying to add a peer.
if sw.IsRunning() {
// All good. Start peer
if err := sw.startInitPeer(p); err != nil {
return err
}
} else {
sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
}
// Add the peer to .peers.