range_streamer: Disable restream logic
Consider: - n1 and n2 are in the cluster - n3 bootstraps to join - n1 does not hear gossip updates from n3 due to a network issue - n1 removes n3 from gossip and from the pending node list - the stream between n1 and n3 fails - the network issue between n1 and n3 is fixed - n3 retries the stream with n1 - n3 finishes the stream with n1 - n3 advertises normal status to join the cluster. The problem is that n1 will not treat n3 as a pending node, so writes will not be routed to n3 once n1 removes n3. Another problem is that when n1 gets the normal gossip status update from n3, the gossip listener will fail because n1 has removed n3, so n1 cannot find the host id for n3. This will cause n1 to abort. To fix, disable the retry logic in range_streamer so that once a stream with an existing node fails, the bootstrap fails. The downside is that we lose the ability to restream after a failure caused by a temporary network issue, but since we have repair-based node operations, we can use them to resume previously failed node operations. Fixes: #9805 Closes #9806
This commit is contained in:
@@ -251,25 +251,6 @@ future<> range_streamer::add_ranges(const sstring& keyspace_name, dht::token_ran
|
||||
}
|
||||
|
||||
// Runs do_stream_async() inside seastar::async, retrying when it fails.
//
// Each failed attempt increments _nr_retried. Once _nr_retried reaches
// _nr_max_retry, the exception from the last attempt is rethrown right away,
// so the returned future fails with it. Otherwise a warning is logged, the
// function waits sleep_time seconds and then tries again. The delay starts at
// 60 seconds and grows by a factor of 1.5 after every wait.
//
// An exception thrown by sleep_abortable() (it is given _abort_source) is not
// caught here and also fails the returned future.
future<> range_streamer::stream_async() {
    return seastar::async([this] {
        int sleep_time = 60;
        for (;;) {
            try {
                do_stream_async().get();
                break;
            } catch (...) {
                // Check the retry budget before logging and sleeping: when no
                // further attempt will be made there is no point in announcing
                // a retry or in delaying the error by a full back-off period.
                if (++_nr_retried >= _nr_max_retry) {
                    throw;
                }
                logger.warn("{} failed to stream. Will retry in {} seconds ...", _description, sleep_time);
                sleep_abortable(std::chrono::seconds(sleep_time), _abort_source).get();
                // Integer form of "*= 1.5"; truncates the same way for the
                // non-negative values used here, without an int/double round trip.
                sleep_time = sleep_time * 3 / 2;
            }
        }
    });
}
|
||||
|
||||
future<> range_streamer::do_stream_async() {
|
||||
auto nr_ranges_remaining = nr_ranges_to_stream();
|
||||
logger.info("{} starts, nr_ranges_remaining={}", _description, nr_ranges_remaining);
|
||||
auto start = lowres_clock::now();
|
||||
|
||||
@@ -142,7 +142,6 @@ private:
|
||||
}
|
||||
public:
|
||||
future<> stream_async();
|
||||
future<> do_stream_async();
|
||||
size_t nr_ranges_to_stream();
|
||||
private:
|
||||
distributed<replica::database>& _db;
|
||||
@@ -155,9 +154,6 @@ private:
|
||||
streaming::stream_reason _reason;
|
||||
std::unordered_multimap<sstring, std::unordered_map<inet_address, dht::token_range_vector>> _to_stream;
|
||||
std::unordered_set<std::unique_ptr<i_source_filter>> _source_filters;
|
||||
// Retry the stream plan _nr_max_retry times
|
||||
unsigned _nr_retried = 0;
|
||||
unsigned _nr_max_retry = 5;
|
||||
// Number of tx and rx ranges added
|
||||
unsigned _nr_tx_added = 0;
|
||||
unsigned _nr_rx_added = 0;
|
||||
|
||||
Reference in New Issue
Block a user