mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-12 19:02:12 +00:00
service: Improve error handling for split
Retry wasn't really happening since the loop was broken and sleep part was skipped on error. Also, we were treating abort of split during shutdown as if it were an actual error and that confused longevity tests that parse for logs with error level. The fix is about demoting the level of logs when we know the exception comes from shutdown. Fixes #20890.
This commit is contained in:
@@ -5386,9 +5386,9 @@ future<> storage_service::process_tablet_split_candidate(table_id table) noexcep
|
||||
};
|
||||
|
||||
exponential_backoff_retry split_retry = exponential_backoff_retry(std::chrono::seconds(5), std::chrono::seconds(300));
|
||||
bool sleep = false;
|
||||
|
||||
while (!_async_gate.is_closed() && !_group0_as.abort_requested()) {
|
||||
bool sleep = false;
|
||||
try {
|
||||
// Ensures that latest changes to tablet metadata, in group0, are visible
|
||||
auto guard = co_await _group0->client().start_operation(_group0_as);
|
||||
@@ -5406,11 +5406,16 @@ future<> storage_service::process_tablet_split_candidate(table_id table) noexcep
|
||||
release_guard(std::move(guard));
|
||||
co_await split_all_compaction_groups();
|
||||
}
|
||||
} catch (const seastar::abort_requested_exception& ex) {
|
||||
slogger.warn("Failed to complete splitting of table {} due to {}", table, ex);
|
||||
break;
|
||||
} catch (raft::request_aborted& ex) {
|
||||
slogger.warn("Failed to complete splitting of table {} due to {}", table, ex);
|
||||
break;
|
||||
} catch (...) {
|
||||
slogger.error("Failed to complete splitting of table {} due to {}, retrying after {} seconds",
|
||||
table, std::current_exception(), split_retry.sleep_time());
|
||||
sleep = true;
|
||||
break;
|
||||
}
|
||||
if (sleep) {
|
||||
co_await split_retry.retry(_group0_as);
|
||||
|
||||
Reference in New Issue
Block a user