From 523895145dac065d2ee695c52583f5ba8a693fa9 Mon Sep 17 00:00:00 2001 From: Asias He Date: Mon, 11 Mar 2024 15:20:55 +0800 Subject: [PATCH 1/2] repair: Abort load_history process in shutdown If the node is shutting down, there is no point in continuing to load the repair history. Refs #17993 --- repair/row_level.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/repair/row_level.cc b/repair/row_level.cc index 0e8efcdc30..4003473e60 100644 --- a/repair/row_level.cc +++ b/repair/row_level.cc @@ -3251,6 +3251,7 @@ future<> repair_service::load_history() { rlogger.info("Loading repair history for keyspace={}, table={}, table_uuid={}", table->schema()->ks_name(), table->schema()->cf_name(), table_uuid); co_await _sys_ks.local().get_repair_history(table_uuid, [this] (const auto& entry) -> future<> { + get_repair_module().check_in_shutdown(); auto start = entry.range_start == std::numeric_limits<int64_t>::min() ? dht::minimum_token() : dht::token::from_int64(entry.range_start); auto end = entry.range_end == std::numeric_limits<int64_t>::min() ? dht::maximum_token() : dht::token::from_int64(entry.range_end); auto range = dht::token_range(dht::token_range::bound(start, false), dht::token_range::bound(end, true)); From 99b7ccfa8b51c47e5b5b098db169402007794d14 Mon Sep 17 00:00:00 2001 From: Asias He Date: Mon, 25 Mar 2024 15:28:54 +0800 Subject: [PATCH 2/2] repair: Load repair history in background Currently, we load the repair history during boot-up. If the number of repair history entries is high, it might take a while to load them. In my test, loading 10M entries took around 60 seconds. Loading the entries during boot-up is not required, so it is better to load them in the background to speed up the boot time. 
Fixes #17993 --- repair/row_level.cc | 3 ++- repair/row_level.hh | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/repair/row_level.cc b/repair/row_level.cc index 4003473e60..dbdc3e287a 100644 --- a/repair/row_level.cc +++ b/repair/row_level.cc @@ -3188,12 +3188,13 @@ repair_service::repair_service(distributed<gms::gossiper>& gossiper, } future<> repair_service::start() { - co_await load_history(); + _load_history_done = load_history(); co_await init_ms_handlers(); } future<> repair_service::stop() { co_await _repair_module->stop(); + co_await std::move(_load_history_done); co_await uninit_ms_handlers(); if (this_shard_id() == 0) { co_await _gossiper.local().unregister_(_gossip_helper); diff --git a/repair/row_level.hh b/repair/row_level.hh index 9c4825956b..c9c72b6ba1 100644 --- a/repair/row_level.hh +++ b/repair/row_level.hh @@ -110,6 +110,8 @@ class repair_service : public seastar::peering_sharded_service<repair_service> { seastar::semaphore _memory_sem; seastar::named_semaphore _load_parallelism_semaphore = {16, named_semaphore_exception_factory{"Load repair history parallelism"}}; + future<> _load_history_done = make_ready_future<>(); + future<> init_ms_handlers(); future<> uninit_ms_handlers();