From 523895145dac065d2ee695c52583f5ba8a693fa9 Mon Sep 17 00:00:00 2001
From: Asias He <asias@scylladb.com>
Date: Mon, 11 Mar 2024 15:20:55 +0800
Subject: [PATCH 1/2] repair: Abort load_history process in shutdown

If the node is shutting down, there is no point in continuing to load
the repair history.

Refs #17993
---
 repair/row_level.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/repair/row_level.cc b/repair/row_level.cc
index 0e8efcdc30..4003473e60 100644
--- a/repair/row_level.cc
+++ b/repair/row_level.cc
@@ -3251,6 +3251,7 @@ future<> repair_service::load_history() {
         rlogger.info("Loading repair history for keyspace={}, table={}, table_uuid={}",
                 table->schema()->ks_name(), table->schema()->cf_name(), table_uuid);
         co_await _sys_ks.local().get_repair_history(table_uuid, [this] (const auto& entry) -> future<> {
+            get_repair_module().check_in_shutdown();
             auto start = entry.range_start == std::numeric_limits<int64_t>::min() ? dht::minimum_token() : dht::token::from_int64(entry.range_start);
             auto end = entry.range_end == std::numeric_limits<int64_t>::min() ? dht::maximum_token() : dht::token::from_int64(entry.range_end);
             auto range = dht::token_range(dht::token_range::bound(start, false), dht::token_range::bound(end, true));

From 99b7ccfa8b51c47e5b5b098db169402007794d14 Mon Sep 17 00:00:00 2001
From: Asias He <asias@scylladb.com>
Date: Mon, 25 Mar 2024 15:28:54 +0800
Subject: [PATCH 2/2] repair: Load repair history in background

Currently, we load the repair history during boot up. If the number of
repair history entries is high, it might take a while to load them.

In my test, loading 10M entries took around 60 seconds.

Loading the entries during boot up is not required. It is better to
load them in the background to speed up the boot time.

Fixes #17993
---
 repair/row_level.cc | 3 ++-
 repair/row_level.hh | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/repair/row_level.cc b/repair/row_level.cc
index 4003473e60..dbdc3e287a 100644
--- a/repair/row_level.cc
+++ b/repair/row_level.cc
@@ -3188,12 +3188,13 @@ repair_service::repair_service(distributed<gms::gossiper>& gossiper,
 }
 
 future<> repair_service::start() {
-    co_await load_history();
+    _load_history_done = load_history();
     co_await init_ms_handlers();
 }
 
 future<> repair_service::stop() {
     co_await _repair_module->stop();
+    co_await std::move(_load_history_done);
     co_await uninit_ms_handlers();
     if (this_shard_id() == 0) {
         co_await _gossiper.local().unregister_(_gossip_helper);
diff --git a/repair/row_level.hh b/repair/row_level.hh
index 9c4825956b..c9c72b6ba1 100644
--- a/repair/row_level.hh
+++ b/repair/row_level.hh
@@ -110,6 +110,8 @@ class repair_service : public seastar::peering_sharded_service<repair_service> {
     seastar::semaphore _memory_sem;
     seastar::named_semaphore _load_parallelism_semaphore = {16, named_semaphore_exception_factory{"Load repair history parallelism"}};
 
+    future<> _load_history_done = make_ready_future<>();
+
     future<> init_ms_handlers();
     future<> uninit_ms_handlers();