From 74a3fa9671104e559f70a4cdc2a460a76cd52a15 Mon Sep 17 00:00:00 2001 From: Michael Litvak Date: Wed, 25 Jun 2025 14:10:52 +0300 Subject: [PATCH] batchlog_manager: set timeout on writes Set a timeout on writes of replayed batches by the batchlog manager. We want to avoid having an infinite timeout for the writes in case one of them gets stuck for some unexpected reason. The timeout is set to be high enough to allow any reasonable write to complete. --- db/batchlog_manager.cc | 3 ++- db/batchlog_manager.hh | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/db/batchlog_manager.cc b/db/batchlog_manager.cc index 39c30a3514..33f18c6e44 100644 --- a/db/batchlog_manager.cc +++ b/db/batchlog_manager.cc @@ -244,7 +244,8 @@ future<> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cle // send to partially or wholly fail in actually sending stuff. Since we don't // have hints (yet), send with CL=ALL, and hope we can re-do this soon. // See below, we use retry on write failure. - return _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), db::no_timeout); + auto timeout = db::timeout_clock::now() + write_timeout; + return _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout); }); }).then_wrapped([this, id](future<> batch_result) { try { diff --git a/db/batchlog_manager.hh b/db/batchlog_manager.hh index 6f54e32032..e66708e4a0 100644 --- a/db/batchlog_manager.hh +++ b/db/batchlog_manager.hh @@ -45,6 +45,7 @@ public: private: static constexpr uint32_t replay_interval = 60 * 1000; // milliseconds static constexpr uint32_t page_size = 128; // same as HHOM, for now, w/out using any heuristics. TODO: set based on avg batch size. + static constexpr std::chrono::seconds write_timeout = std::chrono::seconds(300); using clock_type = lowres_clock;