From 132aa753daa83352dfcd2063956affffdda7e889 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 16 Dec 2025 13:56:34 +0300 Subject: [PATCH] sstables/storage_manager: Fix configured endpoints observer On start the manager creates observer for object_storage_endpoints config parameter. The goal is to refresh the maintained set of endpoint parameters and client upon config change. The observer is created on shard 0 only, and when kicked it calls manager.invoke-on-all to update manager on all shards. However, there's a race here. The thing is that db::config values are implicitly "sharded" under the hood with the help of plain array. When any code tries to read a value from db::config::something, the reading code secretly gets the value from this inner array indexed by the current shard id. Next, when the config is updated, it first assigns new values to [0] element of the hidden array, then calls broadcast_to_all_shards() helper that copies the valaues from zeroth slot to all the others. But the manager's observer is triggered when the new value is assigned on zero index, and if the invoke-on-all lambda (mentioned above) happens to be faster than broadcast_to_all_shards, the non-zero shards will read old values from db::config's inner array. The fix is to instantiate observer on all shards and update only local shard, whenever this update is triggered. Signed-off-by: Pavel Emelyanov --- sstables/sstables_manager.cc | 6 ++---- test/cluster/object_store/test_backup.py | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sstables/sstables_manager.cc b/sstables/sstables_manager.cc index 2c4dc8e811..4d2fe4c223 100644 --- a/sstables/sstables_manager.cc +++ b/sstables/sstables_manager.cc @@ -79,7 +79,7 @@ storage_manager::object_storage_endpoint::object_storage_endpoint(db::object_sto storage_manager::storage_manager(const db::config& cfg, config stm_cfg) : _object_storage_clients_memory(stm_cfg.object_storage_clients_memory) - , _config_updater(this_shard_id() == 0 ? std::make_unique(cfg, *this) : nullptr) + , _config_updater(std::make_unique(cfg, *this)) { for (auto& e : cfg.object_storage_endpoints()) { _object_storage_endpoints.emplace(std::make_pair(e.key(), e)); @@ -170,9 +170,7 @@ std::vector storage_manager::endpoints(sstring type) const noexcept { storage_manager::config_updater::config_updater(const db::config& cfg, storage_manager& sstm) : action([&sstm, &cfg] () mutable { - return sstm.container().invoke_on_all([&cfg](auto& sstm) -> future<> { - co_await sstm.update_config(cfg); - }); + return sstm.update_config(cfg); }) , observer(cfg.object_storage_endpoints.observe(action.make_observer())) {} diff --git a/test/cluster/object_store/test_backup.py b/test/cluster/object_store/test_backup.py index 601963bfb7..0b92c478b8 100644 --- a/test/cluster/object_store/test_backup.py +++ b/test/cluster/object_store/test_backup.py @@ -166,7 +166,6 @@ async def test_backup_with_non_existing_parameters(manager: ManagerClient, objec @pytest.mark.asyncio -@pytest.mark.xfail async def test_backup_endpoint_config_is_live_updateable(manager: ManagerClient, object_storage): '''backup should fail if the endpoint is invalid/inaccessible after updating the config, it should succeed'''