topology_coordinator: Refresh load stats after table is created or altered

We switched to size-based load balancing, which has stricter requirements
for load stats: it now needs not only per-node stats (capacity), but also
per-tablet stats.

Bootstrapping a node triggers a stats refresh, but allocating tablets on
table creation didn't. So after creating a table, the load balancer
couldn't make progress for up to 60s (the stats refresh period).

This makes tests take longer, and can even cause failures if tests are
using a low-enough timeout.

Fixes #27921
This commit is contained in:
Tomasz Grabiec
2025-12-30 20:44:36 +01:00
parent 663831ebd7
commit 2b7aa3211d

View File

@@ -119,7 +119,8 @@ sstring get_application_state_gently(const gms::application_state_map& epmap, gm
}
} // namespace
class topology_coordinator : public endpoint_lifecycle_subscriber {
class topology_coordinator : public endpoint_lifecycle_subscriber
, public migration_listener::empty_listener {
sharded<db::system_distributed_keyspace>& _sys_dist_ks;
gms::gossiper& _gossiper;
netw::messaging_service& _messaging;
@@ -2296,6 +2297,30 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
});
}
// migration_listener hook: invoked after a new table (column family) is created.
// Creating a table allocates tablets for it, and the size-based load balancer
// needs fresh per-tablet stats before it can place them — so force an immediate
// stats refresh rather than waiting for the periodic (60s) refresh cycle.
virtual void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override {
// New tablets were allocated, we need per-tablet stats for them for tablet balancer to make progress.
trigger_load_stats_refresh();
}
// migration_listener hook: invoked after a materialized view is created.
// Treated like table creation — presumably tablets are allocated for the view
// as well, so trigger a stats refresh so the balancer can make progress.
virtual void on_create_view(const sstring& ks_name, const sstring& view_name) override {
trigger_load_stats_refresh();
}
// migration_listener hook: invoked after a table's schema is altered.
// No new tablets exist here, so no stats refresh is needed — just wake the
// topology coordinator loop so the balancer re-evaluates tablet distribution.
virtual void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) override {
// Tablet hints may have changed. Wake up so that load balancer re-evaluates tablet distribution.
_topo_sm.event.broadcast();
}
// migration_listener hook: invoked after a table is dropped. Its tablets go
// away with it, changing the overall distribution — wake the balancer so it
// can react; no fresh per-tablet stats are required for removed tablets.
virtual void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
// Tablet distribution has changed. Wake up the load balancer.
_topo_sm.event.broadcast();
}
// migration_listener hook: invoked after a materialized view is dropped.
// Same handling as dropping a table — wake the balancer to re-evaluate.
virtual void on_drop_view(const sstring& ks_name, const sstring& view_name) override {
// Tablet distribution has changed. Wake up the load balancer.
_topo_sm.event.broadcast();
}
future<> cancel_all_requests(group0_guard guard, std::unordered_set<raft::server_id> dead_nodes) {
utils::chunked_vector<canonical_mutation> muts;
std::vector<raft::server_id> reject_join;
@@ -3630,15 +3655,17 @@ public:
, _voter_handler(group0, topo_sm._topology, gossiper, feature_service)
, _topology_cmd_rpc_tracker(topology_cmd_rpc_tracker)
, _async_gate("topology_coordinator")
{}
{
_db.get_notifier().register_listener(this);
}
// Returns true if the upgrade was done, returns false if upgrade was interrupted.
future<bool> maybe_run_upgrade();
future<> run();
future<> stop();
virtual void on_up(const gms::inet_address& endpoint, locator::host_id hid) { _topo_sm.event.broadcast(); };
virtual void on_down(const gms::inet_address& endpoint, locator::host_id hid) { _topo_sm.event.broadcast(); };
// endpoint_lifecycle_subscriber hooks: node liveness changes affect what the
// topology coordinator can/should do, so wake its main loop. (`override` added
// so the compiler verifies these actually match the base-class signatures.)
virtual void on_up(const gms::inet_address& endpoint, locator::host_id hid) override { _topo_sm.event.broadcast(); };
virtual void on_down(const gms::inet_address& endpoint, locator::host_id hid) override { _topo_sm.event.broadcast(); };
private:
tablet_ops_metrics _tablet_ops_metrics;
@@ -4225,6 +4252,8 @@ future<> topology_coordinator::run() {
}
future<> topology_coordinator::stop() {
co_await _db.get_notifier().unregister_listener(this);
// if topology_coordinator::run() is aborted either because we are not a
// leader anymore, or we are shutting down as a leader, we have to handle
// futures in _tablets in case any of them failed, before these failures