Compare commits
82 Commits
copilot/fi
...
copilot/fi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb0d8a38f1 | ||
|
|
12787302bf | ||
|
|
f65db4e8eb | ||
|
|
df2ac0f257 | ||
|
|
093e97a539 | ||
|
|
fa6e5d0754 | ||
|
|
08518b2c12 | ||
|
|
2a75b1374e | ||
|
|
2cb9bb8f3a | ||
|
|
f1d63d014c | ||
|
|
33f7bc28da | ||
|
|
f831ca5ab5 | ||
|
|
1fe0509a9b | ||
|
|
e7d76fd8f3 | ||
|
|
700853740d | ||
|
|
3c5dd5e5ae | ||
|
|
5971b2ad97 | ||
|
|
f89315d02f | ||
|
|
d5c205194b | ||
|
|
6ad10b141a | ||
|
|
8cf8e6c87d | ||
|
|
3a06c32749 | ||
|
|
74ab5addd3 | ||
|
|
55f4a2b754 | ||
|
|
1642c686c2 | ||
|
|
9431826c52 | ||
|
|
ba6fabfc88 | ||
|
|
a6618f225c | ||
|
|
0bfd07a268 | ||
|
|
c077283352 | ||
|
|
7061384a27 | ||
|
|
7bc59e93b2 | ||
|
|
a61c221902 | ||
|
|
386ec0af4e | ||
|
|
c4496dd63c | ||
|
|
84df5cfaf8 | ||
|
|
f06db096bd | ||
|
|
31f90c089c | ||
|
|
c58739de6a | ||
|
|
9e18cfbe17 | ||
|
|
7900aa5319 | ||
|
|
9d20f0a3d2 | ||
|
|
0476e8d272 | ||
|
|
e48789cf6c | ||
|
|
9039dfa4a5 | ||
|
|
1884e655d6 | ||
|
|
640c491388 | ||
|
|
cd83d1d4dc | ||
|
|
bbe0b01b14 | ||
|
|
2e7ba1f8ce | ||
|
|
b3a0e4c2dc | ||
|
|
08e5f35527 | ||
|
|
d66a36058b | ||
|
|
6681c0f33f | ||
|
|
13e9ee3f6f | ||
|
|
71e6ef90f4 | ||
|
|
902803babd | ||
|
|
4ed17c9e88 | ||
|
|
73db5c94de | ||
|
|
85f05fbe1b | ||
|
|
83f46fa7f5 | ||
|
|
f1fc5cc808 | ||
|
|
61bbea51ad | ||
|
|
c2b1b10ca0 | ||
|
|
ec87b92ba1 | ||
|
|
9c9371511f | ||
|
|
2e80997630 | ||
|
|
1143acaf5b | ||
|
|
e153cc434f | ||
|
|
64d9c370ee | ||
|
|
a3959fe3db | ||
|
|
f287484f4d | ||
|
|
70a0418102 | ||
|
|
6fcc1ecf94 | ||
|
|
8dde70d04c | ||
|
|
2e7070d3b7 | ||
|
|
a9442e6d56 | ||
|
|
d21faab9dc | ||
|
|
30f6a40ae6 | ||
|
|
5579489c4c | ||
|
|
17c9d640fe | ||
|
|
f98af582a7 |
14
.github/workflows/call_sync_milestone_to_jira.yml
vendored
Normal file
14
.github/workflows/call_sync_milestone_to_jira.yml
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
name: Call Jira release creation for new milestone
|
||||
|
||||
on:
|
||||
milestone:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
sync-milestone-to-jira:
|
||||
uses: scylladb/github-automation/.github/workflows/main_sync_milestone_to_jira_release.yml@main
|
||||
with:
|
||||
# Comma-separated list of Jira project keys
|
||||
jira_project_keys: "SCYLLADB,CUSTOMER"
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
@@ -31,6 +31,7 @@ set(swagger_files
|
||||
api-doc/column_family.json
|
||||
api-doc/commitlog.json
|
||||
api-doc/compaction_manager.json
|
||||
api-doc/client_routes.json
|
||||
api-doc/config.json
|
||||
api-doc/cql_server_test.json
|
||||
api-doc/endpoint_snitch_info.json
|
||||
@@ -68,6 +69,7 @@ target_sources(api
|
||||
PRIVATE
|
||||
api.cc
|
||||
cache_service.cc
|
||||
client_routes.cc
|
||||
collectd.cc
|
||||
column_family.cc
|
||||
commitlog.cc
|
||||
|
||||
23
api/api-doc/client_routes.def.json
Normal file
23
api/api-doc/client_routes.def.json
Normal file
@@ -0,0 +1,23 @@
|
||||
, "client_routes_entry": {
|
||||
"id": "client_routes_entry",
|
||||
"summary": "An entry storing client routes",
|
||||
"properties": {
|
||||
"connection_id": {"type": "string"},
|
||||
"host_id": {"type": "string", "format": "uuid"},
|
||||
"address": {"type": "string"},
|
||||
"port": {"type": "integer"},
|
||||
"tls_port": {"type": "integer"},
|
||||
"alternator_port": {"type": "integer"},
|
||||
"alternator_https_port": {"type": "integer"}
|
||||
},
|
||||
"required": ["connection_id", "host_id", "address"]
|
||||
}
|
||||
, "client_routes_key": {
|
||||
"id": "client_routes_key",
|
||||
"summary": "A key of client_routes_entry",
|
||||
"properties": {
|
||||
"connection_id": {"type": "string"},
|
||||
"host_id": {"type": "string", "format": "uuid"}
|
||||
}
|
||||
}
|
||||
|
||||
74
api/api-doc/client_routes.json
Normal file
74
api/api-doc/client_routes.json
Normal file
@@ -0,0 +1,74 @@
|
||||
, "/v2/client-routes":{
|
||||
"get": {
|
||||
"description":"List all client route entries",
|
||||
"operationId":"get_client_routes",
|
||||
"tags":["client_routes"],
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[],
|
||||
"responses":{
|
||||
"200":{
|
||||
"schema":{
|
||||
"type":"array",
|
||||
"items":{ "$ref":"#/definitions/client_routes_entry" }
|
||||
}
|
||||
},
|
||||
"default":{
|
||||
"description":"unexpected error",
|
||||
"schema":{"$ref":"#/definitions/ErrorModel"}
|
||||
}
|
||||
}
|
||||
},
|
||||
"post": {
|
||||
"description":"Upsert one or more client route entries",
|
||||
"operationId":"set_client_routes",
|
||||
"tags":["client_routes"],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"body",
|
||||
"in":"body",
|
||||
"required":true,
|
||||
"schema":{
|
||||
"type":"array",
|
||||
"items":{ "$ref":"#/definitions/client_routes_entry" }
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses":{
|
||||
"200":{ "description": "OK" },
|
||||
"default":{
|
||||
"description":"unexpected error",
|
||||
"schema":{ "$ref":"#/definitions/ErrorModel" }
|
||||
}
|
||||
}
|
||||
},
|
||||
"delete": {
|
||||
"description":"Delete one or more client route entries",
|
||||
"operationId":"delete_client_routes",
|
||||
"tags":["client_routes"],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"body",
|
||||
"in":"body",
|
||||
"required":true,
|
||||
"schema":{
|
||||
"type":"array",
|
||||
"items":{ "$ref":"#/definitions/client_routes_key" }
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses":{
|
||||
"200":{
|
||||
"description": "OK"
|
||||
},
|
||||
"default":{
|
||||
"description":"unexpected error",
|
||||
"schema":{
|
||||
"$ref":"#/definitions/ErrorModel"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
13
api/api.cc
13
api/api.cc
@@ -37,6 +37,7 @@
|
||||
#include "raft.hh"
|
||||
#include "gms/gossip_address_map.hh"
|
||||
#include "service_levels.hh"
|
||||
#include "client_routes.hh"
|
||||
|
||||
logging::logger apilog("api");
|
||||
|
||||
@@ -67,9 +68,11 @@ future<> set_server_init(http_context& ctx) {
|
||||
rb02->set_api_doc(r);
|
||||
rb02->register_api_file(r, "swagger20_header");
|
||||
rb02->register_api_file(r, "metrics");
|
||||
rb02->register_api_file(r, "client_routes");
|
||||
rb->register_function(r, "system",
|
||||
"The system related API");
|
||||
rb02->add_definitions_file(r, "metrics");
|
||||
rb02->add_definitions_file(r, "client_routes");
|
||||
set_system(ctx, r);
|
||||
rb->register_function(r, "error_injection",
|
||||
"The error injection API");
|
||||
@@ -129,6 +132,16 @@ future<> unset_server_storage_service(http_context& ctx) {
|
||||
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_storage_service(ctx, r); });
|
||||
}
|
||||
|
||||
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr) {
|
||||
return ctx.http_server.set_routes([&ctx, &cr] (routes& r) {
|
||||
set_client_routes(ctx, r, cr);
|
||||
});
|
||||
}
|
||||
|
||||
future<> unset_server_client_routes(http_context& ctx) {
|
||||
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_client_routes(ctx, r); });
|
||||
}
|
||||
|
||||
future<> set_load_meter(http_context& ctx, service::load_meter& lm) {
|
||||
return ctx.http_server.set_routes([&ctx, &lm] (routes& r) { set_load_meter(ctx, r, lm); });
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@ class storage_proxy;
|
||||
class storage_service;
|
||||
class raft_group0_client;
|
||||
class raft_group_registry;
|
||||
class client_routes_service;
|
||||
|
||||
} // namespace service
|
||||
|
||||
@@ -99,6 +100,8 @@ future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snit
|
||||
future<> unset_server_snitch(http_context& ctx);
|
||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client&);
|
||||
future<> unset_server_storage_service(http_context& ctx);
|
||||
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr);
|
||||
future<> unset_server_client_routes(http_context& ctx);
|
||||
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader);
|
||||
future<> unset_server_sstables_loader(http_context& ctx);
|
||||
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g);
|
||||
|
||||
178
api/client_routes.cc
Normal file
178
api/client_routes.cc
Normal file
@@ -0,0 +1,178 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include <seastar/http/short_streams.hh>
|
||||
|
||||
#include "client_routes.hh"
|
||||
#include "api/api.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "service/client_routes.hh"
|
||||
#include "utils/rjson.hh"
|
||||
|
||||
|
||||
#include "api/api-doc/client_routes.json.hh"
|
||||
|
||||
using namespace seastar::httpd;
|
||||
using namespace std::chrono_literals;
|
||||
using namespace json;
|
||||
|
||||
extern logging::logger apilog;
|
||||
|
||||
namespace api {
|
||||
|
||||
static void validate_client_routes_endpoint(sharded<service::client_routes_service>& cr, sstring endpoint_name) {
|
||||
if (!cr.local().get_feature_service().client_routes) {
|
||||
apilog.warn("{}: called before the cluster feature was enabled", endpoint_name);
|
||||
throw std::runtime_error(fmt::format("{} requires all nodes to support the CLIENT_ROUTES cluster feature", endpoint_name));
|
||||
}
|
||||
}
|
||||
|
||||
static sstring parse_string(const char* name, rapidjson::Value const& v) {
|
||||
const auto it = v.FindMember(name);
|
||||
if (it == v.MemberEnd()) {
|
||||
throw bad_param_exception(fmt::format("Missing '{}'", name));
|
||||
}
|
||||
if (!it->value.IsString()) {
|
||||
throw bad_param_exception(fmt::format("'{}' must be a string", name));
|
||||
}
|
||||
return {it->value.GetString(), it->value.GetStringLength()};
|
||||
}
|
||||
|
||||
static std::optional<uint32_t> parse_port(const char* name, rapidjson::Value const& v) {
|
||||
const auto it = v.FindMember(name);
|
||||
if (it == v.MemberEnd()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
if (!it->value.IsInt()) {
|
||||
throw bad_param_exception(fmt::format("'{}' must be an integer", name));
|
||||
}
|
||||
auto port = it->value.GetInt();
|
||||
if (port < 1 || port > 65535) {
|
||||
throw bad_param_exception(fmt::format("'{}' value={} is outside the allowed port range", name, port));
|
||||
}
|
||||
return port;
|
||||
}
|
||||
|
||||
static std::vector<service::client_routes_service::client_route_entry> parse_set_client_array(const rapidjson::Document& root) {
|
||||
if (!root.IsArray()) {
|
||||
throw bad_param_exception("Body must be a JSON array");
|
||||
}
|
||||
|
||||
std::vector<service::client_routes_service::client_route_entry> v;
|
||||
v.reserve(root.GetArray().Size());
|
||||
for (const auto& element : root.GetArray()) {
|
||||
if (!element.IsObject()) { throw bad_param_exception("Each element must be object"); }
|
||||
|
||||
const auto port = parse_port("port", element);
|
||||
const auto tls_port = parse_port("tls_port", element);
|
||||
const auto alternator_port = parse_port("alternator_port", element);
|
||||
const auto alternator_https_port = parse_port("alternator_https_port", element);
|
||||
|
||||
if (!port.has_value() && !tls_port.has_value() && !alternator_port.has_value() && !alternator_https_port.has_value()) {
|
||||
throw bad_param_exception("At least one port field ('port', 'tls_port', 'alternator_port', 'alternator_https_port') must be specified");
|
||||
}
|
||||
|
||||
v.emplace_back(
|
||||
parse_string("connection_id", element),
|
||||
utils::UUID{parse_string("host_id", element)},
|
||||
parse_string("address", element),
|
||||
port,
|
||||
tls_port,
|
||||
alternator_port,
|
||||
alternator_https_port
|
||||
);
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_set_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
|
||||
validate_client_routes_endpoint(cr, "rest_set_client_routes");
|
||||
|
||||
rapidjson::Document root;
|
||||
auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
|
||||
root.Parse(content.c_str());
|
||||
const auto route_entries = parse_set_client_array(root);
|
||||
|
||||
co_await cr.local().set_client_routes(route_entries);
|
||||
co_return seastar::json::json_void();
|
||||
}
|
||||
|
||||
static std::vector<service::client_routes_service::client_route_key> parse_delete_client_array(const rapidjson::Document& root) {
|
||||
if (!root.IsArray()) {
|
||||
throw bad_param_exception("Body must be a JSON array");
|
||||
}
|
||||
|
||||
std::vector<service::client_routes_service::client_route_key> v;
|
||||
v.reserve(root.GetArray().Size());
|
||||
for (const auto& element : root.GetArray()) {
|
||||
v.emplace_back(
|
||||
parse_string("connection_id", element),
|
||||
utils::UUID{parse_string("host_id", element)}
|
||||
);
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_delete_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
|
||||
validate_client_routes_endpoint(cr, "delete_client_routes");
|
||||
|
||||
rapidjson::Document root;
|
||||
auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
|
||||
root.Parse(content.c_str());
|
||||
|
||||
const auto route_keys = parse_delete_client_array(root);
|
||||
co_await cr.local().delete_client_routes(route_keys);
|
||||
co_return seastar::json::json_void();
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_get_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
|
||||
validate_client_routes_endpoint(cr, "get_client_routes");
|
||||
|
||||
co_return co_await cr.invoke_on(0, [] (service::client_routes_service& cr) -> future<json::json_return_type> {
|
||||
co_return json::json_return_type(stream_range_as_array(co_await cr.get_client_routes(), [](const service::client_routes_service::client_route_entry & entry) {
|
||||
seastar::httpd::client_routes_json::client_routes_entry obj;
|
||||
obj.connection_id = entry.connection_id;
|
||||
obj.host_id = fmt::to_string(entry.host_id);
|
||||
obj.address = entry.address;
|
||||
if (entry.port.has_value()) { obj.port = entry.port.value(); }
|
||||
if (entry.tls_port.has_value()) { obj.tls_port = entry.tls_port.value(); }
|
||||
if (entry.alternator_port.has_value()) { obj.alternator_port = entry.alternator_port.value(); }
|
||||
if (entry.alternator_https_port.has_value()) { obj.alternator_https_port = entry.alternator_https_port.value(); }
|
||||
return obj;
|
||||
}));
|
||||
});
|
||||
}
|
||||
|
||||
void set_client_routes(http_context& ctx, routes& r, sharded<service::client_routes_service>& cr) {
|
||||
seastar::httpd::client_routes_json::set_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
|
||||
return rest_set_client_routes(ctx, cr, std::move(req));
|
||||
});
|
||||
seastar::httpd::client_routes_json::delete_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
|
||||
return rest_delete_client_routes(ctx, cr, std::move(req));
|
||||
});
|
||||
seastar::httpd::client_routes_json::get_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
|
||||
return rest_get_client_routes(ctx, cr, std::move(req));
|
||||
});
|
||||
}
|
||||
|
||||
void unset_client_routes(http_context& ctx, routes& r) {
|
||||
seastar::httpd::client_routes_json::set_client_routes.unset(r);
|
||||
seastar::httpd::client_routes_json::delete_client_routes.unset(r);
|
||||
seastar::httpd::client_routes_json::get_client_routes.unset(r);
|
||||
}
|
||||
|
||||
}
|
||||
20
api/client_routes.hh
Normal file
20
api/client_routes.hh
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include <seastar/json/json_elements.hh>
|
||||
#include "api/api_init.hh"
|
||||
|
||||
namespace api {
|
||||
|
||||
void set_client_routes(http_context& ctx, httpd::routes& r, sharded<service::client_routes_service>& cr);
|
||||
void unset_client_routes(http_context& ctx, httpd::routes& r);
|
||||
|
||||
}
|
||||
@@ -1158,6 +1158,7 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'locator/topology.cc',
|
||||
'locator/util.cc',
|
||||
'service/client_state.cc',
|
||||
'service/client_routes.cc',
|
||||
'service/storage_service.cc',
|
||||
'service/session.cc',
|
||||
'service/task_manager_module.cc',
|
||||
@@ -1318,6 +1319,8 @@ api = ['api/api.cc',
|
||||
'api/storage_proxy.cc',
|
||||
Json2Code('api/api-doc/cache_service.json'),
|
||||
'api/cache_service.cc',
|
||||
Json2Code('api/api-doc/client_routes.json'),
|
||||
'api/client_routes.cc',
|
||||
Json2Code('api/api-doc/collectd.json'),
|
||||
'api/collectd.cc',
|
||||
Json2Code('api/api-doc/endpoint_snitch_info.json'),
|
||||
|
||||
@@ -64,6 +64,10 @@ bool query_processor::topology_global_queue_empty() {
|
||||
return remote().first.get().ss.topology_global_queue_empty();
|
||||
}
|
||||
|
||||
future<bool> query_processor::ongoing_rf_change(const service::group0_guard& guard, sstring ks) {
|
||||
return remote().first.get().ss.ongoing_rf_change(guard, std::move(ks));
|
||||
}
|
||||
|
||||
static service::query_state query_state_for_internal_call() {
|
||||
return {service::client_state::for_internal_calls(), empty_service_permit()};
|
||||
}
|
||||
|
||||
@@ -474,6 +474,7 @@ public:
|
||||
void reset_cache();
|
||||
|
||||
bool topology_global_queue_empty();
|
||||
future<bool> ongoing_rf_change(const service::group0_guard& guard, sstring ks);
|
||||
|
||||
query_options make_internal_options(
|
||||
const statements::prepared_statement::checked_weak_ptr& p,
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include "locator/abstract_replication_strategy.hh"
|
||||
#include "mutation/canonical_mutation.hh"
|
||||
#include "prepared_statement.hh"
|
||||
#include "seastar/coroutine/exception.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "service/topology_mutation.hh"
|
||||
@@ -138,6 +139,7 @@ bool cql3::statements::alter_keyspace_statement::changes_tablets(query_processor
|
||||
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, cql3::cql_warnings_vec>>
|
||||
cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_processor& qp, service::query_state& state, const query_options& options, service::group0_batch& mc) const {
|
||||
using namespace cql_transport;
|
||||
bool unknown_keyspace = false;
|
||||
try {
|
||||
event::schema_change::target_type target_type = event::schema_change::target_type::KEYSPACE;
|
||||
auto ks = qp.db().find_keyspace(_name);
|
||||
@@ -158,8 +160,12 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
|
||||
// when in reality nothing or only schema is being changed
|
||||
if (changes_tablets(qp)) {
|
||||
if (!qp.proxy().features().topology_global_request_queue && !qp.topology_global_queue_empty()) {
|
||||
return make_exception_future<std::tuple<::shared_ptr<::cql_transport::event::schema_change>, cql3::cql_warnings_vec>>(
|
||||
exceptions::invalid_request_exception("Another global topology request is ongoing, please retry."));
|
||||
co_await coroutine::return_exception(
|
||||
exceptions::invalid_request_exception("Another global topology request is ongoing, please retry."));
|
||||
}
|
||||
if (qp.proxy().features().rack_list_rf && co_await qp.ongoing_rf_change(mc.guard(),_name)) {
|
||||
co_await coroutine::return_exception(
|
||||
exceptions::invalid_request_exception(format("Another RF change for this keyspace {} ongoing, please retry.", _name)));
|
||||
}
|
||||
qp.db().real_database().validate_keyspace_update(*ks_md_update);
|
||||
|
||||
@@ -242,10 +248,15 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
|
||||
target_type,
|
||||
keyspace());
|
||||
mc.add_mutations(std::move(muts), "CQL alter keyspace");
|
||||
return make_ready_future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, cql3::cql_warnings_vec>>(std::make_tuple(std::move(ret), warnings));
|
||||
co_return std::make_tuple(std::move(ret), warnings);
|
||||
} catch (data_dictionary::no_such_keyspace& e) {
|
||||
return make_exception_future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, cql3::cql_warnings_vec>>(exceptions::invalid_request_exception("Unknown keyspace " + _name));
|
||||
unknown_keyspace = true;
|
||||
}
|
||||
if (unknown_keyspace) {
|
||||
co_await coroutine::return_exception(
|
||||
exceptions::invalid_request_exception("Unknown keyspace " + _name));
|
||||
}
|
||||
std::unreachable();
|
||||
}
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
|
||||
@@ -61,7 +61,7 @@ expand_to_racks(const locator::token_metadata& tm,
|
||||
|
||||
// Handle ALTER:
|
||||
// ([]|0) -> numeric is allowed, there are no existing replicas
|
||||
// numeric -> numeric' is not supported. User should convert RF to rack list of equal count first.
|
||||
// numeric -> numeric' is not supported unless numeric == numeric'. User should convert RF to rack list of equal count first.
|
||||
// rack_list -> len(rack_list) is allowed (no-op)
|
||||
// rack_list -> numeric is not allowed
|
||||
if (old_options.contains(dc)) {
|
||||
@@ -75,6 +75,8 @@ expand_to_racks(const locator::token_metadata& tm,
|
||||
"Cannot change replication factor for '{}' from {} to numeric {}, use rack list instead",
|
||||
dc, old_rf_val, data.count()));
|
||||
}
|
||||
} else if (old_rf.count() == data.count()) {
|
||||
return rf;
|
||||
} else if (old_rf.count() > 0) {
|
||||
throw exceptions::configuration_exception(fmt::format(
|
||||
"Cannot change replication factor for '{}' from {} to {}, only rack list is allowed",
|
||||
@@ -153,6 +155,8 @@ static locator::replication_strategy_config_options prepare_options(
|
||||
}
|
||||
|
||||
// Validate options.
|
||||
bool numeric_to_rack_list_transition = false;
|
||||
bool rf_change = false;
|
||||
for (auto&& [dc, opt] : options) {
|
||||
locator::replication_factor_data rf(opt);
|
||||
|
||||
@@ -162,6 +166,7 @@ static locator::replication_strategy_config_options prepare_options(
|
||||
old_rf = locator::replication_factor_data(i->second);
|
||||
}
|
||||
|
||||
rf_change = rf_change || (old_rf && old_rf->count() != rf.count()) || (!old_rf && rf.count() != 0);
|
||||
if (!rf.is_rack_based()) {
|
||||
if (old_rf && old_rf->is_rack_based() && rf.count() != 0) {
|
||||
if (old_rf->count() != rf.count()) {
|
||||
@@ -187,12 +192,11 @@ static locator::replication_strategy_config_options prepare_options(
|
||||
throw exceptions::configuration_exception(fmt::format(
|
||||
"Rack list for '{}' contains duplicate entries", dc));
|
||||
}
|
||||
if (old_rf && !old_rf->is_rack_based() && old_rf->count() != 0) {
|
||||
// FIXME: Allow this if replicas already conform to the given rack list.
|
||||
// FIXME: Implement automatic colocation to allow transition to rack list.
|
||||
throw exceptions::configuration_exception(fmt::format(
|
||||
"Cannot change replication factor from numeric to rack list for '{}'", dc));
|
||||
}
|
||||
numeric_to_rack_list_transition = numeric_to_rack_list_transition || (old_rf && !old_rf->is_rack_based() && old_rf->count() != 0);
|
||||
}
|
||||
|
||||
if (numeric_to_rack_list_transition && rf_change) {
|
||||
throw exceptions::configuration_exception("Cannot change replication factor from numeric to rack list and rf value at the same time");
|
||||
}
|
||||
|
||||
if (!rf && options.empty() && old_options.empty()) {
|
||||
@@ -412,7 +416,7 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata(s
|
||||
? std::optional<unsigned>(0) : std::nullopt;
|
||||
auto initial_tablets = get_initial_tablets(default_initial_tablets, cfg.enforce_tablets());
|
||||
bool uses_tablets = initial_tablets.has_value();
|
||||
bool rack_list_enabled = feat.rack_list_rf;
|
||||
bool rack_list_enabled = utils::get_local_injector().enter("create_with_numeric") ? false : feat.rack_list_rf;
|
||||
auto options = prepare_options(sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), {}, rack_list_enabled, uses_tablets);
|
||||
return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc,
|
||||
std::move(options), initial_tablets, get_consistency_option(), get_boolean(KW_DURABLE_WRITES, true), get_storage_options());
|
||||
@@ -428,7 +432,7 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata_u
|
||||
throw exceptions::invalid_request_exception("Cannot alter replication strategy vnode/tablets flavor");
|
||||
}
|
||||
auto sc = get_replication_strategy_class();
|
||||
bool rack_list_enabled = feat.rack_list_rf;
|
||||
bool rack_list_enabled = utils::get_local_injector().enter("create_with_numeric") ? false : feat.rack_list_rf;
|
||||
if (sc) {
|
||||
options = prepare_options(*sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), old_options, rack_list_enabled, uses_tablets);
|
||||
} else {
|
||||
|
||||
@@ -248,7 +248,7 @@ future<db::commitlog> hint_endpoint_manager::add_store() noexcept {
|
||||
// which is larger than the segment ID of the RP of the last written hint.
|
||||
cfg.base_segment_id = _last_written_rp.base_id();
|
||||
|
||||
return commitlog::create_commitlog(std::move(cfg)).then([this] (commitlog l) -> future<commitlog> {
|
||||
return commitlog::create_commitlog(std::move(cfg)).then([this] (this auto, commitlog l) -> future<commitlog> {
|
||||
// add_store() is triggered every time hint files are forcefully flushed to I/O (every hints_flush_period).
|
||||
// When this happens we want to refill _sender's segments only if it has finished with the segments he had before.
|
||||
if (_sender.have_segments()) {
|
||||
|
||||
@@ -135,5 +135,5 @@ const std::string db::object_storage_endpoint_param::gs_type = "gs";
|
||||
|
||||
auto fmt::formatter<db::object_storage_endpoint_param>::format(const db::object_storage_endpoint_param& e, fmt::format_context& ctx) const
|
||||
-> decltype(ctx.out()) {
|
||||
return fmt::format_to(ctx.out(), "object_storage_endpoint_param{{}}", e.to_json_string());
|
||||
return fmt::format_to(ctx.out(), "object_storage_endpoint_param{}", e.to_json_string());
|
||||
}
|
||||
|
||||
@@ -110,6 +110,7 @@ namespace {
|
||||
system_keyspace::v3::CDC_LOCAL,
|
||||
system_keyspace::DICTS,
|
||||
system_keyspace::VIEW_BUILDING_TASKS,
|
||||
system_keyspace::CLIENT_ROUTES,
|
||||
};
|
||||
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
|
||||
props.enable_schema_commitlog();
|
||||
@@ -137,6 +138,7 @@ namespace {
|
||||
system_keyspace::ROLE_PERMISSIONS,
|
||||
system_keyspace::DICTS,
|
||||
system_keyspace::VIEW_BUILDING_TASKS,
|
||||
system_keyspace::CLIENT_ROUTES,
|
||||
};
|
||||
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
|
||||
props.is_group0_table = true;
|
||||
@@ -309,6 +311,7 @@ schema_ptr system_keyspace::topology() {
|
||||
.with_column("tablet_balancing_enabled", boolean_type, column_kind::static_column)
|
||||
.with_column("upgrade_state", utf8_type, column_kind::static_column)
|
||||
.with_column("global_requests", set_type_impl::get_instance(timeuuid_type, true), column_kind::static_column)
|
||||
.with_column("paused_rf_change_requests", set_type_impl::get_instance(timeuuid_type, true), column_kind::static_column)
|
||||
.set_comment("Current state of topology change machine")
|
||||
.with_hash_version()
|
||||
.build();
|
||||
@@ -1415,6 +1418,23 @@ schema_ptr system_keyspace::view_building_tasks() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
schema_ptr system_keyspace::client_routes() {
|
||||
static thread_local auto schema = [] {
|
||||
auto id = generate_legacy_id(NAME, CLIENT_ROUTES);
|
||||
return schema_builder(NAME, CLIENT_ROUTES, std::make_optional(id))
|
||||
.with_column("connection_id", utf8_type, column_kind::partition_key)
|
||||
.with_column("host_id", uuid_type, column_kind::clustering_key)
|
||||
.with_column("address", utf8_type)
|
||||
.with_column("port", int32_type)
|
||||
.with_column("tls_port", int32_type)
|
||||
.with_column("alternator_port", int32_type)
|
||||
.with_column("alternator_https_port", int32_type)
|
||||
.with_hash_version()
|
||||
.build();
|
||||
}();
|
||||
return schema;
|
||||
}
|
||||
|
||||
future<system_keyspace::local_info> system_keyspace::load_local_info() {
|
||||
auto msg = co_await execute_cql(format("SELECT host_id, cluster_name, data_center, rack FROM system.{} WHERE key=?", LOCAL), sstring(LOCAL));
|
||||
|
||||
@@ -2342,7 +2362,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
||||
v3::cdc_local(),
|
||||
raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery(),
|
||||
topology(), cdc_generations_v3(), topology_requests(), service_levels_v2(), view_build_status_v2(),
|
||||
dicts(), view_building_tasks(), cdc_streams_state(), cdc_streams_history()
|
||||
dicts(), view_building_tasks(), client_routes(), cdc_streams_state(), cdc_streams_history()
|
||||
});
|
||||
|
||||
if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) {
|
||||
@@ -3137,7 +3157,10 @@ static bool must_have_tokens(service::node_state nst) {
|
||||
// A decommissioning node doesn't have tokens at the end, they are
|
||||
// removed during transition to the left_token_ring state.
|
||||
case service::node_state::decommissioning: return false;
|
||||
case service::node_state::removing: return true;
|
||||
// A removing node might or might not have tokens depending on whether
|
||||
// REMOVENODE_WITH_LEFT_TOKEN_RING feature is enabled. To support both
|
||||
// cases, we allow removing nodes to not have tokens.
|
||||
case service::node_state::removing: return false;
|
||||
case service::node_state::rebuilding: return true;
|
||||
case service::node_state::normal: return true;
|
||||
case service::node_state::left: return false;
|
||||
@@ -3377,6 +3400,12 @@ future<service::topology> system_keyspace::load_topology_state(const std::unorde
|
||||
}
|
||||
}
|
||||
|
||||
if (some_row.has("paused_rf_change_requests")) {
|
||||
for (auto&& v : deserialize_set_column(*topology(), some_row, "paused_rf_change_requests")) {
|
||||
ret.paused_rf_change_requests.insert(value_cast<utils::UUID>(v));
|
||||
}
|
||||
}
|
||||
|
||||
if (some_row.has("enabled_features")) {
|
||||
ret.enabled_features = decode_features(deserialize_set_column(*topology(), some_row, "enabled_features"));
|
||||
}
|
||||
@@ -3588,35 +3617,43 @@ system_keyspace::topology_requests_entry system_keyspace::topology_request_row_t
|
||||
return entry;
|
||||
}
|
||||
|
||||
future<system_keyspace::topology_requests_entry> system_keyspace::get_topology_request_entry(utils::UUID id, bool require_entry) {
|
||||
future<system_keyspace::topology_requests_entry> system_keyspace::get_topology_request_entry(utils::UUID id) {
|
||||
auto r = co_await get_topology_request_entry_opt(id);
|
||||
if (!r) {
|
||||
on_internal_error(slogger, format("no entry for request id {}", id));
|
||||
}
|
||||
co_return std::move(*r);
|
||||
}
|
||||
|
||||
future<std::optional<system_keyspace::topology_requests_entry>> system_keyspace::get_topology_request_entry_opt(utils::UUID id) {
|
||||
auto rs = co_await execute_cql(
|
||||
format("SELECT * FROM system.{} WHERE id = {}", TOPOLOGY_REQUESTS, id));
|
||||
|
||||
if (!rs || rs->empty()) {
|
||||
if (require_entry) {
|
||||
on_internal_error(slogger, format("no entry for request id {}", id));
|
||||
} else {
|
||||
co_return topology_requests_entry{
|
||||
.id = utils::null_uuid()
|
||||
};
|
||||
}
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
const auto& row = rs->one();
|
||||
co_return topology_request_row_to_entry(id, row);
|
||||
}
|
||||
|
||||
future<system_keyspace::topology_requests_entries> system_keyspace::get_node_ops_request_entries(db_clock::time_point end_time_limit) {
|
||||
future<system_keyspace::topology_requests_entries> system_keyspace::get_topology_request_entries(std::vector<std::variant<service::topology_request, service::global_topology_request>> request_types, db_clock::time_point end_time_limit) {
|
||||
sstring request_types_str = "";
|
||||
bool first = true;
|
||||
for (const auto& rt : request_types) {
|
||||
if (!std::exchange(first, false)) {
|
||||
request_types_str += ", ";
|
||||
}
|
||||
request_types_str += std::visit([] (auto&& arg) { return fmt::format("'{}'", arg); }, rt);
|
||||
}
|
||||
|
||||
// Running requests.
|
||||
auto rs_running = co_await execute_cql(
|
||||
format("SELECT * FROM system.{} WHERE done = false AND request_type IN ('{}', '{}', '{}', '{}', '{}') ALLOW FILTERING", TOPOLOGY_REQUESTS,
|
||||
service::topology_request::join, service::topology_request::replace, service::topology_request::rebuild, service::topology_request::leave, service::topology_request::remove));
|
||||
|
||||
format("SELECT * FROM system.{} WHERE done = false AND request_type IN ({}) ALLOW FILTERING", TOPOLOGY_REQUESTS, request_types_str));
|
||||
|
||||
// Requests which finished after end_time_limit.
|
||||
auto rs_done = co_await execute_cql(
|
||||
format("SELECT * FROM system.{} WHERE end_time > {} AND request_type IN ('{}', '{}', '{}', '{}', '{}') ALLOW FILTERING", TOPOLOGY_REQUESTS, end_time_limit.time_since_epoch().count(),
|
||||
service::topology_request::join, service::topology_request::replace, service::topology_request::rebuild, service::topology_request::leave, service::topology_request::remove));
|
||||
format("SELECT * FROM system.{} WHERE end_time > {} AND request_type IN ({}) ALLOW FILTERING", TOPOLOGY_REQUESTS, end_time_limit.time_since_epoch().count(), request_types_str));
|
||||
|
||||
topology_requests_entries m;
|
||||
for (const auto& row: *rs_done) {
|
||||
@@ -3634,6 +3671,16 @@ future<system_keyspace::topology_requests_entries> system_keyspace::get_node_ops
|
||||
co_return m;
|
||||
}
|
||||
|
||||
future<system_keyspace::topology_requests_entries> system_keyspace::get_node_ops_request_entries(db_clock::time_point end_time_limit) {
|
||||
return get_topology_request_entries({
|
||||
service::topology_request::join,
|
||||
service::topology_request::replace,
|
||||
service::topology_request::rebuild,
|
||||
service::topology_request::leave,
|
||||
service::topology_request::remove
|
||||
}, end_time_limit);
|
||||
}
|
||||
|
||||
future<mutation> system_keyspace::get_insert_dict_mutation(
|
||||
std::string_view name,
|
||||
bytes data,
|
||||
|
||||
@@ -199,6 +199,7 @@ public:
|
||||
static constexpr auto VIEW_BUILD_STATUS_V2 = "view_build_status_v2";
|
||||
static constexpr auto DICTS = "dicts";
|
||||
static constexpr auto VIEW_BUILDING_TASKS = "view_building_tasks";
|
||||
static constexpr auto CLIENT_ROUTES = "client_routes";
|
||||
|
||||
// auth
|
||||
static constexpr auto ROLES = "roles";
|
||||
@@ -276,6 +277,7 @@ public:
|
||||
static schema_ptr view_build_status_v2();
|
||||
static schema_ptr dicts();
|
||||
static schema_ptr view_building_tasks();
|
||||
static schema_ptr client_routes();
|
||||
|
||||
// auth
|
||||
static schema_ptr roles();
|
||||
@@ -667,7 +669,9 @@ public:
|
||||
|
||||
future<service::topology_request_state> get_topology_request_state(utils::UUID id, bool require_entry);
|
||||
topology_requests_entry topology_request_row_to_entry(utils::UUID id, const cql3::untyped_result_set_row& row);
|
||||
future<topology_requests_entry> get_topology_request_entry(utils::UUID id, bool require_entry);
|
||||
future<topology_requests_entry> get_topology_request_entry(utils::UUID id);
|
||||
future<std::optional<topology_requests_entry>> get_topology_request_entry_opt(utils::UUID id);
|
||||
future<system_keyspace::topology_requests_entries> get_topology_request_entries(std::vector<std::variant<service::topology_request, service::global_topology_request>> request_types, db_clock::time_point end_time_limit);
|
||||
future<topology_requests_entries> get_node_ops_request_entries(db_clock::time_point end_time_limit);
|
||||
|
||||
public:
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
# Alternator: DynamoDB API in Scylla
|
||||
# Alternator: DynamoDB API in ScyllaDB
|
||||
|
||||
## Introduction
|
||||
Alternator is a Scylla feature adding compatibility with Amazon DynamoDB(TM).
|
||||
Alternator is a ScyllaDB feature adding compatibility with Amazon DynamoDB(TM).
|
||||
DynamoDB's API uses JSON-encoded requests and responses which are sent over
|
||||
an HTTP or HTTPS transport. It is described in detail in Amazon's [DynamoDB
|
||||
API Reference](https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/).
|
||||
|
||||
Our goal is that any application written to use Amazon DynamoDB could
|
||||
be run, unmodified, against Scylla with Alternator enabled. Alternator's
|
||||
be run, unmodified, against ScyllaDB with Alternator enabled. Alternator's
|
||||
compatibility with DynamoDB is fairly complete, but users should be aware
|
||||
of some differences and some unimplemented features. The extent of
|
||||
Alternator's compatibility with DynamoDB is described in the
|
||||
[Scylla Alternator for DynamoDB users](compatibility.md) document,
|
||||
[ScyllaDB Alternator for DynamoDB users](compatibility.md) document,
|
||||
which is updated as the work on Alternator progresses and compatibility
|
||||
continues to improve.
|
||||
|
||||
@@ -19,8 +19,8 @@ Alternator also adds several features and APIs that are not available in
|
||||
DynamoDB. These are described in [Alternator-specific APIs](new-apis.md).
|
||||
|
||||
## Running Alternator
|
||||
By default, Scylla does not listen for DynamoDB API requests. To enable
|
||||
this API in Scylla you must set at least two configuration options,
|
||||
By default, ScyllaDB does not listen for DynamoDB API requests. To enable
|
||||
this API in ScyllaDB you must set at least two configuration options,
|
||||
**alternator_port** and **alternator_write_isolation**. For example in the
|
||||
YAML configuration file:
|
||||
```yaml
|
||||
@@ -30,7 +30,7 @@ alternator_write_isolation: only_rmw_uses_lwt # or always, forbid or unsafe
|
||||
or, equivalently, via command-line arguments: `--alternator-port=8000
|
||||
--alternator-write-isolation=only_rmw_uses_lwt.
|
||||
|
||||
the **alternator_port** option determines on which port Scylla listens for
|
||||
the **alternator_port** option determines on which port ScyllaDB listens for
|
||||
DynamoDB API requests. By default, it listens on this port on all network
|
||||
interfaces. To listen only on a specific interface, configure also the
|
||||
**alternator_address** option.
|
||||
@@ -41,12 +41,12 @@ Alternator has four different choices
|
||||
for the implementation of writes, each with different advantages. You should
|
||||
carefully consider which of the options makes more sense for your intended
|
||||
use case and configure alternator_write_isolation accordingly. There is
|
||||
currently no default for this option: Trying to run Scylla with an Alternator
|
||||
currently no default for this option: Trying to run ScyllaDB with an Alternator
|
||||
port selected but without configuring write isolation will result in an error message,
|
||||
asking you to set it.
|
||||
|
||||
In addition to (or instead of) serving HTTP requests on alternator_port,
|
||||
Scylla can accept DynamoDB API requests over HTTPS (encrypted), on the port
|
||||
ScyllaDB can accept DynamoDB API requests over HTTPS (encrypted), on the port
|
||||
specified by **alternator_https_port**. As usual for HTTPS servers, the
|
||||
operator must specify certificate and key files. By default these should
|
||||
be placed in `/etc/scylla/scylla.crt` and `/etc/scylla/scylla.key`, but
|
||||
@@ -54,7 +54,7 @@ these default locations can overridden by specifying
|
||||
`--alternator-encryption-options keyfile="..."` and
|
||||
`--alternator-encryption-options certificate="..."`.
|
||||
|
||||
By default, Scylla saves a snapshot of deleted tables. But Alternator does
|
||||
By default, ScyllaDB saves a snapshot of deleted tables. But Alternator does
|
||||
not offer an API to restore these snapshots, so these snapshots are not useful
|
||||
and waste disk space - deleting a table does not recover any disk space.
|
||||
It is therefore recommended to disable this automatic-snapshotting feature
|
||||
@@ -73,11 +73,11 @@ itself. Instructions, code and examples for doing this can be found in the
|
||||
|
||||
This section provides only a very brief introduction to Alternator's
|
||||
design. A much more detailed document about the features of the DynamoDB
|
||||
API and how they are, or could be, implemented in Scylla can be found in:
|
||||
API and how they are, or could be, implemented in ScyllaDB can be found in:
|
||||
<https://docs.google.com/document/d/1i4yjF5OSAazAY_-T8CBce9-2ykW4twx_E_Nt2zDoOVs>
|
||||
|
||||
Almost all of Alternator's source code (except some initialization code)
|
||||
can be found in the alternator/ subdirectory of Scylla's source code.
|
||||
can be found in the alternator/ subdirectory of ScyllaDB's source code.
|
||||
Extensive functional tests can be found in the test/alternator
|
||||
subdirectory. These tests are written in Python, and can be run against
|
||||
both Alternator and Amazon's DynamoDB; This allows verifying that
|
||||
@@ -85,15 +85,15 @@ Alternator's behavior matches the one observed on DynamoDB.
|
||||
See test/alternator/README.md for more information about the tests and
|
||||
how to run them.
|
||||
|
||||
With Alternator enabled on port 8000 (for example), every Scylla node
|
||||
With Alternator enabled on port 8000 (for example), every ScyllaDB node
|
||||
listens for DynamoDB API requests on this port. These requests, in
|
||||
JSON format over HTTP, are parsed and result in calls to internal Scylla
|
||||
C++ functions - there is no CQL generation or parsing involved.
|
||||
In Scylla terminology, the node receiving the request acts as the
|
||||
In ScyllaDB terminology, the node receiving the request acts as the
|
||||
*coordinator*, and often passes the request on to one or more other nodes -
|
||||
*replicas* which hold copies of the requested data.
|
||||
|
||||
Alternator tables are stored as Scylla tables, each in a separate keyspace.
|
||||
Alternator tables are stored as ScyllaDB tables, each in a separate keyspace.
|
||||
Each keyspace is initialized when the corresponding Alternator table is
|
||||
created (with a CreateTable request). The replication factor (RF) for this
|
||||
keyspace is chosen at that point, depending on the size of the cluster:
|
||||
@@ -101,19 +101,19 @@ RF=3 is used on clusters with three or more nodes, and RF=1 is used for
|
||||
smaller clusters. Such smaller clusters are, of course, only recommended
|
||||
for tests because of the risk of data loss.
|
||||
|
||||
Each table in Alternator is stored as a Scylla table in a separate
|
||||
Each table in Alternator is stored as a ScyllaDB table in a separate
|
||||
keyspace. The DynamoDB key columns (hash and sort key) have known types,
|
||||
and become partition and clustering key columns of the Scylla table.
|
||||
and become partition and clustering key columns of the ScyllaDB table.
|
||||
All other attributes may be different for each row, so are stored in one
|
||||
map column in Scylla, and not as separate columns.
|
||||
map column in ScyllaDB, and not as separate columns.
|
||||
|
||||
DynamoDB supports two consistency levels for reads, "eventual consistency"
|
||||
and "strong consistency". These two modes are implemented using Scylla's CL
|
||||
and "strong consistency". These two modes are implemented using ScyllaDB's CL
|
||||
(consistency level) feature: All writes are done using the `LOCAL_QUORUM`
|
||||
consistency level, then strongly-consistent reads are done with
|
||||
`LOCAL_QUORUM`, while eventually-consistent reads are with just `LOCAL_ONE`.
|
||||
|
||||
In Scylla (and its inspiration, Cassandra), high write performance is
|
||||
In ScyllaDB (and its inspiration, Cassandra), high write performance is
|
||||
achieved by ensuring that writes do not require reads from disk.
|
||||
The DynamoDB API, however, provides many types of requests that need a read
|
||||
before the write (a.k.a. RMW requests - read-modify-write). For example,
|
||||
@@ -121,7 +121,7 @@ a request may copy an existing attribute, increment an attribute,
|
||||
be conditional on some expression involving existing values of attribute,
|
||||
or request that the previous values of attributes be returned. These
|
||||
read-modify-write transactions should be _isolated_ from each other, so
|
||||
by default Alternator implements every write operation using Scylla's
|
||||
by default Alternator implements every write operation using ScyllaDB's
|
||||
LWT (lightweight transactions). This default can be overridden on a per-table
|
||||
basis, by tagging the table as explained above in the "write isolation
|
||||
policies" section.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# ScyllaDB Alternator for DynamoDB users
|
||||
|
||||
Scylla supports the DynamoDB API (this feature is codenamed "Alternator").
|
||||
ScyllaDB supports the DynamoDB API (this feature is codenamed "Alternator").
|
||||
Our goal is to support any application written for Amazon DynamoDB.
|
||||
Nevertheless, there are a few differences between DynamoDB and Scylla, and
|
||||
and a few DynamoDB features that have not yet been implemented in Scylla.
|
||||
@@ -8,16 +8,16 @@ The purpose of this document is to inform users of these differences.
|
||||
|
||||
## Provisioning
|
||||
|
||||
The most obvious difference between DynamoDB and Scylla is that while
|
||||
DynamoDB is a shared cloud service, Scylla is a dedicated service running
|
||||
The most obvious difference between DynamoDB and ScyllaDB is that while
|
||||
DynamoDB is a shared cloud service, ScyllaDB is a dedicated service running
|
||||
on your private cluster. Whereas DynamoDB allows you to "provision" the
|
||||
number of requests per second you'll need - or at an extra cost not even
|
||||
provision that - Scylla requires you to provision your cluster. You need
|
||||
provision that - ScyllaDB requires you to provision your cluster. You need
|
||||
to reason about the number and size of your nodes - not the throughput.
|
||||
|
||||
Moreover, DynamoDB's per-table provisioning (`BillingMode=PROVISIONED`) is
|
||||
not yet supported by Scylla. The BillingMode and ProvisionedThroughput options
|
||||
on a table need to be valid but are ignored, and Scylla behaves like DynamoDB's
|
||||
on a table need to be valid but are ignored, and ScyllaDB behaves like DynamoDB's
|
||||
`BillingMode=PAY_PER_REQUEST`: All requests are accepted without a per-table
|
||||
throughput cap.
|
||||
|
||||
@@ -33,7 +33,7 @@ Instructions for doing this can be found in:
|
||||
|
||||
## Write isolation policies
|
||||
|
||||
Scylla was designed to optimize the performance of pure write operations -
|
||||
ScyllaDB was designed to optimize the performance of pure write operations -
|
||||
writes which do not need to read the previous value of the item.
|
||||
In CQL, writes which do need the previous value of the item must explicitly
|
||||
use the slower LWT ("LightWeight Transaction") feature to be correctly
|
||||
@@ -79,11 +79,11 @@ a _higher_ timestamp - and this will be the "last write" that wins.
|
||||
To avoid or mitigate this write reordering issue, users may consider
|
||||
one or more of the following:
|
||||
|
||||
1. Use NTP to keep the clocks on the different Scylla nodes synchronized.
|
||||
1. Use NTP to keep the clocks on the different ScyllaDB nodes synchronized.
|
||||
If the delay between the two writes is longer than NTP's accuracy,
|
||||
they will not be reordered.
|
||||
2. If an application wants to ensure that two specific writes are not
|
||||
reordered, it should send both requests to the same Scylla node.
|
||||
reordered, it should send both requests to the same ScyllaDB node.
|
||||
Care should be taken when using a load balancer - which might redirect
|
||||
two requests to two different nodes.
|
||||
3. Consider using the `always_use_lwt` write isolation policy.
|
||||
@@ -210,7 +210,7 @@ CREATE SERVICE_LEVEL IF NOT EXISTS oltp WITH SHARES = 1000;
|
||||
ATTACH SERVICE_LEVEL olap TO alice;
|
||||
ATTACH SERVICE_LEVEL oltp TO bob;
|
||||
```
|
||||
Note that `alternator_enforce_authorization` has to be enabled in Scylla configuration.
|
||||
Note that `alternator_enforce_authorization` has to be enabled in ScyllaDB configuration.
|
||||
|
||||
See [Authorization](##Authorization) section to learn more about roles and authorization.
|
||||
See [Workload Prioritization](../features/workload-prioritization)
|
||||
@@ -218,11 +218,11 @@ to read about Workload Prioritization in detail.
|
||||
|
||||
## Metrics
|
||||
|
||||
Scylla has an advanced and extensive monitoring framework for inspecting
|
||||
and graphing hundreds of different metrics of Scylla's usage and performance.
|
||||
Scylla's monitoring stack, based on Grafana and Prometheus, is described in
|
||||
ScyllaDB has an advanced and extensive monitoring framework for inspecting
|
||||
and graphing hundreds of different metrics of ScyllaDB's usage and performance.
|
||||
ScyllaDB's monitoring stack, based on Grafana and Prometheus, is described in
|
||||
<https://docs.scylladb.com/operating-scylla/monitoring/>.
|
||||
This monitoring stack is different from DynamoDB's offering - but Scylla's
|
||||
This monitoring stack is different from DynamoDB's offering - but ScyllaDB's
|
||||
is significantly more powerful and gives the user better insights on
|
||||
the internals of the database and its performance.
|
||||
|
||||
@@ -248,7 +248,7 @@ data in different partition order. Applications mustn't rely on that
|
||||
undocumented order.
|
||||
|
||||
Note that inside each partition, the individual items will be sorted the same
|
||||
in DynamoDB and Scylla - determined by the _sort key_ defined for that table.
|
||||
in DynamoDB and ScyllaDB - determined by the _sort key_ defined for that table.
|
||||
|
||||
---
|
||||
|
||||
@@ -274,7 +274,7 @@ is different, or can be configured in Alternator:
|
||||
## Experimental API features
|
||||
|
||||
Some DynamoDB API features are supported by Alternator, but considered
|
||||
**experimental** in this release. An experimental feature in Scylla is a
|
||||
**experimental** in this release. An experimental feature in ScyllaDB is a
|
||||
feature whose functionality is complete, or mostly complete, but it is not
|
||||
as thoroughly tested or optimized as regular features. Also, an experimental
|
||||
feature's implementation is still subject to change and upgrades may not be
|
||||
@@ -351,8 +351,8 @@ they should be easy to detect. Here is a list of these unimplemented features:
|
||||
|
||||
* The on-demand backup APIs are not supported: CreateBackup, DescribeBackup,
|
||||
DeleteBackup, ListBackups, RestoreTableFromBackup.
|
||||
For now, users can use Scylla's existing backup solutions such as snapshots
|
||||
or Scylla Manager.
|
||||
For now, users can use ScyllaDB's existing backup solutions such as snapshots
|
||||
or ScyllaDB Manager.
|
||||
<https://github.com/scylladb/scylla/issues/5063>
|
||||
|
||||
* Continuous backup (the ability to restore any point in time) is also not
|
||||
@@ -370,7 +370,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
|
||||
<https://github.com/scylladb/scylla/issues/5068>
|
||||
|
||||
* DAX (DynamoDB Accelerator), an in-memory cache for DynamoDB, is not
|
||||
available in for Alternator. Anyway, it should not be necessary - Scylla's
|
||||
available in for Alternator. Anyway, it should not be necessary - ScyllaDB's
|
||||
internal cache is already rather advanced and there is no need to place
|
||||
another cache in front of the it. We wrote more about this here:
|
||||
<https://www.scylladb.com/2017/07/31/database-caches-not-good/>
|
||||
@@ -384,7 +384,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
|
||||
* The PartiQL syntax (SQL-like SELECT/UPDATE/INSERT/DELETE expressions)
|
||||
and the operations ExecuteStatement, BatchExecuteStatement and
|
||||
ExecuteTransaction are not yet supported.
|
||||
A user that is interested in an SQL-like syntax can consider using Scylla's
|
||||
A user that is interested in an SQL-like syntax can consider using ScyllaDB's
|
||||
CQL protocol instead.
|
||||
This feature was added to DynamoDB in November 2020.
|
||||
<https://github.com/scylladb/scylla/issues/8787>
|
||||
@@ -393,7 +393,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
|
||||
which is different from AWS's. In particular, the operations
|
||||
DescribeContributorInsights, ListContributorInsights and
|
||||
UpdateContributorInsights that configure Amazon's "CloudWatch Contributor
|
||||
Insights" are not yet supported. Scylla has different ways to retrieve the
|
||||
Insights" are not yet supported. ScyllaDB has different ways to retrieve the
|
||||
same information, such as which items were accessed most often.
|
||||
<https://github.com/scylladb/scylla/issues/8788>
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ This section will guide you through the steps for setting up the cluster:
|
||||
<https://hub.docker.com/r/scylladb/scylla/>, but add to every `docker run`
|
||||
command a `-p 8000:8000` before the image name and
|
||||
`--alternator-port=8000 --alternator-write-isolation=always` at the end.
|
||||
The "alternator-port" option specifies on which port Scylla will listen for
|
||||
The "alternator-port" option specifies on which port ScyllaDB will listen for
|
||||
the (unencrypted) DynamoDB API, and the "alternator-write-isolation" chooses
|
||||
whether or not Alternator will use LWT for every write.
|
||||
For example,
|
||||
@@ -24,10 +24,10 @@ This section will guide you through the steps for setting up the cluster:
|
||||
By default, ScyllaDB run in this way will not have authentication or
|
||||
authorization enabled, and any DynamoDB API request will be honored without
|
||||
requiring them to be signed appropriately. See the
|
||||
[Scylla Alternator for DynamoDB users](compatibility.md#authentication-and-authorization)
|
||||
[ScyllaDB Alternator for DynamoDB users](compatibility.md#authentication-and-authorization)
|
||||
document on how to configure authentication and authorization.
|
||||
|
||||
## Testing Scylla's DynamoDB API support:
|
||||
## Testing ScyllaDB's DynamoDB API support:
|
||||
### Running AWS Tic Tac Toe demo app to test the cluster:
|
||||
1. Follow the instructions on the [AWS github page](https://github.com/awsdocs/amazon-dynamodb-developer-guide/blob/master/doc_source/TicTacToe.Phase1.md)
|
||||
2. Enjoy your tic-tac-toe game :-)
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
|
||||
Alternator's primary goal is to be compatible with Amazon DynamoDB(TM)
|
||||
and its APIs, so that any application written to use Amazon DynamoDB could
|
||||
be run, unmodified, against Scylla with Alternator enabled. The extent of
|
||||
be run, unmodified, against ScyllaDB with Alternator enabled. The extent of
|
||||
Alternator's compatibility with DynamoDB is described in the
|
||||
[Scylla Alternator for DynamoDB users](compatibility.md) document.
|
||||
[ScyllaDB Alternator for DynamoDB users](compatibility.md) document.
|
||||
|
||||
But Alternator also adds several features and APIs that are not available in
|
||||
DynamoDB. These Alternator-specific APIs are documented here.
|
||||
@@ -15,7 +15,7 @@ _conditional_ update or an update based on the old value of an attribute.
|
||||
The read and the write should be treated as a single transaction - protected
|
||||
(_isolated_) from other parallel writes to the same item.
|
||||
|
||||
Alternator could do this isolation by using Scylla's LWT (lightweight
|
||||
Alternator could do this isolation by using ScyllaDB's LWT (lightweight
|
||||
transactions) for every write operation, but this significantly slows
|
||||
down writes, and not necessary for workloads which don't use read-modify-write
|
||||
(RMW) updates.
|
||||
@@ -41,7 +41,7 @@ isolation policy for a specific table can be overridden by tagging the table
|
||||
which need a read before the write. An attempt to use such statements
|
||||
(e.g., UpdateItem with a ConditionExpression) will result in an error.
|
||||
In this mode, the remaining write requests which are allowed - pure writes
|
||||
without a read - are performed using standard Scylla writes, not LWT,
|
||||
without a read - are performed using standard ScyllaDB writes, not LWT,
|
||||
so they are significantly faster than they would have been in the
|
||||
`always_use_lwt`, but their isolation is still correct.
|
||||
|
||||
@@ -65,19 +65,19 @@ isolation policy for a specific table can be overridden by tagging the table
|
||||
read-modify-write updates. This mode is not recommended for any use case,
|
||||
and will likely be removed in the future.
|
||||
|
||||
## Accessing system tables from Scylla
|
||||
Scylla exposes lots of useful information via its internal system tables,
|
||||
## Accessing system tables from ScyllaDB
|
||||
ScyllaDB exposes lots of useful information via its internal system tables,
|
||||
which can be found in system keyspaces: 'system', 'system\_auth', etc.
|
||||
In order to access to these tables via alternator interface,
|
||||
Scan and Query requests can use a special table name:
|
||||
`.scylla.alternator.KEYSPACE_NAME.TABLE_NAME`
|
||||
which will return results fetched from corresponding Scylla table.
|
||||
which will return results fetched from corresponding ScyllaDB table.
|
||||
|
||||
This interface can be used only to fetch data from system tables.
|
||||
Attempts to read regular tables via the virtual interface will result
|
||||
in an error.
|
||||
|
||||
Example: in order to query the contents of Scylla's `system.large_rows`,
|
||||
Example: in order to query the contents of ScyllaDB's `system.large_rows`,
|
||||
pass `TableName='.scylla.alternator.system.large_rows'` to a Query/Scan
|
||||
request.
|
||||
|
||||
@@ -113,14 +113,14 @@ connection (either active or idle), not necessarily an active request as
|
||||
in Alternator.
|
||||
|
||||
## Service discovery
|
||||
As explained in [Scylla Alternator for DynamoDB users](compatibility.md),
|
||||
As explained in [ScyllaDB Alternator for DynamoDB users](compatibility.md),
|
||||
Alternator requires a load-balancer or a client-side load-balancing library
|
||||
to distribute requests between all Scylla nodes. This load-balancer needs
|
||||
to be able to _discover_ the Scylla nodes. Alternator provides two special
|
||||
to distribute requests between all ScyllaDB nodes. This load-balancer needs
|
||||
to be able to _discover_ the ScyllaDB nodes. Alternator provides two special
|
||||
requests, `/` and `/localnodes`, to help with this service discovery, which
|
||||
we will now explain.
|
||||
|
||||
Some setups know exactly which Scylla nodes were brought up, so all that
|
||||
Some setups know exactly which ScyllaDB nodes were brought up, so all that
|
||||
remains is to periodically verify that each node is still functional. The
|
||||
easiest way to do this is to make an HTTP (or HTTPS) GET request to the node,
|
||||
with URL `/`. This is a trivial GET request and does **not** need to be
|
||||
@@ -133,10 +133,10 @@ $ curl http://localhost:8000/
|
||||
healthy: localhost:8000
|
||||
```
|
||||
|
||||
In other setups, the load balancer might not know which Scylla nodes exist.
|
||||
For example, it may be possible to add or remove Scylla nodes without a
|
||||
In other setups, the load balancer might not know which ScyllaDB nodes exist.
|
||||
For example, it may be possible to add or remove ScyllaDB nodes without a
|
||||
client-side load balancer knowing. For these setups we have the `/localnodes`
|
||||
request that can be used to discover which Scylla nodes exist: A load balancer
|
||||
request that can be used to discover which ScyllaDB nodes exist: A load balancer
|
||||
that already knows at least one live node can discover the rest by sending
|
||||
a `/localnodes` request to the known node. It's again an unauthenticated
|
||||
HTTP (or HTTPS) GET request:
|
||||
@@ -160,7 +160,7 @@ list the nodes in a specific _data center_ or _rack_. These options are
|
||||
useful for certain use cases:
|
||||
|
||||
* A `dc` option (e.g., `/localnodes?dc=dc1`) can be passed to list the
|
||||
nodes in a specific Scylla data center, not the data center of the node
|
||||
nodes in a specific ScyllaDB data center, not the data center of the node
|
||||
being contacted. This is useful when a client knowns of _some_ Scylla
|
||||
node belonging to an unknown DC, but wants to list the nodes in _its_
|
||||
DC, which it knows by name.
|
||||
@@ -191,7 +191,7 @@ tells them to.
|
||||
|
||||
If you want to influence whether a specific Alternator table is created with tablets or vnodes,
|
||||
you can do this by specifying the `system:initial_tablets` tag
|
||||
(in earlier versions of Scylla the tag was `experimental:initial_tablets`)
|
||||
(in earlier versions of ScyllaDB the tag was `experimental:initial_tablets`)
|
||||
in the CreateTable operation. The value of this tag can be:
|
||||
|
||||
* Any valid integer as the value of this tag enables tablets.
|
||||
|
||||
@@ -1043,6 +1043,8 @@ The following modes are available:
|
||||
* - ``immediate``
|
||||
- Tombstone GC is immediately performed. There is no wait time or repair requirement. This mode is useful for a table that uses the TWCS compaction strategy with no user deletes. After data is expired after TTL, ScyllaDB can perform compaction to drop the expired data immediately.
|
||||
|
||||
.. warning:: The ``repair`` mode is not supported for :term:`Colocated Tables <Colocated Table>` in this version.
|
||||
|
||||
.. _cql-per-table-tablet-options:
|
||||
|
||||
Per-table tablet options
|
||||
|
||||
@@ -102,6 +102,7 @@ Additional Information
|
||||
|
||||
To learn more about TTL, and see a hands-on example, check out `this lesson <https://university.scylladb.com/courses/data-modeling/lessons/advanced-data-modeling/topic/expiring-data-with-ttl-time-to-live/>`_ on ScyllaDB University.
|
||||
|
||||
* `Video: Managing data expiration with Time-To-Live <https://www.youtube.com/watch?v=SXkbu7mFHeA>`_
|
||||
* :doc:`Apache Cassandra Query Language (CQL) Reference </cql/index>`
|
||||
* :doc:`KB Article:How to Change gc_grace_seconds for a Table </kb/gc-grace-seconds/>`
|
||||
* :doc:`KB Article:Time to Live (TTL) and Compaction </kb/ttl-facts/>`
|
||||
|
||||
@@ -236,3 +236,26 @@ the same mechanism for other protocol versions, such as CQLv4.
|
||||
|
||||
The feature is identified by the `SCYLLA_USE_METADATA_ID` key, which is meant to be sent
|
||||
in the SUPPORTED message.
|
||||
|
||||
## Sending the CLIENT_ROUTES_CHANGE event
|
||||
|
||||
This extension allows a driver to update its connections when the
|
||||
`system.client_routes` table is modified.
|
||||
|
||||
In some network topologies a specific mapping of addresses and ports is required (e.g.
|
||||
to support Private Link). This mapping can change dynamically even when no nodes are
|
||||
added or removed. The driver must adapt to those changes; otherwise connectivity can be
|
||||
lost.
|
||||
|
||||
The extension is implemented as a new `EVENT` type: `CLIENT_ROUTES_CHANGE`. The event
|
||||
body consists of:
|
||||
- [string] change
|
||||
- [string list] connection_ids
|
||||
- [string list] host_ids
|
||||
|
||||
There is only one change value: `UPDATE_NODES`, which means at least one client route
|
||||
was inserted, updated, or deleted.
|
||||
|
||||
Events already have a subscription mechanism similar to protocol extensions (that is,
|
||||
the driver only receives the events it explicitly subscribed to), so no additional
|
||||
`cql_protocol_extension` key is introduced for this feature.
|
||||
|
||||
@@ -86,6 +86,7 @@ stateDiagram-v2
|
||||
de_left_token_ring --> [*]
|
||||
}
|
||||
state removing {
|
||||
re_left_token_ring : left_token_ring
|
||||
re_tablet_draining : tablet_draining
|
||||
re_tablet_migration : tablet_migration
|
||||
re_write_both_read_old : write_both_read_old
|
||||
@@ -98,7 +99,8 @@ stateDiagram-v2
|
||||
re_tablet_draining --> re_write_both_read_old
|
||||
re_write_both_read_old --> re_write_both_read_new: streaming completed
|
||||
re_write_both_read_old --> re_rollback_to_normal: rollback
|
||||
re_write_both_read_new --> [*]
|
||||
re_write_both_read_new --> re_left_token_ring
|
||||
re_left_token_ring --> [*]
|
||||
}
|
||||
rebuilding --> normal: streaming completed
|
||||
decommissioning --> left: operation succeeded
|
||||
@@ -122,9 +124,10 @@ Note that these are not all states, as there are other states specific to tablet
|
||||
Writes to vnodes-based tables are going to both new and old replicas (new replicas means calculated according
|
||||
to modified token ring), reads are using old replicas.
|
||||
- `write_both_read_new` - as above, but reads are using new replicas.
|
||||
- `left_token_ring` - the decommissioning node left the token ring, but we still need to wait until other
|
||||
nodes observe it and stop sending writes to this node. Then, we tell the node to shut down and remove
|
||||
it from group 0. We also use this state to rollback a failed bootstrap or decommission.
|
||||
- `left_token_ring` - the decommissioning or removing node left the token ring, but we still need to wait until other
|
||||
nodes observe it and stop sending writes to this node. For decommission, we tell the node to shut down,
|
||||
then remove it from group 0. For removenode, the node is already down, so we skip the shutdown step.
|
||||
We also use this state to rollback a failed bootstrap or decommission.
|
||||
- `rollback_to_normal` - the decommission or removenode operation failed. Rollback the operation by
|
||||
moving the node we tried to decommission/remove back to the normal state.
|
||||
- `lock` - the topology stays in this state until externally changed (to null state), preventing topology
|
||||
@@ -141,7 +144,9 @@ reads that started before this point exist in the system. Finally we remove the
|
||||
transitioning state.
|
||||
|
||||
Decommission, removenode and replace work similarly, except they don't go through
|
||||
`commit_cdc_generation`.
|
||||
`commit_cdc_generation`. Both decommission and removenode go through the
|
||||
`left_token_ring` state to run a global barrier ensuring all nodes are aware
|
||||
of the topology change before the operation completes.
|
||||
|
||||
The state machine may also go only through the `commit_cdc_generation` state
|
||||
after getting a request from the user to create a new CDC generation if the
|
||||
|
||||
@@ -25,8 +25,7 @@ Getting Started
|
||||
:id: "getting-started"
|
||||
:class: my-panel
|
||||
|
||||
* `Install ScyllaDB (Binary Packages, Docker, or EC2) <https://www.scylladb.com/download/#core>`_ - Links to the ScyllaDB Download Center
|
||||
|
||||
* :doc:`Install ScyllaDB </getting-started/install-scylla/index/>`
|
||||
* :doc:`Configure ScyllaDB </getting-started/system-configuration/>`
|
||||
* :doc:`Run ScyllaDB in a Shared Environment </getting-started/scylla-in-a-shared-environment>`
|
||||
* :doc:`Create a ScyllaDB Cluster - Single Data Center (DC) </operating-scylla/procedures/cluster-management/create-cluster/>`
|
||||
|
||||
@@ -3,8 +3,7 @@
|
||||
ScyllaDB Housekeeping and how to disable it
|
||||
============================================
|
||||
|
||||
It is always recommended to run the latest version of ScyllaDB.
|
||||
The latest stable release version is always available from the `Download Center <https://www.scylladb.com/download/>`_.
|
||||
It is always recommended to run the latest stable version of ScyllaDB.
|
||||
|
||||
When you install ScyllaDB, it installs by default two services: **scylla-housekeeping-restart** and **scylla-housekeeping-daily**. These services check for the latest ScyllaDB version and prompt the user if they are using a version that is older than what is publicly available.
|
||||
Information about your ScyllaDB deployment, including the ScyllaDB version currently used, as well as unique user and server identifiers, are collected by a centralized service.
|
||||
|
||||
@@ -9,6 +9,8 @@ Running ``cluster repair`` on a **single node** synchronizes all data on all nod
|
||||
To synchronize all data in clusters that have both tablets-based and vnodes-based keyspaces, run :doc:`nodetool repair -pr </operating-scylla/nodetool-commands/repair/>` on **all**
|
||||
of the nodes in the cluster, and :doc:`nodetool cluster repair </operating-scylla/nodetool-commands/cluster/repair/>` on **any** of the nodes in the cluster.
|
||||
|
||||
.. warning:: :term:`Colocated Tables <Colocated Table>` cannot be synchronized using cluster repair in this version.
|
||||
|
||||
To check if a keyspace enables tablets, use:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
95
docs/poetry.lock
generated
95
docs/poetry.lock
generated
@@ -2,36 +2,35 @@
|
||||
|
||||
[[package]]
|
||||
name = "alabaster"
|
||||
version = "0.7.16"
|
||||
version = "1.0.0"
|
||||
description = "A light, configurable Sphinx theme"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
python-versions = ">=3.10"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"},
|
||||
{file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"},
|
||||
{file = "alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b"},
|
||||
{file = "alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyio"
|
||||
version = "4.11.0"
|
||||
version = "4.12.0"
|
||||
description = "High-level concurrency and networking framework on top of asyncio or Trio"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc"},
|
||||
{file = "anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4"},
|
||||
{file = "anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb"},
|
||||
{file = "anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
|
||||
idna = ">=2.8"
|
||||
sniffio = ">=1.1"
|
||||
typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""}
|
||||
|
||||
[package.extras]
|
||||
trio = ["trio (>=0.31.0)"]
|
||||
trio = ["trio (>=0.31.0) ; python_version < \"3.10\"", "trio (>=0.32.0) ; python_version >= \"3.10\""]
|
||||
|
||||
[[package]]
|
||||
name = "babel"
|
||||
@@ -50,14 +49,14 @@ dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)"
|
||||
|
||||
[[package]]
|
||||
name = "beartype"
|
||||
version = "0.22.6"
|
||||
version = "0.22.8"
|
||||
description = "Unbearably fast near-real-time pure-Python runtime-static type-checker."
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "beartype-0.22.6-py3-none-any.whl", hash = "sha256:0584bc46a2ea2a871509679278cda992eadde676c01356ab0ac77421f3c9a093"},
|
||||
{file = "beartype-0.22.6.tar.gz", hash = "sha256:97fbda69c20b48c5780ac2ca60ce3c1bb9af29b3a1a0216898ffabdd523e48f4"},
|
||||
{file = "beartype-0.22.8-py3-none-any.whl", hash = "sha256:b832882d04e41a4097bab9f63e6992bc6de58c414ee84cba9b45b67314f5ab2e"},
|
||||
{file = "beartype-0.22.8.tar.gz", hash = "sha256:b19b21c9359722ee3f7cc433f063b3e13997b27ae8226551ea5062e621f61165"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
@@ -70,18 +69,18 @@ test-tox-coverage = ["coverage (>=5.5)"]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.14.2"
|
||||
version = "4.14.3"
|
||||
description = "Screen-scraping library"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515"},
|
||||
{file = "beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e"},
|
||||
{file = "beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb"},
|
||||
{file = "beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
soupsieve = ">1.2"
|
||||
soupsieve = ">=1.6.1"
|
||||
typing-extensions = ">=4.0.0"
|
||||
|
||||
[package.extras]
|
||||
@@ -802,18 +801,6 @@ files = [
|
||||
{file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sniffio"
|
||||
version = "1.3.1"
|
||||
description = "Sniff out which async library your code is running under"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
|
||||
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "snowballstemmer"
|
||||
version = "3.0.1"
|
||||
@@ -840,18 +827,18 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "sphinx"
|
||||
version = "7.4.7"
|
||||
version = "8.1.3"
|
||||
description = "Python documentation generator"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
python-versions = ">=3.10"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239"},
|
||||
{file = "sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe"},
|
||||
{file = "sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2"},
|
||||
{file = "sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
alabaster = ">=0.7.14,<0.8.0"
|
||||
alabaster = ">=0.7.14"
|
||||
babel = ">=2.13"
|
||||
colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""}
|
||||
docutils = ">=0.20,<0.22"
|
||||
@@ -861,17 +848,17 @@ packaging = ">=23.0"
|
||||
Pygments = ">=2.17"
|
||||
requests = ">=2.30.0"
|
||||
snowballstemmer = ">=2.2"
|
||||
sphinxcontrib-applehelp = "*"
|
||||
sphinxcontrib-devhelp = "*"
|
||||
sphinxcontrib-htmlhelp = ">=2.0.0"
|
||||
sphinxcontrib-jsmath = "*"
|
||||
sphinxcontrib-qthelp = "*"
|
||||
sphinxcontrib-applehelp = ">=1.0.7"
|
||||
sphinxcontrib-devhelp = ">=1.0.6"
|
||||
sphinxcontrib-htmlhelp = ">=2.0.6"
|
||||
sphinxcontrib-jsmath = ">=1.0.1"
|
||||
sphinxcontrib-qthelp = ">=1.0.6"
|
||||
sphinxcontrib-serializinghtml = ">=1.1.9"
|
||||
tomli = {version = ">=2", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
docs = ["sphinxcontrib-websupport"]
|
||||
lint = ["flake8 (>=6.0)", "importlib-metadata (>=6.0)", "mypy (==1.10.1)", "pytest (>=6.0)", "ruff (==0.5.2)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-docutils (==0.21.0.20240711)", "types-requests (>=2.30.0)"]
|
||||
lint = ["flake8 (>=6.0)", "mypy (==1.11.1)", "pyright (==1.1.384)", "pytest (>=6.0)", "ruff (==0.6.9)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-Pillow (==10.2.0.20240822)", "types-Pygments (==2.18.0.20240506)", "types-colorama (==0.4.15.20240311)", "types-defusedxml (==0.7.0.20240218)", "types-docutils (==0.21.0.20241005)", "types-requests (==2.32.0.20240914)", "types-urllib3 (==1.26.25.14)"]
|
||||
test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"]
|
||||
|
||||
[[package]]
|
||||
@@ -1001,13 +988,14 @@ test = ["tox"]
|
||||
|
||||
[[package]]
|
||||
name = "sphinx-scylladb-markdown"
|
||||
version = "0.1.3"
|
||||
version = "0.1.4"
|
||||
description = "Sphinx extension for ScyllaDB documentation with enhanced Markdown support through MystParser and recommonmark."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "sphinx_scylladb_markdown-0.1.3-py3-none-any.whl", hash = "sha256:f20160b4aadf4c8cf95637f0a544121954b792914ab6ec05b67cae75e20a5566"},
|
||||
{file = "sphinx_scylladb_markdown-0.1.4-py3-none-any.whl", hash = "sha256:598753e01cf159d4698eb1a707958828446e21749038d3d42c5b9c7e86eda6e4"},
|
||||
{file = "sphinx_scylladb_markdown-0.1.4.tar.gz", hash = "sha256:9db3ae0dcf7c3519262da65e48c7f9e4db0ad1ce9c5f874864ea218f4cbc4c68"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -1059,24 +1047,25 @@ dev = ["build", "flake8", "pre-commit", "pytest", "sphinx", "sphinx-last-updated
|
||||
|
||||
[[package]]
|
||||
name = "sphinx-substitution-extensions"
|
||||
version = "2025.1.2"
|
||||
version = "2025.11.17"
|
||||
description = "Extensions for Sphinx which allow for substitutions."
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "sphinx_substitution_extensions-2025.1.2-py2.py3-none-any.whl", hash = "sha256:ff14f40e4393bd7434a196badb8d47983355d9755af884b902e3023fb456b958"},
|
||||
{file = "sphinx_substitution_extensions-2025.1.2.tar.gz", hash = "sha256:53b8d394d5098a09aef36bc687fa310aeb28466319d2c750e996e46400fb2474"},
|
||||
{file = "sphinx_substitution_extensions-2025.11.17-py2.py3-none-any.whl", hash = "sha256:ac18455bdc8324b337b0fe7498c1c0d0b1cb65c74d131459be4dea9edb6abbef"},
|
||||
{file = "sphinx_substitution_extensions-2025.11.17.tar.gz", hash = "sha256:aae17f8db9efc3d454a304373ae3df763f8739e05e0b98d5381db46f6d250b27"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
beartype = ">=0.18.5"
|
||||
docutils = ">=0.19"
|
||||
sphinx = ">=7.3.5"
|
||||
myst-parser = ">=4.0.0"
|
||||
sphinx = ">=8.1.0"
|
||||
|
||||
[package.extras]
|
||||
dev = ["actionlint-py (==1.7.5.21)", "check-manifest (==0.50)", "deptry (==0.21.2)", "doc8 (==1.1.2)", "doccmd (==2024.12.26)", "docformatter (==1.7.5)", "interrogate (==1.7.0)", "mypy-strict-kwargs (==2024.12.25)", "mypy[faster-cache] (==1.14.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pyenchant (==3.3.0rc1)", "pylint (==3.3.3)", "pyproject-fmt (==2.5.0)", "pyright (==1.1.391)", "pyroma (==4.2)", "pytest (==8.3.4)", "pytest-cov (==6.0.0)", "ruff (==0.8.4)", "shellcheck-py (==0.10.0.1)", "shfmt-py (==3.7.0.1)", "sphinx-toolbox (==3.8.1)", "sphinx[test] (==8.1.3)", "types-docutils (==0.21.0.20241128)", "vulture (==2.14)", "yamlfix (==1.17.0)"]
|
||||
release = ["check-wheel-contents (==0.6.1)"]
|
||||
dev = ["actionlint-py (==1.7.8.24)", "check-manifest (==0.51)", "deptry (==0.24.0)", "doc8 (==2.0.0)", "doccmd (==2025.11.8.1)", "docformatter (==1.7.7)", "interrogate (==1.7.0)", "mypy-strict-kwargs (==2025.4.3)", "mypy[faster-cache] (==1.18.2)", "pre-commit (==4.4.0)", "pylint[spelling] (==4.0.3)", "pyproject-fmt (==2.11.1)", "pyright (==1.1.407)", "pyroma (==5.0)", "pytest (==9.0.1)", "pytest-cov (==7.0.0)", "ruff (==0.14.5)", "shellcheck-py (==0.11.0.1)", "shfmt-py (==3.12.0.2)", "sphinx-lint (==1.0.1)", "sphinx-toolbox (==4.0.0)", "types-docutils (==0.22.2.20251006)", "vulture (==2.14)", "yamlfix (==1.19.0)"]
|
||||
release = ["check-wheel-contents (==0.6.3)"]
|
||||
|
||||
[[package]]
|
||||
name = "sphinx-tabs"
|
||||
@@ -1363,21 +1352,21 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "2.5.0"
|
||||
version = "2.6.2"
|
||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"},
|
||||
{file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"},
|
||||
{file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
|
||||
{file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""]
|
||||
brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""]
|
||||
h2 = ["h2 (>=4,<5)"]
|
||||
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||
zstd = ["zstandard (>=0.18.0)"]
|
||||
zstd = ["backports-zstd (>=1.0.0) ; python_version < \"3.14\""]
|
||||
|
||||
[[package]]
|
||||
name = "uvicorn"
|
||||
@@ -1603,4 +1592,4 @@ files = [
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "0ae673106f45d3465cbdabbf511e165ca44feadd34d7753f2e68093afaa95c79"
|
||||
content-hash = "9a17caa38b3c88f3fe3d1a60fdb73a96aa12ff1e30ecb00e2f9249e7ba9f859c"
|
||||
|
||||
@@ -12,10 +12,10 @@ redirects_cli ="^0.1.3"
|
||||
sphinx-scylladb-theme = "^1.8.10"
|
||||
sphinx-sitemap = "^2.6.0"
|
||||
sphinx-autobuild = "^2024.4.19"
|
||||
Sphinx = "^7.3.7"
|
||||
Sphinx = "^8.0.0"
|
||||
sphinx-multiversion-scylla = "^0.3.4"
|
||||
sphinxcontrib-datatemplates = "^0.9.2"
|
||||
sphinx-scylladb-markdown = "^0.1.2"
|
||||
sphinx-scylladb-markdown = "^0.1.4"
|
||||
sphinx_collapse ="^0.1.3"
|
||||
|
||||
[build-system]
|
||||
|
||||
@@ -202,3 +202,7 @@ Glossary
|
||||
The name comes from two basic operations, multiply (MU) and rotate (R), used in its inner loop.
|
||||
The MurmurHash3 version used in ScyllaDB originated from `Apache Cassandra <https://commons.apache.org/proper/commons-codec/apidocs/org/apache/commons/codec/digest/MurmurHash3.html>`_, and is **not** identical to the `official MurmurHash3 calculation <https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/MurmurHash.java#L31-L33>`_. More `here <https://github.com/russss/murmur3-cassandra>`_.
|
||||
|
||||
Colocated Table
|
||||
An internal table of a special type in a :doc:`tablets </architecture/tablets>` enabled keyspace that is colocated with another base table, meaning it always has the same tablet replicas as the base table.
|
||||
Current types of colocated tables include CDC log tables, local indexes, and materialized views that have the same partition key as their base table.
|
||||
|
||||
|
||||
@@ -816,7 +816,6 @@ public:
|
||||
future<data_sink> wrap_sink(const sstables::sstable& sst, sstables::component_type type, data_sink sink) override {
|
||||
switch (type) {
|
||||
case sstables::component_type::Scylla:
|
||||
case sstables::component_type::TemporaryScylla:
|
||||
case sstables::component_type::TemporaryTOC:
|
||||
case sstables::component_type::TOC:
|
||||
co_return sink;
|
||||
@@ -845,7 +844,6 @@ public:
|
||||
sstables::component_type type,
|
||||
data_source src) override {
|
||||
switch (type) {
|
||||
case sstables::component_type::TemporaryScylla:
|
||||
case sstables::component_type::Scylla:
|
||||
case sstables::component_type::TemporaryTOC:
|
||||
case sstables::component_type::TOC:
|
||||
|
||||
@@ -176,6 +176,8 @@ public:
|
||||
gms::feature rack_list_rf { *this, "RACK_LIST_RF"sv };
|
||||
gms::feature driver_service_level { *this, "DRIVER_SERVICE_LEVEL"sv };
|
||||
gms::feature strongly_consistent_tables { *this, "STRONGLY_CONSISTENT_TABLES"sv };
|
||||
gms::feature client_routes { *this, "CLIENT_ROUTES"sv };
|
||||
gms::feature removenode_with_left_token_ring { *this, "REMOVENODE_WITH_LEFT_TOKEN_RING"sv };
|
||||
public:
|
||||
|
||||
const std::unordered_map<sstring, std::reference_wrapper<feature>>& registered_features() const;
|
||||
|
||||
15
main.cc
15
main.cc
@@ -23,6 +23,7 @@
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/signal.hh>
|
||||
#include <seastar/core/timer.hh>
|
||||
#include "service/client_routes.hh"
|
||||
#include "service/qos/raft_service_level_distributed_data_accessor.hh"
|
||||
#include "db/view/view_building_state.hh"
|
||||
#include "tasks/task_manager.hh"
|
||||
@@ -1795,6 +1796,13 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
auth_cache.stop().get();
|
||||
});
|
||||
|
||||
checkpoint(stop_signal, "initializing client routes service");
|
||||
static sharded<service::client_routes_service> client_routes;
|
||||
client_routes.start(std::ref(stop_signal.as_sharded_abort_source()), std::ref(feature_service), std::ref(group0_client), std::ref(qp), std::ref(lifecycle_notifier)).get();
|
||||
auto stop_client_routes = defer_verbose_shutdown("client_routes", [&] {
|
||||
client_routes.stop().get();
|
||||
});
|
||||
|
||||
checkpoint(stop_signal, "initializing storage service");
|
||||
debug::the_storage_service = &ss;
|
||||
ss.start(std::ref(stop_signal.as_sharded_abort_source()),
|
||||
@@ -1803,7 +1811,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
std::ref(messaging), std::ref(repair),
|
||||
std::ref(stream_manager), std::ref(lifecycle_notifier), std::ref(bm), std::ref(snitch),
|
||||
std::ref(tablet_allocator), std::ref(cdc_generation_service), std::ref(view_builder), std::ref(view_building_worker), std::ref(qp), std::ref(sl_controller),
|
||||
std::ref(auth_cache),
|
||||
std::ref(auth_cache), std::ref(client_routes),
|
||||
std::ref(tsm), std::ref(vbsm), std::ref(task_manager), std::ref(gossip_address_map),
|
||||
compression_dict_updated_callback,
|
||||
only_on_shard0(&*disk_space_monitor_shard0)
|
||||
@@ -2191,6 +2199,11 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
});
|
||||
}).get();
|
||||
|
||||
api::set_server_client_routes(ctx, client_routes).get();
|
||||
auto stop_cr_api = defer_verbose_shutdown("client routes API", [&ctx] {
|
||||
api::unset_server_client_routes(ctx).get();
|
||||
});
|
||||
|
||||
checkpoint(stop_signal, "join cluster");
|
||||
// Allow abort during join_cluster since bootstrap or replace
|
||||
// can take a long time.
|
||||
|
||||
@@ -56,33 +56,16 @@ static tasks::task_manager::task_state get_state(const db::system_keyspace::topo
|
||||
}
|
||||
}
|
||||
|
||||
static std::set<tasks::task_id> get_pending_ids(service::topology& topology) {
|
||||
std::set<tasks::task_id> ids;
|
||||
for (auto& request : topology.requests) {
|
||||
ids.emplace(topology.find(request.first)->second.request_id);
|
||||
}
|
||||
return ids;
|
||||
static future<db::system_keyspace::topology_requests_entries> get_entries(db::system_keyspace& sys_ks, std::chrono::seconds ttl) {
|
||||
return sys_ks.get_node_ops_request_entries(db_clock::now() - ttl);
|
||||
}
|
||||
|
||||
static future<db::system_keyspace::topology_requests_entries> get_entries(db::system_keyspace& sys_ks, service::topology& topology, std::chrono::seconds ttl) {
|
||||
// Started requests.
|
||||
auto entries = co_await sys_ks.get_node_ops_request_entries(db_clock::now() - ttl);
|
||||
|
||||
// Pending requests.
|
||||
for (auto& id : get_pending_ids(topology)) {
|
||||
entries.try_emplace(id.uuid(), db::system_keyspace::topology_requests_entry{});
|
||||
}
|
||||
|
||||
co_return entries;
|
||||
}
|
||||
|
||||
future<std::optional<tasks::task_status>> node_ops_virtual_task::get_status_helper(tasks::task_id id, tasks::virtual_task_hint hint) const {
|
||||
auto entry = co_await _ss._sys_ks.local().get_topology_request_entry(id.uuid(), false);
|
||||
auto started = entry.id;
|
||||
service::topology& topology = _ss._topology_state_machine._topology;
|
||||
if (!started && !get_pending_ids(topology).contains(id)) {
|
||||
future<std::optional<tasks::task_status>> node_ops_virtual_task::get_status(tasks::task_id id, tasks::virtual_task_hint hint) {
|
||||
auto entry_opt = co_await _ss._sys_ks.local().get_topology_request_entry_opt(id.uuid());
|
||||
if (!entry_opt) {
|
||||
co_return std::nullopt;
|
||||
}
|
||||
auto& entry = *entry_opt;
|
||||
co_return tasks::task_status{
|
||||
.task_id = id,
|
||||
.type = request_type_to_task_type(entry.request_type),
|
||||
@@ -101,7 +84,7 @@ future<std::optional<tasks::task_status>> node_ops_virtual_task::get_status_help
|
||||
.entity = "",
|
||||
.progress_units = "",
|
||||
.progress = tasks::task_manager::task::progress{},
|
||||
.children = started ? co_await get_children(get_module(), id, std::bind_front(&gms::gossiper::is_alive, &_ss.gossiper())) : utils::chunked_vector<tasks::task_identity>{}
|
||||
.children = co_await get_children(get_module(), id, std::bind_front(&gms::gossiper::is_alive, &_ss.gossiper()))
|
||||
};
|
||||
}
|
||||
|
||||
@@ -123,26 +106,22 @@ future<std::optional<tasks::virtual_task_hint>> node_ops_virtual_task::contains(
|
||||
}
|
||||
}
|
||||
|
||||
auto entry = co_await _ss._sys_ks.local().get_topology_request_entry(task_id.uuid(), false);
|
||||
co_return bool(entry.id) && std::holds_alternative<service::topology_request>(entry.request_type) ? empty_hint : std::nullopt;
|
||||
auto entry = co_await _ss._sys_ks.local().get_topology_request_entry_opt(task_id.uuid());
|
||||
co_return entry && std::holds_alternative<service::topology_request>(entry->request_type) ? empty_hint : std::nullopt;
|
||||
}
|
||||
|
||||
future<tasks::is_abortable> node_ops_virtual_task::is_abortable(tasks::virtual_task_hint) const {
|
||||
return make_ready_future<tasks::is_abortable>(tasks::is_abortable::no);
|
||||
}
|
||||
|
||||
future<std::optional<tasks::task_status>> node_ops_virtual_task::get_status(tasks::task_id id, tasks::virtual_task_hint hint) {
|
||||
return get_status_helper(id, std::move(hint));
|
||||
}
|
||||
|
||||
future<std::optional<tasks::task_status>> node_ops_virtual_task::wait(tasks::task_id id, tasks::virtual_task_hint hint) {
|
||||
auto entry = co_await get_status_helper(id, hint);
|
||||
auto entry = co_await get_status(id, hint);
|
||||
if (!entry) {
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
co_await _ss.wait_for_topology_request_completion(id.uuid(), false);
|
||||
co_return co_await get_status_helper(id, std::move(hint));
|
||||
co_return co_await get_status(id, std::move(hint));
|
||||
}
|
||||
|
||||
future<> node_ops_virtual_task::abort(tasks::task_id id, tasks::virtual_task_hint) noexcept {
|
||||
@@ -151,8 +130,7 @@ future<> node_ops_virtual_task::abort(tasks::task_id id, tasks::virtual_task_hin
|
||||
|
||||
future<std::vector<tasks::task_stats>> node_ops_virtual_task::get_stats() {
|
||||
db::system_keyspace& sys_ks = _ss._sys_ks.local();
|
||||
service::topology& topology = _ss._topology_state_machine._topology;
|
||||
co_return std::ranges::to<std::vector<tasks::task_stats>>(co_await get_entries(sys_ks, topology, get_task_manager().get_user_task_ttl())
|
||||
co_return std::ranges::to<std::vector<tasks::task_stats>>(co_await get_entries(sys_ks, get_task_manager().get_user_task_ttl())
|
||||
| std::views::transform([] (const auto& e) {
|
||||
auto id = e.first;
|
||||
auto& entry = e.second;
|
||||
|
||||
@@ -39,8 +39,6 @@ public:
|
||||
virtual future<std::optional<tasks::task_status>> wait(tasks::task_id id, tasks::virtual_task_hint hint) override;
|
||||
virtual future<> abort(tasks::task_id id, tasks::virtual_task_hint hint) noexcept override;
|
||||
virtual future<std::vector<tasks::task_stats>> get_stats() override;
|
||||
private:
|
||||
future<std::optional<tasks::task_status>> get_status_helper(tasks::task_id id, tasks::virtual_task_hint hint) const;
|
||||
};
|
||||
|
||||
class streaming_task_impl : public tasks::task_manager::task::impl {
|
||||
|
||||
@@ -2793,6 +2793,7 @@ future<> database::flush_all_tables() {
|
||||
});
|
||||
_all_tables_flushed_at = db_clock::now();
|
||||
co_await _commitlog->wait_for_pending_deletes();
|
||||
dblog.info("Forcing new commitlog segment and flushing all tables complete");
|
||||
}
|
||||
|
||||
future<db_clock::time_point> database::get_all_tables_flushed_at(sharded<database>& sharded_db) {
|
||||
|
||||
@@ -593,7 +593,7 @@ private:
|
||||
v3_columns _v3_columns;
|
||||
mutable schema_registry_entry* _registry_entry = nullptr;
|
||||
std::unique_ptr<::view_info> _view_info;
|
||||
schema_ptr _cdc_schema;
|
||||
mutable schema_ptr _cdc_schema;
|
||||
|
||||
const std::array<column_count_type, 3> _offsets;
|
||||
|
||||
@@ -957,6 +957,7 @@ public:
|
||||
friend bool operator==(const schema&, const schema&);
|
||||
const column_mapping& get_column_mapping() const;
|
||||
friend class schema_registry_entry;
|
||||
friend class schema_registry;
|
||||
// May be called from different shard
|
||||
schema_registry_entry* registry_entry() const noexcept;
|
||||
// Returns true iff this schema version was synced with on current node.
|
||||
|
||||
@@ -78,10 +78,8 @@ void schema_registry::attach_table(schema_registry_entry& e) noexcept {
|
||||
}
|
||||
|
||||
schema_ptr schema_registry::learn(schema_ptr s) {
|
||||
auto learned_cdc_schema = s->cdc_schema() ? local_schema_registry().learn(s->cdc_schema()) : nullptr;
|
||||
if (learned_cdc_schema != s->cdc_schema()) {
|
||||
s = s->make_with_cdc(learned_cdc_schema);
|
||||
}
|
||||
auto learned_cdc_schema = s->cdc_schema() ? learn(s->cdc_schema()) : nullptr;
|
||||
s->_cdc_schema = learned_cdc_schema;
|
||||
if (s->registry_entry()) {
|
||||
return s;
|
||||
}
|
||||
@@ -92,7 +90,9 @@ schema_ptr schema_registry::learn(schema_ptr s) {
|
||||
e.load(s);
|
||||
attach_table(e);
|
||||
}
|
||||
return e.get_schema();
|
||||
auto loaded_s = e.get_schema();
|
||||
loaded_s->_cdc_schema = learned_cdc_schema;
|
||||
return loaded_s;
|
||||
}
|
||||
slogger.debug("Learning about version {} of {}.{}", s->version(), s->ks_name(), s->cf_name());
|
||||
auto e_ptr = make_lw_shared<schema_registry_entry>(s->version(), *this);
|
||||
|
||||
@@ -3,6 +3,7 @@ target_sources(service
|
||||
PRIVATE
|
||||
broadcast_tables/experimental/lang.cc
|
||||
client_state.cc
|
||||
client_routes.cc
|
||||
mapreduce_service.cc
|
||||
migration_manager.cc
|
||||
misc_services.cc
|
||||
|
||||
137
service/client_routes.cc
Normal file
137
service/client_routes.cc
Normal file
@@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "service/client_routes.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "mutation/mutation.hh"
|
||||
#include "service/endpoint_lifecycle_subscriber.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
|
||||
static logging::logger crlogger("client_routes");
|
||||
|
||||
service::query_state& client_routes_query_state() {
|
||||
using namespace std::chrono_literals;
|
||||
const auto t = 10s;
|
||||
static timeout_config tc{ t, t, t, t, t, t, t };
|
||||
static thread_local service::client_state cs(service::client_state::internal_tag{}, tc);
|
||||
static thread_local service::query_state qs(cs, empty_service_permit());
|
||||
return qs;
|
||||
};
|
||||
|
||||
future<mutation> service::client_routes_service::make_remove_client_route_mutation(api::timestamp_type ts, const service::client_routes_service::client_route_key& key) {
|
||||
static const sstring stmt = format("DELETE FROM {}.{} WHERE connection_id = ? and host_id = ?", db::system_keyspace::NAME, db::system_keyspace::CLIENT_ROUTES);
|
||||
|
||||
auto muts = co_await _qp.get_mutations_internal(stmt, client_routes_query_state(), ts, {key.connection_id, key.host_id});
|
||||
if (muts.size() != 1) {
|
||||
on_internal_error(crlogger, fmt::format("expected 1 mutation got {}", muts.size()));
|
||||
}
|
||||
co_return std::move(muts[0]);
|
||||
}
|
||||
|
||||
future<mutation> service::client_routes_service::make_update_client_route_mutation(api::timestamp_type ts, const service::client_routes_service::client_route_entry& route) {
|
||||
static const sstring stmt = format("INSERT INTO {}.{} (connection_id, host_id, address, port, tls_port, alternator_port, alternator_https_port) VALUES (?, ?, ?, ?, ?, ?, ?)", db::system_keyspace::NAME, db::system_keyspace::CLIENT_ROUTES);
|
||||
|
||||
auto muts = co_await _qp.get_mutations_internal(stmt, client_routes_query_state(), ts, {
|
||||
route.connection_id,
|
||||
route.host_id,
|
||||
route.address,
|
||||
route.port,
|
||||
route.tls_port,
|
||||
route.alternator_port,
|
||||
route.alternator_https_port
|
||||
});
|
||||
if (muts.size() != 1) {
|
||||
on_internal_error(crlogger, fmt::format("expected 1 mutation got {}", muts.size()));
|
||||
}
|
||||
co_return std::move(muts[0]);
|
||||
}
|
||||
|
||||
future<std::vector<service::client_routes_service::client_route_entry>> service::client_routes_service::get_client_routes() const {
|
||||
std::vector<service::client_routes_service::client_route_entry> result;
|
||||
static const sstring query = format("SELECT * from {}.{}", db::system_keyspace::NAME, db::system_keyspace::CLIENT_ROUTES);
|
||||
auto rs = co_await _qp.execute_internal(query, cql3::query_processor::cache_internal::yes);
|
||||
result.reserve(rs->size());
|
||||
for (const auto& row : *rs) {
|
||||
result.emplace_back(
|
||||
row.get_as<sstring>("connection_id"),
|
||||
row.get_as<utils::UUID>("host_id"),
|
||||
row.get_as<sstring>("address"),
|
||||
row.get_opt<int32_t>("port"),
|
||||
row.get_opt<int32_t>("tls_port"),
|
||||
row.get_opt<int32_t>("alternator_port"),
|
||||
row.get_opt<int32_t>("alternator_https_port")
|
||||
);
|
||||
}
|
||||
co_return result;
|
||||
}
|
||||
|
||||
seastar::future<> service::client_routes_service::notify_client_routes_change(const client_route_keys& client_route_keys) {
|
||||
co_await container().invoke_on_all([&client_route_keys] (service::client_routes_service& client_routes) {
|
||||
return client_routes._lifecycle_notifier.notify_client_routes_change(client_route_keys);
|
||||
});
|
||||
}
|
||||
|
||||
seastar::future<> service::client_routes_service::set_client_routes_inner(const std::vector<service::client_routes_service::client_route_entry>& route_entries) {
|
||||
auto guard = co_await _group0_client.start_operation(_abort_source, service::raft_timeout{});
|
||||
utils::chunked_vector<canonical_mutation> cmuts;
|
||||
|
||||
for (auto& entry : route_entries) {
|
||||
auto mut = co_await make_update_client_route_mutation(guard.write_timestamp(), entry);
|
||||
cmuts.emplace_back(std::move(mut));
|
||||
}
|
||||
auto cmd = _group0_client.prepare_command(service::write_mutations{std::move(cmuts)}, guard, "insert client routes");
|
||||
co_await _group0_client.add_entry(std::move(cmd), std::move(guard), _abort_source);
|
||||
}
|
||||
|
||||
seastar::future<> service::client_routes_service::delete_client_routes_inner(const std::vector<service::client_routes_service::client_route_key>& route_keys) {
|
||||
auto guard = co_await _group0_client.start_operation(_abort_source, service::raft_timeout{});
|
||||
utils::chunked_vector<canonical_mutation> cmuts;
|
||||
|
||||
for (const auto& route_key : route_keys) {
|
||||
auto mut = co_await make_remove_client_route_mutation(guard.write_timestamp(), route_key);
|
||||
cmuts.emplace_back(std::move(mut));
|
||||
}
|
||||
|
||||
auto cmd = _group0_client.prepare_command(service::write_mutations{std::move(cmuts)}, guard, "delete client routes");
|
||||
co_await _group0_client.add_entry(std::move(cmd), std::move(guard), _abort_source);
|
||||
}
|
||||
|
||||
seastar::future<> service::client_routes_service::set_client_routes(const std::vector<service::client_routes_service::client_route_entry>& route_entries) {
|
||||
return container().invoke_on(0, [route_entries = std::move(route_entries)] (service::client_routes_service& cr) -> future<> {
|
||||
return cr.with_retry([&cr, route_entries = std::move(route_entries)] () mutable {
|
||||
return cr.set_client_routes_inner(route_entries);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
seastar::future<> service::client_routes_service::delete_client_routes(const std::vector<service::client_routes_service::client_route_key>& route_keys) {
|
||||
return container().invoke_on(0, [route_keys = std::move(route_keys)] (service::client_routes_service& cr) -> future<> {
|
||||
return cr.with_retry([&cr, route_keys = std::move(route_keys)] () mutable {
|
||||
return cr.delete_client_routes_inner(route_keys);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
seastar::future<> service::client_routes_service::with_retry(Func&& func) const {
|
||||
int retries = 10;
|
||||
while (true) {
|
||||
try {
|
||||
co_await func();
|
||||
} catch (const ::service::group0_concurrent_modification&) {
|
||||
crlogger.warn("Failed to set client routes due to guard conflict, retries={}", retries);
|
||||
if (retries--) {
|
||||
continue;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
88
service/client_routes.hh
Normal file
88
service/client_routes.hh
Normal file
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/abort_source.hh>
|
||||
#include <seastar/core/sharded.hh>
|
||||
|
||||
#include "gms/feature_service.hh"
|
||||
#include "mutation/mutation.hh"
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
|
||||
namespace service {
|
||||
|
||||
class endpoint_lifecycle_notifier;
|
||||
|
||||
class client_routes_service : public seastar::peering_sharded_service<client_routes_service> {
|
||||
public:
|
||||
client_routes_service(
|
||||
abort_source& abort_source,
|
||||
gms::feature_service& feature_service,
|
||||
service::raft_group0_client& group0_client,
|
||||
cql3::query_processor& qp,
|
||||
endpoint_lifecycle_notifier& elc_notif
|
||||
)
|
||||
: _abort_source(abort_source)
|
||||
, _feature_service(feature_service)
|
||||
, _group0_client(group0_client)
|
||||
, _qp(qp)
|
||||
, _lifecycle_notifier(elc_notif) { }
|
||||
|
||||
struct client_route_key {
|
||||
sstring connection_id;
|
||||
utils::UUID host_id;
|
||||
|
||||
bool operator<(const client_route_key& other) const {
|
||||
if (connection_id != other.connection_id) {
|
||||
return connection_id < other.connection_id;
|
||||
}
|
||||
return host_id < other.host_id;
|
||||
}
|
||||
};
|
||||
using client_route_keys = std::set<client_route_key>;
|
||||
|
||||
struct client_route_entry {
|
||||
sstring connection_id;
|
||||
utils::UUID host_id;
|
||||
sstring address;
|
||||
// At least one of the ports should be specified
|
||||
std::optional<int32_t> port;
|
||||
std::optional<int32_t> tls_port;
|
||||
std::optional<int32_t> alternator_port;
|
||||
std::optional<int32_t> alternator_https_port;
|
||||
};
|
||||
|
||||
gms::feature_service& get_feature_service() noexcept {
|
||||
return _feature_service;
|
||||
}
|
||||
|
||||
// mutations
|
||||
future<mutation> make_remove_client_route_mutation(api::timestamp_type ts, const service::client_routes_service::client_route_key& key);
|
||||
future<mutation> make_update_client_route_mutation(api::timestamp_type ts, const client_route_entry& entry);
|
||||
future<std::vector<client_route_entry>> get_client_routes() const;
|
||||
seastar::future<> set_client_routes(const std::vector<service::client_routes_service::client_route_entry>& route_entries);
|
||||
seastar::future<> delete_client_routes(const std::vector<service::client_routes_service::client_route_key>& route_keys);
|
||||
|
||||
|
||||
// notifications
|
||||
seastar::future<> notify_client_routes_change(const client_route_keys& client_route_keys);
|
||||
private:
|
||||
seastar::future<> set_client_routes_inner(const std::vector<service::client_routes_service::client_route_entry>& route_entries);
|
||||
seastar::future<> delete_client_routes_inner(const std::vector<service::client_routes_service::client_route_key>& route_keys);
|
||||
template <typename Func>
|
||||
seastar::future<> with_retry(Func&& func) const;
|
||||
|
||||
abort_source& _abort_source;
|
||||
gms::feature_service& _feature_service;
|
||||
service::raft_group0_client& _group0_client;
|
||||
cql3::query_processor& _qp;
|
||||
endpoint_lifecycle_notifier& _lifecycle_notifier;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "gms/inet_address.hh"
|
||||
#include "locator/host_id.hh"
|
||||
#include "utils/atomic_vector.hh"
|
||||
#include "service/client_routes.hh"
|
||||
|
||||
namespace service {
|
||||
|
||||
@@ -65,6 +66,7 @@ public:
|
||||
* @param endpoint the endpoint marked DOWN.
|
||||
*/
|
||||
virtual void on_down(const gms::inet_address& endpoint, locator::host_id host_id) {}
|
||||
virtual void on_client_routes_change(const client_routes_service::client_route_keys& client_route_keys) {}
|
||||
};
|
||||
|
||||
class endpoint_lifecycle_notifier {
|
||||
@@ -79,6 +81,8 @@ public:
|
||||
future<> notify_released(locator::host_id host_id);
|
||||
future<> notify_up(gms::inet_address endpoint, locator::host_id host_id);
|
||||
future<> notify_joined(gms::inet_address endpoint, locator::host_id host_id);
|
||||
|
||||
future<> notify_client_routes_change(const client_routes_service::client_route_keys& client_route_keys);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -163,7 +163,11 @@ public:
|
||||
void before_drop_column_family(const schema&, utils::chunked_vector<mutation>&, api::timestamp_type);
|
||||
void before_drop_keyspace(const sstring& keyspace_name, utils::chunked_vector<mutation>&, api::timestamp_type);
|
||||
|
||||
// Called when creating a tablet map for a new table.
|
||||
// When in the context of a notification callback, call `before_allocate_tablet_map_in_notification`,
|
||||
// and otherwise call 'before_allocate_tablet_map'.
|
||||
void before_allocate_tablet_map(const locator::tablet_map&, const schema&, utils::chunked_vector<mutation>&, api::timestamp_type);
|
||||
void before_allocate_tablet_map_in_notification(const locator::tablet_map&, const schema&, utils::chunked_vector<mutation>&, api::timestamp_type);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -648,6 +648,13 @@ void migration_notifier::before_allocate_tablet_map(const locator::tablet_map& m
|
||||
});
|
||||
}
|
||||
|
||||
void migration_notifier::before_allocate_tablet_map_in_notification(const locator::tablet_map& map,
|
||||
const schema& s, utils::chunked_vector<mutation>& mutations, api::timestamp_type ts) {
|
||||
_listeners.thread_for_each_nested([&map, &s, &mutations, ts] (migration_listener* listener) {
|
||||
listener->on_before_allocate_tablet_map(map, s, mutations, ts);
|
||||
});
|
||||
}
|
||||
|
||||
utils::chunked_vector<mutation> prepare_keyspace_update_announcement(replica::database& db, lw_shared_ptr<keyspace_metadata> ksm, api::timestamp_type ts) {
|
||||
db.validate_keyspace_update(*ksm);
|
||||
mlogger.info("Update Keyspace: {}", ksm);
|
||||
|
||||
@@ -640,6 +640,16 @@ future<scheduling_group> service_level_controller::auth_integration::get_user_sc
|
||||
}
|
||||
}
|
||||
|
||||
scheduling_group service_level_controller::auth_integration::get_user_cached_scheduling_group(const std::optional<auth::authenticated_user>& usr) {
|
||||
if (usr && usr->name) {
|
||||
auto sl_opt = find_cached_effective_service_level(*usr->name);
|
||||
auto& sl_name = (sl_opt && sl_opt->shares_name) ? *sl_opt->shares_name : default_service_level_name;
|
||||
return _sl_controller.get_scheduling_group(sl_name);
|
||||
} else {
|
||||
return _sl_controller.get_default_scheduling_group();
|
||||
}
|
||||
}
|
||||
|
||||
future<scheduling_group> service_level_controller::get_user_scheduling_group(const std::optional<auth::authenticated_user>& usr) {
|
||||
// Special case:
|
||||
// -------------
|
||||
@@ -656,6 +666,11 @@ future<scheduling_group> service_level_controller::get_user_scheduling_group(con
|
||||
return _auth_integration->get_user_scheduling_group(usr);
|
||||
}
|
||||
|
||||
scheduling_group service_level_controller::get_cached_user_scheduling_group(const std::optional<auth::authenticated_user>& usr) {
|
||||
SCYLLA_ASSERT(_auth_integration != nullptr);
|
||||
return _auth_integration->get_user_cached_scheduling_group(usr);
|
||||
}
|
||||
|
||||
std::optional<sstring> service_level_controller::get_active_service_level() {
|
||||
unsigned sched_idx = internal::scheduling_group_index(current_scheduling_group());
|
||||
if (_sl_lookup[sched_idx].first) {
|
||||
@@ -774,6 +789,10 @@ future<service_levels_info> service_level_controller::get_distributed_service_le
|
||||
return _sl_data_accessor ? _sl_data_accessor->get_service_level(service_level_name) : make_ready_future<service_levels_info>();
|
||||
}
|
||||
|
||||
bool service_level_controller::can_use_effective_service_level_cache() const{
|
||||
return _sl_data_accessor && _sl_data_accessor->can_use_effective_service_level_cache();
|
||||
}
|
||||
|
||||
future<bool> service_level_controller::validate_before_service_level_add() {
|
||||
assert(this_shard_id() == global_controller);
|
||||
if (_global_controller_db->deleted_scheduling_groups.size() > 0) {
|
||||
|
||||
@@ -154,7 +154,10 @@ public:
|
||||
/// Synchronous version of `find_effective_service_level` that only checks the cache.
|
||||
std::optional<service_level_options> find_cached_effective_service_level(const sstring& role_name);
|
||||
|
||||
/// Execute a function within the service level context of a user, get_user_scheduling_group - async version
|
||||
/// get_user_cached_scheduling_group - sync version (used for v2 servers).
|
||||
future<scheduling_group> get_user_scheduling_group(const std::optional<auth::authenticated_user>& usr);
|
||||
scheduling_group get_user_cached_scheduling_group(const std::optional<auth::authenticated_user>& usr);
|
||||
|
||||
template <typename Func, typename Ret = std::invoke_result_t<Func>>
|
||||
requires std::invocable<Func>
|
||||
@@ -339,6 +342,12 @@ public:
|
||||
* @return if the user is authenticated the user's scheduling group. otherwise get_scheduling_group("default")
|
||||
*/
|
||||
future<scheduling_group> get_user_scheduling_group(const std::optional<auth::authenticated_user>& usr);
|
||||
/**
|
||||
* Get the scheduling group of a specific user for the service level cache
|
||||
* @param user - the user for determining the service level
|
||||
* @return if the user is authenticated the user's scheduling group. otherwise get_scheduling_group("default")
|
||||
*/
|
||||
scheduling_group get_cached_user_scheduling_group(const std::optional<auth::authenticated_user>& usr);
|
||||
/**
|
||||
* @return the name of the currently active service level if such exists or an empty
|
||||
* optional if no active service level.
|
||||
@@ -400,6 +409,13 @@ public:
|
||||
future<service_levels_info> get_distributed_service_levels(qos::query_context ctx);
|
||||
future<service_levels_info> get_distributed_service_level(sstring service_level_name);
|
||||
|
||||
/*
|
||||
* Returns whether effective service level cache can be populated and used.
|
||||
* This is equivalent to checking whether auth + raft have been migrated to raft.
|
||||
*/
|
||||
bool can_use_effective_service_level_cache() const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the service level options **in effect** for a user having the given
|
||||
* collection of roles.
|
||||
|
||||
@@ -124,8 +124,40 @@ bool should_flush_system_topology_after_applying(const mutation& mut, const data
|
||||
return false;
|
||||
}
|
||||
|
||||
future<> write_mutations_to_database(storage_proxy& proxy, gms::inet_address from, utils::chunked_vector<canonical_mutation> cms) {
|
||||
static void collect_client_routes_update(const mutation& mut, client_routes_service::client_route_keys& client_routes_update) {
|
||||
|
||||
auto s_client_routes = db::system_keyspace::client_routes();
|
||||
if (mut.column_family_id() != s_client_routes->id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto pk_components = mut.decorated_key()._key.explode(*s_client_routes);
|
||||
if (pk_components.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto conn_uuid = value_cast<sstring>(utf8_type->deserialize_value(pk_components[0]));
|
||||
for (const rows_entry& re : mut.partition().clustered_rows()) {
|
||||
const auto ck_components = re.key().explode(*s_client_routes);
|
||||
if (ck_components.empty()) {
|
||||
continue;
|
||||
}
|
||||
auto host_uuid = value_cast<utils::UUID>(uuid_type->deserialize_value(ck_components[0]));
|
||||
client_routes_update.emplace(conn_uuid, host_uuid);
|
||||
}
|
||||
}
|
||||
|
||||
static future<> notify_client_route_change_if_needed(storage_service& storage_service, const client_routes_service::client_route_keys& client_routes_update) {
|
||||
if (client_routes_update.size() > 0) {
|
||||
slogger.trace("write_mutations_to_database: notify_client_routes_change routes_update.size()={}", client_routes_update.size());
|
||||
co_await storage_service.notify_client_routes_change(client_routes_update);
|
||||
}
|
||||
}
|
||||
|
||||
future<> write_mutations_to_database(storage_service& storage_service, storage_proxy& proxy, gms::inet_address from, utils::chunked_vector<canonical_mutation> cms) {
|
||||
utils::chunked_vector<frozen_mutation_and_schema> mutations;
|
||||
client_routes_service::client_route_keys client_routes_update;
|
||||
|
||||
mutations.reserve(cms.size());
|
||||
bool need_system_topology_flush = false;
|
||||
try {
|
||||
@@ -133,7 +165,12 @@ future<> write_mutations_to_database(storage_proxy& proxy, gms::inet_address fro
|
||||
auto& tbl = proxy.local_db().find_column_family(cm.column_family_id());
|
||||
auto& s = tbl.schema();
|
||||
auto mut = co_await to_mutation_gently(cm, s);
|
||||
|
||||
need_system_topology_flush = need_system_topology_flush || should_flush_system_topology_after_applying(mut, proxy.data_dictionary());
|
||||
if (proxy.data_dictionary().has_schema(db::system_keyspace::NAME, db::system_keyspace::CLIENT_ROUTES)) {
|
||||
collect_client_routes_update(mut, client_routes_update);
|
||||
}
|
||||
|
||||
mutations.emplace_back(co_await freeze_gently(mut), s);
|
||||
}
|
||||
} catch (replica::no_such_column_family& e) {
|
||||
@@ -147,6 +184,8 @@ future<> write_mutations_to_database(storage_proxy& proxy, gms::inet_address fro
|
||||
slogger.trace("write_mutations_to_database: flushing {}.{}", db::system_keyspace::NAME, db::system_keyspace::TOPOLOGY);
|
||||
co_await proxy.get_db().local().flush(db::system_keyspace::NAME, db::system_keyspace::TOPOLOGY);
|
||||
}
|
||||
|
||||
co_await notify_client_route_change_if_needed(storage_service, client_routes_update);
|
||||
}
|
||||
|
||||
group0_state_machine::modules_to_reload group0_state_machine::get_modules_to_reload(const utils::chunked_vector<canonical_mutation>& mutations) {
|
||||
@@ -251,7 +290,7 @@ future<> group0_state_machine::merge_and_apply(group0_state_machine_merger& merg
|
||||
[&] (topology_change& chng) -> future<> {
|
||||
auto modules_to_reload = get_modules_to_reload(chng.mutations);
|
||||
auto tablet_keys = replica::get_tablet_metadata_change_hint(chng.mutations);
|
||||
co_await write_mutations_to_database(_sp, cmd.creator_addr, std::move(chng.mutations));
|
||||
co_await write_mutations_to_database(_ss, _sp, cmd.creator_addr, std::move(chng.mutations));
|
||||
co_await _ss.topology_transition({.tablets_hint = std::move(tablet_keys)});
|
||||
co_await reload_modules(std::move(modules_to_reload));
|
||||
},
|
||||
@@ -263,7 +302,7 @@ future<> group0_state_machine::merge_and_apply(group0_state_machine_merger& merg
|
||||
},
|
||||
[&] (write_mutations& muts) -> future<> {
|
||||
auto modules_to_reload = get_modules_to_reload(muts.mutations);
|
||||
co_await write_mutations_to_database(_sp, cmd.creator_addr, std::move(muts.mutations));
|
||||
co_await write_mutations_to_database(_ss, _sp, cmd.creator_addr, std::move(muts.mutations));
|
||||
co_await reload_modules(std::move(modules_to_reload));
|
||||
}
|
||||
), cmd.change);
|
||||
@@ -393,6 +432,7 @@ future<> group0_state_machine::load_snapshot(raft::snapshot_id id) {
|
||||
|
||||
future<> group0_state_machine::transfer_snapshot(raft::server_id from_id, raft::snapshot_descriptor snp) {
|
||||
try {
|
||||
co_await utils::get_local_injector().inject("block_group0_transfer_snapshot", utils::wait_for_message(300s));
|
||||
// Note that this may bring newer state than the group0 state machine raft's
|
||||
// log, so some raft entries may be double applied, but since the state
|
||||
// machine is idempotent it is not a problem.
|
||||
@@ -451,11 +491,23 @@ future<> group0_state_machine::transfer_snapshot(raft::server_id from_id, raft::
|
||||
co_await _sp.get_db().local().flush(db::system_keyspace::NAME, db::system_keyspace::TOPOLOGY);
|
||||
}
|
||||
|
||||
client_routes_service::client_route_keys client_routes_update;
|
||||
if (raft_snp) {
|
||||
if (_sp.data_dictionary().has_schema(db::system_keyspace::NAME, db::system_keyspace::CLIENT_ROUTES)) {
|
||||
auto s_client_routes = db::system_keyspace::client_routes();
|
||||
for (auto& canonical_mut : raft_snp->mutations) {
|
||||
if (canonical_mut.column_family_id() == s_client_routes->id()) {
|
||||
auto mut = co_await to_mutation_gently(canonical_mut, s_client_routes);
|
||||
slogger.trace("transfer snapshot: raft snapshot includes client_routes mutation");
|
||||
collect_client_routes_update(mut, client_routes_update);
|
||||
}
|
||||
}
|
||||
}
|
||||
co_await mutate_locally(std::move(raft_snp->mutations), _sp);
|
||||
}
|
||||
|
||||
co_await _ss.auth_cache().load_all();
|
||||
co_await notify_client_route_change_if_needed(_ss, client_routes_update);
|
||||
|
||||
co_await _sp.mutate_locally({std::move(history_mut)}, nullptr);
|
||||
} catch (const abort_requested_exception&) {
|
||||
|
||||
@@ -130,6 +130,6 @@ public:
|
||||
bool should_flush_system_topology_after_applying(const mutation& mut, const data_dictionary::database db);
|
||||
|
||||
// Used to write data to topology and other tables except schema tables.
|
||||
future<> write_mutations_to_database(storage_proxy& proxy, gms::inet_address from, utils::chunked_vector<canonical_mutation> cms);
|
||||
future<> write_mutations_to_database(storage_service& storage_service, storage_proxy& proxy, gms::inet_address from, utils::chunked_vector<canonical_mutation> cms);
|
||||
|
||||
} // end of namespace service
|
||||
|
||||
@@ -254,6 +254,10 @@ public:
|
||||
group0_batch(const group0_batch&) = delete;
|
||||
group0_batch(group0_batch&&) = default;
|
||||
|
||||
const group0_guard& guard() const {
|
||||
return _guard.value();
|
||||
}
|
||||
|
||||
// Gets timestamp which should be used when building mutations.
|
||||
api::timestamp_type write_timestamp() const;
|
||||
utils::UUID new_group0_state_id() const;
|
||||
|
||||
@@ -1114,7 +1114,7 @@ private:
|
||||
// only for a truncate which is still waiting.
|
||||
if (_topology_state_machine._topology.global_request) {
|
||||
utils::UUID ongoing_global_request_id = _topology_state_machine._topology.global_request_id.value();
|
||||
const auto topology_requests_entry = co_await _sys_ks.local().get_topology_request_entry(ongoing_global_request_id, true);
|
||||
const auto topology_requests_entry = co_await _sys_ks.local().get_topology_request_entry(ongoing_global_request_id);
|
||||
auto global_request = std::get<service::global_topology_request>(topology_requests_entry.request_type);
|
||||
if (global_request == global_topology_request::truncate_table) {
|
||||
std::optional<topology::transition_state>& tstate = _topology_state_machine._topology.tstate;
|
||||
|
||||
@@ -205,6 +205,7 @@ storage_service::storage_service(abort_source& abort_source,
|
||||
cql3::query_processor& qp,
|
||||
sharded<qos::service_level_controller>& sl_controller,
|
||||
auth::cache& auth_cache,
|
||||
sharded<client_routes_service>& client_routes,
|
||||
topology_state_machine& topology_state_machine,
|
||||
db::view::view_building_state_machine& view_building_state_machine,
|
||||
tasks::task_manager& tm,
|
||||
@@ -224,11 +225,13 @@ storage_service::storage_service(abort_source& abort_source,
|
||||
, _snitch(snitch)
|
||||
, _sl_controller(sl_controller)
|
||||
, _auth_cache(auth_cache)
|
||||
, _client_routes(client_routes)
|
||||
, _group0(nullptr)
|
||||
, _async_gate("storage_service")
|
||||
, _node_ops_abort_thread(node_ops_abort_thread())
|
||||
, _node_ops_module(make_shared<node_ops::task_manager_module>(tm, *this))
|
||||
, _tablets_module(make_shared<service::task_manager_module>(tm, *this))
|
||||
, _global_topology_requests_module(make_shared<service::topo::task_manager_module>(tm))
|
||||
, _address_map(address_map)
|
||||
, _shared_token_metadata(stm)
|
||||
, _erm_factory(erm_factory)
|
||||
@@ -252,9 +255,11 @@ storage_service::storage_service(abort_source& abort_source,
|
||||
{
|
||||
tm.register_module(_node_ops_module->get_name(), _node_ops_module);
|
||||
tm.register_module(_tablets_module->get_name(), _tablets_module);
|
||||
tm.register_module(_global_topology_requests_module->get_name(), _global_topology_requests_module);
|
||||
if (this_shard_id() == 0) {
|
||||
_node_ops_module->make_virtual_task<node_ops::node_ops_virtual_task>(*this);
|
||||
_tablets_module->make_virtual_task<service::tablet_virtual_task>(*this);
|
||||
_global_topology_requests_module->make_virtual_task<service::topo::global_topology_request_virtual_task>(*this);
|
||||
}
|
||||
register_metrics();
|
||||
|
||||
@@ -583,12 +588,16 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
|
||||
}
|
||||
break;
|
||||
case node_state::decommissioning:
|
||||
// A decommissioning node loses its tokens when topology moves to left_token_ring.
|
||||
if (_topology_state_machine._topology.tstate == topology::transition_state::left_token_ring) {
|
||||
break;
|
||||
}
|
||||
[[fallthrough]];
|
||||
case node_state::removing:
|
||||
// A decommissioning or removing node loses its tokens when topology moves to left_token_ring.
|
||||
if (_topology_state_machine._topology.tstate == topology::transition_state::left_token_ring) {
|
||||
if (rs.state == node_state::removing && !_feature_service.removenode_with_left_token_ring) {
|
||||
on_internal_error(
|
||||
rtlogger, "removenode operation can only enter the left_token_ring state when REMOVENODE_WITH_LEFT_TOKEN_RING feature is enabled");
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (_topology_state_machine._topology.tstate == topology::transition_state::rollback_to_normal) {
|
||||
// no need for double writes anymore since op failed
|
||||
co_await process_normal_node(id, host_id, ip, rs);
|
||||
@@ -1375,6 +1384,34 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
future<bool> storage_service::ongoing_rf_change(const group0_guard& guard, sstring ks) const {
|
||||
auto ongoing_ks_rf_change = [&] (utils::UUID request_id) -> future<bool> {
|
||||
auto req_entry = co_await _sys_ks.local().get_topology_request_entry(request_id);
|
||||
co_return std::holds_alternative<global_topology_request>(req_entry.request_type) &&
|
||||
std::get<global_topology_request>(req_entry.request_type) == global_topology_request::keyspace_rf_change &&
|
||||
req_entry.new_keyspace_rf_change_ks_name.has_value() && req_entry.new_keyspace_rf_change_ks_name.value() == ks;
|
||||
};
|
||||
if (_topology_state_machine._topology.global_request_id.has_value()) {
|
||||
auto req_id = _topology_state_machine._topology.global_request_id.value();
|
||||
if (co_await ongoing_ks_rf_change(req_id)) {
|
||||
co_return true;
|
||||
}
|
||||
}
|
||||
for (auto request_id : _topology_state_machine._topology.paused_rf_change_requests) {
|
||||
if (co_await ongoing_ks_rf_change(request_id)) {
|
||||
co_return true;
|
||||
}
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
for (auto request_id : _topology_state_machine._topology.global_requests_queue) {
|
||||
if (co_await ongoing_ks_rf_change(request_id)) {
|
||||
co_return true;
|
||||
}
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
co_return false;
|
||||
}
|
||||
|
||||
future<> storage_service::raft_initialize_discovery_leader(const join_node_request_params& params) {
|
||||
if (params.replaced_id.has_value()) {
|
||||
throw std::runtime_error(::format("Cannot perform a replace operation because this is the first node in the cluster"));
|
||||
@@ -1420,7 +1457,7 @@ future<> storage_service::raft_initialize_discovery_leader(const join_node_reque
|
||||
_migration_manager.local().get_group0_client().get_history_gc_duration(), "bootstrap: adding myself as the first node to the topology");
|
||||
auto mutation_creator_addr = _sys_ks.local().local_db().get_token_metadata().get_topology().my_address();
|
||||
|
||||
co_await write_mutations_to_database(_qp.proxy(), mutation_creator_addr, std::move(change.mutations));
|
||||
co_await write_mutations_to_database(*this, _qp.proxy(), mutation_creator_addr, std::move(change.mutations));
|
||||
co_await _qp.proxy().mutate_locally({history_append}, nullptr);
|
||||
}
|
||||
|
||||
@@ -3443,6 +3480,7 @@ future<> storage_service::stop() {
|
||||
_listeners.clear();
|
||||
co_await _tablets_module->stop();
|
||||
co_await _node_ops_module->stop();
|
||||
co_await _global_topology_requests_module->stop();
|
||||
co_await _async_gate.close();
|
||||
co_await std::move(_node_ops_abort_thread);
|
||||
_tablet_split_monitor_event.signal();
|
||||
@@ -5025,6 +5063,50 @@ future<> storage_service::wait_for_topology_not_busy() {
|
||||
}
|
||||
}
|
||||
|
||||
future<> storage_service::abort_paused_rf_change(utils::UUID request_id) {
|
||||
auto holder = _async_gate.hold();
|
||||
|
||||
if (this_shard_id() != 0) {
|
||||
// group0 is only set on shard 0.
|
||||
co_return co_await container().invoke_on(0, [&] (auto& ss) {
|
||||
return ss.abort_paused_rf_change(request_id);
|
||||
});
|
||||
}
|
||||
|
||||
if (!_feature_service.rack_list_rf) {
|
||||
throw std::runtime_error("The RACK_LIST_RF feature is not enabled on the cluster yet");
|
||||
}
|
||||
|
||||
while (true) {
|
||||
auto guard = co_await _group0->client().start_operation(_group0_as, raft_timeout{});
|
||||
|
||||
bool found = std::ranges::contains(_topology_state_machine._topology.paused_rf_change_requests, request_id);
|
||||
if (!found) {
|
||||
slogger.warn("RF change request with id '{}' is not paused, so it can't be aborted", request_id);
|
||||
co_return;
|
||||
}
|
||||
|
||||
utils::chunked_vector<canonical_mutation> updates;
|
||||
updates.push_back(canonical_mutation(topology_mutation_builder(guard.write_timestamp())
|
||||
.resume_rf_change_request(_topology_state_machine._topology.paused_rf_change_requests, request_id).build()));
|
||||
updates.push_back(canonical_mutation(topology_request_tracking_mutation_builder(request_id)
|
||||
.done("Aborted by user request")
|
||||
.build()));
|
||||
|
||||
topology_change change{std::move(updates)};
|
||||
group0_command g0_cmd = _group0->client().prepare_command(std::move(change), guard,
|
||||
format("aborting rf change request {}", request_id));
|
||||
|
||||
try {
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _group0_as, raft_timeout{});
|
||||
} catch (group0_concurrent_modification&) {
|
||||
slogger.info("aborting request {}: concurrent modification, retrying.", request_id);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
semaphore& storage_service::get_do_sample_sstables_concurrency_limiter() {
|
||||
return _do_sample_sstables_concurrency_limiter;
|
||||
}
|
||||
@@ -5228,7 +5310,7 @@ future<> storage_service::raft_check_and_repair_cdc_streams() {
|
||||
request_id = _topology_state_machine._topology.global_request_id.value();
|
||||
} else if (!_topology_state_machine._topology.global_requests_queue.empty()) {
|
||||
request_id = _topology_state_machine._topology.global_requests_queue[0];
|
||||
auto req_entry = co_await _sys_ks.local().get_topology_request_entry(request_id, true);
|
||||
auto req_entry = co_await _sys_ks.local().get_topology_request_entry(request_id);
|
||||
curr_req = std::get<global_topology_request>(req_entry.request_type);
|
||||
} else {
|
||||
request_id = utils::UUID{};
|
||||
@@ -7702,6 +7784,9 @@ void storage_service::init_messaging_service() {
|
||||
additional_tables.push_back(db::system_keyspace::cdc_streams_state()->id());
|
||||
additional_tables.push_back(db::system_keyspace::cdc_streams_history()->id());
|
||||
}
|
||||
if (ss._feature_service.client_routes) {
|
||||
additional_tables.push_back(db::system_keyspace::client_routes()->id());
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& table : boost::join(params.tables, additional_tables)) {
|
||||
@@ -8041,6 +8126,18 @@ future<> endpoint_lifecycle_notifier::notify_joined(gms::inet_address endpoint,
|
||||
});
|
||||
}
|
||||
|
||||
future<> endpoint_lifecycle_notifier::notify_client_routes_change(const client_routes_service::client_route_keys& client_route_keys) {
|
||||
co_await seastar::async([this, &client_route_keys] {
|
||||
_subscribers.thread_for_each([&client_route_keys] (endpoint_lifecycle_subscriber* subscriber) {
|
||||
try {
|
||||
subscriber->on_client_routes_change(client_route_keys);
|
||||
} catch (...) {
|
||||
slogger.warn("Client routes notification failed: {}", std::current_exception());
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<> storage_service::notify_joined(inet_address endpoint, locator::host_id hid) {
|
||||
co_await utils::get_local_injector().inject(
|
||||
"storage_service_notify_joined_sleep", std::chrono::milliseconds{500});
|
||||
@@ -8065,6 +8162,10 @@ future<> storage_service::notify_cql_change(inet_address endpoint, locator::host
|
||||
}
|
||||
}
|
||||
|
||||
future<> storage_service::notify_client_routes_change(const client_routes_service::client_route_keys& client_route_keys) {
|
||||
co_await _client_routes.local().notify_client_routes_change(client_route_keys);
|
||||
}
|
||||
|
||||
bool storage_service::is_normal_state_handled_on_boot(locator::host_id node) {
|
||||
return _normal_state_handled_on_boot.contains(node);
|
||||
}
|
||||
|
||||
@@ -17,8 +17,10 @@
|
||||
#include "gms/endpoint_state.hh"
|
||||
#include "gms/i_endpoint_state_change_subscriber.hh"
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include "service/client_routes.hh"
|
||||
#include "service/endpoint_lifecycle_subscriber.hh"
|
||||
#include "service/qos/service_level_controller.hh"
|
||||
#include "service/task_manager_module.hh"
|
||||
#include "service/topology_guard.hh"
|
||||
#include "locator/abstract_replication_strategy.hh"
|
||||
#include "locator/snitch_base.hh"
|
||||
@@ -48,6 +50,7 @@
|
||||
#include "service/tablet_allocator.hh"
|
||||
#include "service/tablet_operation.hh"
|
||||
#include "mutation/timestamp.hh"
|
||||
#include "utils/UUID.hh"
|
||||
#include "utils/user_provided_param.hh"
|
||||
#include "utils/sequenced_set.hh"
|
||||
#include "service/topology_coordinator.hh"
|
||||
@@ -202,6 +205,7 @@ private:
|
||||
sharded<locator::snitch_ptr>& _snitch;
|
||||
sharded<qos::service_level_controller>& _sl_controller;
|
||||
auth::cache& _auth_cache;
|
||||
sharded<client_routes_service>& _client_routes;
|
||||
|
||||
// Engaged on shard 0 before `join_cluster`.
|
||||
service::raft_group0* _group0;
|
||||
@@ -225,6 +229,7 @@ private:
|
||||
future<> _node_ops_abort_thread;
|
||||
shared_ptr<node_ops::task_manager_module> _node_ops_module;
|
||||
shared_ptr<service::task_manager_module> _tablets_module;
|
||||
shared_ptr<service::topo::task_manager_module> _global_topology_requests_module;
|
||||
gms::gossip_address_map& _address_map;
|
||||
void node_ops_insert(node_ops_id, gms::inet_address coordinator, std::list<inet_address> ignore_nodes,
|
||||
std::function<future<>()> abort_func);
|
||||
@@ -269,6 +274,7 @@ public:
|
||||
cql3::query_processor& qp,
|
||||
sharded<qos::service_level_controller>& sl_controller,
|
||||
auth::cache& auth_cache,
|
||||
sharded<client_routes_service>& _client_routes,
|
||||
topology_state_machine& topology_state_machine,
|
||||
db::view::view_building_state_machine& view_building_state_machine,
|
||||
tasks::task_manager& tm,
|
||||
@@ -931,6 +937,7 @@ public:
|
||||
bool topology_global_queue_empty() const {
|
||||
return !_topology_state_machine._topology.global_request.has_value();
|
||||
}
|
||||
future<bool> ongoing_rf_change(const group0_guard& guard, sstring ks) const;
|
||||
future<> raft_initialize_discovery_leader(const join_node_request_params& params);
|
||||
future<> initialize_done_topology_upgrade_state();
|
||||
private:
|
||||
@@ -1068,6 +1075,8 @@ public:
|
||||
future<sstring> wait_for_topology_request_completion(utils::UUID id, bool require_entry = true);
|
||||
future<> wait_for_topology_not_busy();
|
||||
|
||||
future<> abort_paused_rf_change(utils::UUID request_id);
|
||||
|
||||
private:
|
||||
semaphore _do_sample_sstables_concurrency_limiter{1};
|
||||
// To avoid overly-large RPC messages, `do_sample_sstables` is broken up into several rounds.
|
||||
@@ -1138,11 +1147,14 @@ public:
|
||||
future<std::vector<std::byte>> train_dict(utils::chunked_vector<temporary_buffer<char>> sample);
|
||||
future<> publish_new_sstable_dict(table_id, std::span<const std::byte>, service::raft_group0_client&);
|
||||
void set_train_dict_callback(decltype(_train_dict));
|
||||
seastar::future<> notify_client_routes_change(const client_routes_service::client_route_keys& client_route_keys);
|
||||
|
||||
|
||||
friend class join_node_rpc_handshaker;
|
||||
friend class node_ops::node_ops_virtual_task;
|
||||
friend class tasks::task_manager;
|
||||
friend class tablet_virtual_task;
|
||||
friend class topo::global_topology_request_virtual_task;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -6,12 +6,16 @@
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "cql3/statements/ks_prop_defs.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "locator/tablets.hh"
|
||||
#include "locator/topology.hh"
|
||||
#include "replica/tablets.hh"
|
||||
#include "locator/tablet_replication_strategy.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "service/migration_listener.hh"
|
||||
#include "service/tablet_allocator.hh"
|
||||
#include "utils/UUID.hh"
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "utils/stall_free.hh"
|
||||
@@ -22,6 +26,7 @@
|
||||
#include "replica/database.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
#include <iterator>
|
||||
#include <ranges>
|
||||
#include <utility>
|
||||
#include <fmt/ranges.h>
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
@@ -237,6 +242,147 @@ struct migration_candidate {
|
||||
migration_badness badness;
|
||||
};
|
||||
|
||||
struct colocation_source {
|
||||
locator::global_tablet_id gid;
|
||||
locator::tablet_replica replica;
|
||||
};
|
||||
|
||||
using colocation_source_set = utils::chunked_vector<colocation_source>;
|
||||
using colocation_sources_by_destination_rack = std::unordered_map<endpoint_dc_rack, colocation_source_set>;
|
||||
|
||||
struct rack_list_colocation_state {
|
||||
colocation_sources_by_destination_rack dst_dc_rack_to_tablets;
|
||||
std::unordered_map<endpoint_dc_rack, std::unordered_set<utils::UUID>> dst_to_requests;
|
||||
utils::UUID request_to_resume;
|
||||
|
||||
void maybe_set_request_to_resume(const utils::UUID& id) {
|
||||
if (!request_to_resume) {
|
||||
request_to_resume = id;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
future<rack_list_colocation_state> find_required_rack_list_colocations(
|
||||
replica::database& db,
|
||||
token_metadata_ptr tmptr,
|
||||
db::system_keyspace* sys_ks,
|
||||
const std::unordered_set<utils::UUID>& paused_rf_change_requests,
|
||||
const std::unordered_set<locator::global_tablet_id>& already_planned_migrations) {
|
||||
rack_list_colocation_state state;
|
||||
|
||||
auto get_node = [&] (locator::host_id host) -> const locator::node& {
|
||||
auto* node = tmptr->get_topology().find_node(host);
|
||||
if (!node) {
|
||||
on_internal_error(lblogger, format("Node {} not found in topology", host));
|
||||
}
|
||||
return *node;
|
||||
};
|
||||
for (const auto& request_id : paused_rf_change_requests) {
|
||||
auto req_entry = co_await sys_ks->get_topology_request_entry(request_id);
|
||||
sstring ks_name = *req_entry.new_keyspace_rf_change_ks_name;
|
||||
|
||||
if (!db.has_keyspace(ks_name)) {
|
||||
state.maybe_set_request_to_resume(request_id);
|
||||
continue;
|
||||
}
|
||||
auto& ks = db.find_keyspace(ks_name);
|
||||
std::unordered_map<sstring, sstring> saved_ks_props = *req_entry.new_keyspace_rf_change_data;
|
||||
cql3::statements::ks_prop_defs new_ks_props{std::map<sstring, sstring>{saved_ks_props.begin(), saved_ks_props.end()}};
|
||||
new_ks_props.validate();
|
||||
auto ks_md = new_ks_props.as_ks_metadata_update(ks.metadata(), *tmptr, db.features(), db.get_config());
|
||||
|
||||
auto tables_with_mvs = ks.metadata()->tables();
|
||||
auto views = ks.metadata()->views();
|
||||
tables_with_mvs.insert(tables_with_mvs.end(), views.begin(), views.end());
|
||||
if (tables_with_mvs.empty()) {
|
||||
state.maybe_set_request_to_resume(request_id);
|
||||
continue;
|
||||
}
|
||||
bool no_changes_needed = true;
|
||||
for (const auto& table_or_mv : tables_with_mvs) {
|
||||
if (!tmptr->tablets().is_base_table(table_or_mv->id())) {
|
||||
continue;
|
||||
}
|
||||
const auto& tmap = tmptr->tablets().get_tablet_map(table_or_mv->id());
|
||||
const auto& new_replication_strategy_config = ks_md->strategy_options();
|
||||
for (auto& [dc, rf_value] : new_replication_strategy_config) {
|
||||
if (!std::holds_alternative<rack_list>(rf_value)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto racks = std::get<rack_list>(rf_value) | std::ranges::to<std::unordered_set<sstring>>();
|
||||
co_await tmap.for_each_tablet([&] (tablet_id tid, const tablet_info& ti) -> future<> {
|
||||
auto gid = locator::global_tablet_id{table_or_mv->id(), tid};
|
||||
|
||||
// Current replicas in this DC. There might be multiple replicas in the same rack.
|
||||
auto dc_replicas = ti.replicas | std::views::filter([&] (const tablet_replica& r) {
|
||||
return get_node(r.host).dc_rack().dc == dc;
|
||||
}) | std::ranges::to<std::vector<tablet_replica>>();
|
||||
|
||||
if (dc_replicas.empty()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
// Find replicas that are not in the desired racks (src_replicas)
|
||||
// and racks that do not have replicas yet (dst_racks).
|
||||
auto dst_racks = racks;
|
||||
std::vector<tablet_replica> src_replicas;
|
||||
for (const auto& r : dc_replicas) {
|
||||
auto rack = get_node(r.host).dc_rack().rack;
|
||||
if (dst_racks.find(rack) != dst_racks.end()) {
|
||||
// There is already a replica in this rack.
|
||||
dst_racks.erase(rack);
|
||||
} else {
|
||||
// There is a replica in this rack, but it needs to be moved.
|
||||
src_replicas.push_back(r);
|
||||
}
|
||||
}
|
||||
|
||||
auto zipped = std::views::zip(src_replicas, dst_racks);
|
||||
if (!std::ranges::empty(zipped)) {
|
||||
no_changes_needed = false;
|
||||
}
|
||||
|
||||
// Skip tablet that is in transitions.
|
||||
auto* tti = tmap.get_tablet_transition_info(tid);
|
||||
if (tti) {
|
||||
lblogger.debug("Skipped colocation for tablet={} which is already in transition={}", gid, tti->transition);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
// Skip tablet that is about to be in transition.
|
||||
if (already_planned_migrations.contains(gid)) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
for (auto src_dst : zipped) {
|
||||
auto src = std::get<0>(src_dst);
|
||||
auto dst = std::get<1>(src_dst);
|
||||
auto endpoint = locator::endpoint_dc_rack{dc, dst};
|
||||
|
||||
state.dst_dc_rack_to_tablets[endpoint].emplace_back(colocation_source{{table_or_mv->id(), tid}, src});
|
||||
state.dst_to_requests[endpoint].insert(request_id);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}
|
||||
}
|
||||
if (no_changes_needed) {
|
||||
state.maybe_set_request_to_resume(request_id);
|
||||
}
|
||||
}
|
||||
co_return state;
|
||||
}
|
||||
|
||||
future<bool> requires_rack_list_colocation(
|
||||
replica::database& db,
|
||||
locator::token_metadata_ptr tmptr,
|
||||
db::system_keyspace* sys_ks,
|
||||
utils::UUID request_id) {
|
||||
auto res = co_await find_required_rack_list_colocations(db, tmptr, sys_ks, {request_id}, {});
|
||||
co_return res.request_to_resume != request_id;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<>
|
||||
@@ -658,6 +804,8 @@ class load_balancer {
|
||||
|
||||
replica::database& _db;
|
||||
token_metadata_ptr _tm;
|
||||
service::topology* _topology;
|
||||
db::system_keyspace* _sys_ks;
|
||||
std::optional<locator::load_sketch> _load_sketch;
|
||||
// Holds the set of tablets already scheduled for transition during plan-making.
|
||||
std::unordered_set<global_tablet_id> _scheduled_tablets;
|
||||
@@ -742,7 +890,10 @@ private:
|
||||
return streaming_infos;
|
||||
}
|
||||
public:
|
||||
load_balancer(replica::database& db, token_metadata_ptr tm, locator::load_stats_ptr table_load_stats,
|
||||
load_balancer(replica::database& db, token_metadata_ptr tm,
|
||||
service::topology* topology,
|
||||
db::system_keyspace* sys_ks,
|
||||
locator::load_stats_ptr table_load_stats,
|
||||
load_balancer_stats_manager& stats,
|
||||
uint64_t target_tablet_size,
|
||||
unsigned tablets_per_shard_goal,
|
||||
@@ -751,19 +902,26 @@ public:
|
||||
, _tablets_per_shard_goal(tablets_per_shard_goal)
|
||||
, _db(db)
|
||||
, _tm(std::move(tm))
|
||||
, _topology(topology)
|
||||
, _sys_ks(sys_ks)
|
||||
, _table_load_stats(std::move(table_load_stats))
|
||||
, _stats(stats)
|
||||
, _skiplist(std::move(skiplist))
|
||||
{ }
|
||||
|
||||
bool ongoing_rack_list_colocation() const {
|
||||
return _topology != nullptr && _sys_ks != nullptr && !_topology->paused_rf_change_requests.empty();
|
||||
}
|
||||
|
||||
future<migration_plan> make_plan() {
|
||||
const locator::topology& topo = _tm->get_topology();
|
||||
migration_plan plan;
|
||||
|
||||
auto rack_list_colocation = ongoing_rack_list_colocation();
|
||||
if (!utils::get_local_injector().enter("tablet_migration_bypass")) {
|
||||
// Prepare plans for each DC separately and combine them to be executed in parallel.
|
||||
for (auto&& dc : topo.get_datacenters()) {
|
||||
if (_db.get_config().rf_rack_valid_keyspaces()) {
|
||||
if (_db.get_config().rf_rack_valid_keyspaces() || rack_list_colocation) {
|
||||
for (auto rack : topo.get_datacenter_racks().at(dc) | std::views::keys) {
|
||||
auto rack_plan = co_await make_plan(dc, rack);
|
||||
auto level = rack_plan.size() > 0 ? seastar::log_level::info : seastar::log_level::debug;
|
||||
@@ -779,6 +937,10 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if (rack_list_colocation) {
|
||||
plan.merge(co_await make_rack_list_colocation_plan(plan));
|
||||
}
|
||||
|
||||
// Merge table-wide resize decisions, may emit new decisions, revoke or finalize ongoing ones.
|
||||
// Note : Resize plans should be generated before repair plans to avoid scheduling repairs when there is pending resize finalization
|
||||
plan.merge_resize_plan(co_await make_resize_plan(plan));
|
||||
@@ -789,8 +951,8 @@ public:
|
||||
}
|
||||
|
||||
auto level = plan.size() > 0 ? seastar::log_level::info : seastar::log_level::debug;
|
||||
lblogger.log(level, "Prepared {} migration plans, out of which there were {} tablet migration(s) and {} resize decision(s) and {} tablet repair(s)",
|
||||
plan.size(), plan.tablet_migration_count(), plan.resize_decision_count(), plan.tablet_repair_count());
|
||||
lblogger.log(level, "Prepared {} migration plans, out of which there were {} tablet migration(s) and {} resize decision(s) and {} tablet repair(s) and {} rack-list colocation(s)",
|
||||
plan.size(), plan.tablet_migration_count(), plan.resize_decision_count(), plan.tablet_repair_count(), plan.tablet_rack_list_colocation_count());
|
||||
co_return std::move(plan);
|
||||
}
|
||||
|
||||
@@ -815,6 +977,58 @@ public:
|
||||
co_return false;
|
||||
}
|
||||
|
||||
void ensure_node(node_load_map& nodes, host_id host) {
|
||||
if (nodes.contains(host)) {
|
||||
return;
|
||||
}
|
||||
const locator::topology& topo = _tm->get_topology();
|
||||
auto* node = topo.find_node(host);
|
||||
if (!node) {
|
||||
on_internal_error(lblogger, format("Node {} not found in topology", host));
|
||||
}
|
||||
node_load& load = nodes[host];
|
||||
load.id = host;
|
||||
load.node = node;
|
||||
load.shard_count = node->get_shard_count();
|
||||
load.shards.resize(load.shard_count);
|
||||
if (!load.shard_count) {
|
||||
throw std::runtime_error(format("Shard count of {} not found in topology", host));
|
||||
}
|
||||
if (!_db.features().tablet_load_stats_v2) {
|
||||
// This way load calculation will hold tablet count.
|
||||
load.capacity = _target_tablet_size * load.shard_count;
|
||||
} else if (_table_load_stats && _table_load_stats->capacity.contains(host)) {
|
||||
load.capacity = _table_load_stats->capacity.at(host);
|
||||
}
|
||||
}
|
||||
|
||||
future<> consider_scheduled_load(node_load_map& nodes) {
|
||||
const locator::topology& topo = _tm->get_topology();
|
||||
for (auto&& [table, tables] : _tm->tablets().all_table_groups()) {
|
||||
const auto& tmap = _tm->tablets().get_tablet_map(table);
|
||||
for (auto&& [tid, trinfo]: tmap.transitions()) {
|
||||
co_await coroutine::maybe_yield();
|
||||
if (is_streaming(&trinfo)) {
|
||||
auto& tinfo = tmap.get_tablet_info(tid);
|
||||
apply_load(nodes, get_migration_streaming_info(topo, tinfo, trinfo));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
future<> consider_planned_load(node_load_map& nodes, const migration_plan& mplan) {
|
||||
const locator::topology& topo = _tm->get_topology();
|
||||
auto& tablet_meta = _tm->tablets();
|
||||
|
||||
for (const tablet_migration_info& tmi : mplan.migrations()) {
|
||||
co_await coroutine::maybe_yield();
|
||||
auto& tmap = tablet_meta.get_tablet_map(tmi.tablet.table);
|
||||
auto& tinfo = tmap.get_tablet_info(tmi.tablet.tablet);
|
||||
auto streaming_info = get_migration_streaming_info(topo, tinfo, tmi);
|
||||
apply_load(nodes, streaming_info);
|
||||
}
|
||||
}
|
||||
|
||||
future<tablet_repair_plan> make_repair_plan(const migration_plan& mplan) {
|
||||
lblogger.debug("In make_repair_plan");
|
||||
|
||||
@@ -830,53 +1044,19 @@ public:
|
||||
// Populate the load of the migration that is already in the plan
|
||||
node_load_map nodes;
|
||||
// TODO: share code with make_plan()
|
||||
auto ensure_node = [&] (host_id host) {
|
||||
if (nodes.contains(host)) {
|
||||
return;
|
||||
}
|
||||
auto* node = topo.find_node(host);
|
||||
if (!node) {
|
||||
on_internal_error(lblogger, format("Node {} not found in topology", host));
|
||||
}
|
||||
node_load& load = nodes[host];
|
||||
load.id = host;
|
||||
load.node = node;
|
||||
load.shard_count = node->get_shard_count();
|
||||
load.shards.resize(load.shard_count);
|
||||
if (!load.shard_count) {
|
||||
throw std::runtime_error(format("Shard count of {} not found in topology", host));
|
||||
}
|
||||
};
|
||||
// TODO: share code with make_plan()
|
||||
topo.for_each_node([&] (const locator::node& node) {
|
||||
bool is_drained = node.get_state() == locator::node::state::being_decommissioned
|
||||
|| node.get_state() == locator::node::state::being_removed;
|
||||
if (node.get_state() == locator::node::state::normal || is_drained) {
|
||||
ensure_node(node.host_id());
|
||||
ensure_node(nodes, node.host_id());
|
||||
}
|
||||
});
|
||||
|
||||
// Consider load that is already scheduled
|
||||
for (auto&& [table, tables] : _tm->tablets().all_table_groups()) {
|
||||
const auto& tmap = _tm->tablets().get_tablet_map(table);
|
||||
for (auto&& [tid, trinfo]: tmap.transitions()) {
|
||||
co_await coroutine::maybe_yield();
|
||||
if (is_streaming(&trinfo)) {
|
||||
auto& tinfo = tmap.get_tablet_info(tid);
|
||||
apply_load(nodes, get_migration_streaming_info(topo, tinfo, trinfo));
|
||||
}
|
||||
}
|
||||
}
|
||||
co_await consider_scheduled_load(nodes);
|
||||
|
||||
// Consider load that is about to be scheduled
|
||||
auto& tablet_meta = _tm->tablets();
|
||||
for (const tablet_migration_info& tmi : mplan.migrations()) {
|
||||
co_await coroutine::maybe_yield();
|
||||
auto& tmap = tablet_meta.get_tablet_map(tmi.tablet.table);
|
||||
auto& tinfo = tmap.get_tablet_info(tmi.tablet.tablet);
|
||||
auto streaming_info = get_migration_streaming_info(topo, tinfo, tmi);
|
||||
apply_load(nodes, streaming_info);
|
||||
}
|
||||
co_await consider_planned_load(nodes, mplan);
|
||||
|
||||
struct repair_plan {
|
||||
locator::global_tablet_id gid;
|
||||
@@ -959,6 +1139,109 @@ public:
|
||||
co_return ret;
|
||||
}
|
||||
|
||||
future<migration_plan> make_rack_list_colocation_plan(const migration_plan& mplan) {
|
||||
lblogger.debug("In make_rack_list_colocation_plan");
|
||||
|
||||
migration_plan plan;
|
||||
tablet_rack_list_colocation_plan rack_list_plan;
|
||||
if (!ongoing_rack_list_colocation()) {
|
||||
co_return plan;
|
||||
}
|
||||
|
||||
const locator::topology& topo = _tm->get_topology();
|
||||
|
||||
auto migration_tablet_ids = co_await mplan.get_migration_tablet_ids();
|
||||
auto colocation_state = co_await find_required_rack_list_colocations(_db, _tm, _sys_ks,
|
||||
_topology->paused_rf_change_requests, std::move(migration_tablet_ids));
|
||||
|
||||
node_load_map nodes;
|
||||
topo.for_each_node([&] (const locator::node& node) {
|
||||
if (node.get_state() == locator::node::state::normal && !node.is_excluded()) {
|
||||
ensure_node(nodes, node.host_id());
|
||||
}
|
||||
});
|
||||
|
||||
// Consider load that is already scheduled.
|
||||
co_await consider_scheduled_load(nodes);
|
||||
|
||||
// Consider load that is about to be scheduled.
|
||||
co_await consider_planned_load(nodes, mplan);
|
||||
|
||||
std::unordered_set<global_tablet_id> colocation_tablet_ids;
|
||||
for (auto& [dc_rack, colocation_sources] : colocation_state.dst_dc_rack_to_tablets) {
|
||||
auto nodes_by_load_dst = nodes | std::views::filter([&] (const auto& host_load) {
|
||||
auto& [host, load] = host_load;
|
||||
auto& node = *load.node;
|
||||
return node.dc_rack() == dc_rack;
|
||||
}) | std::views::keys | std::ranges::to<std::vector<host_id>>();
|
||||
|
||||
if (nodes_by_load_dst.empty()) {
|
||||
lblogger.warn("No target nodes available for RF change colocation plan in dc {}, rack {}", dc_rack.dc, dc_rack.rack);
|
||||
if (auto it = colocation_state.dst_to_requests.find(dc_rack); it != colocation_state.dst_to_requests.end()) {
|
||||
rack_list_plan.maybe_add_request_to_resume(*it->second.begin());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
auto nodes_cmp = nodes_by_load_cmp(nodes);
|
||||
auto nodes_dst_cmp = [&] (const host_id& a, const host_id& b) {
|
||||
return nodes_cmp(b, a);
|
||||
};
|
||||
|
||||
// Ascending load heap of candidate target nodes.
|
||||
std::make_heap(nodes_by_load_dst.begin(), nodes_by_load_dst.end(), nodes_dst_cmp);
|
||||
|
||||
const tablet_metadata& tmeta = _tm->tablets();
|
||||
for (colocation_source& source : colocation_sources) {
|
||||
if (colocation_tablet_ids.contains(source.gid)) {
|
||||
lblogger.debug("Skipped colocation of replica {} of tablet={}, another replica of which is about to be colocated", source.replica, source.gid);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Pick the least loaded node as target.
|
||||
std::pop_heap(nodes_by_load_dst.begin(), nodes_by_load_dst.end(), nodes_dst_cmp);
|
||||
auto target = nodes_by_load_dst.back();
|
||||
auto& target_info = nodes[target];
|
||||
auto push_back_target_node = seastar::defer([&] {
|
||||
std::push_heap(nodes_by_load_dst.begin(), nodes_by_load_dst.end(), nodes_dst_cmp);
|
||||
});
|
||||
|
||||
lblogger.debug("target node: {}, avg_load={}", target, target_info.avg_load);
|
||||
|
||||
auto dst = global_shard_id {target, _load_sketch->get_least_loaded_shard(target)};
|
||||
|
||||
lblogger.trace("target shard: {}, tablets={}, load={}", dst.shard,
|
||||
target_info.shards[dst.shard].tablet_count,
|
||||
target_info.shard_load(dst.shard, _target_tablet_size));
|
||||
|
||||
tablet_transition_kind kind = tablet_transition_kind::migration;
|
||||
migration_tablet_set source_tablets {
|
||||
.tablet_s = source.gid, // Ignore the merge co-location.
|
||||
};
|
||||
auto src = source.replica;
|
||||
auto mig = get_migration_info(source_tablets, kind, src, dst);
|
||||
auto& tmap = tmeta.get_tablet_map(source_tablets.table());
|
||||
auto mig_streaming_info = get_migration_streaming_infos(topo, tmap, mig);
|
||||
pick(*_load_sketch, dst.host, dst.shard, source_tablets);
|
||||
if (can_accept_load(nodes, mig_streaming_info)) {
|
||||
apply_load(nodes, mig_streaming_info);
|
||||
lblogger.debug("Adding migration: {}", mig);
|
||||
mark_as_scheduled(mig);
|
||||
for (auto& m : mig) {
|
||||
plan.add(std::move(m));
|
||||
colocation_tablet_ids.insert(m.tablet);
|
||||
}
|
||||
}
|
||||
update_node_load_on_migration(nodes, src, dst, source_tablets);
|
||||
}
|
||||
}
|
||||
if (colocation_state.request_to_resume) {
|
||||
rack_list_plan.maybe_add_request_to_resume(colocation_state.request_to_resume);
|
||||
}
|
||||
plan.set_rack_list_colocation_plan(std::move(rack_list_plan));
|
||||
co_return std::move(plan);
|
||||
}
|
||||
|
||||
// Returns true if a table has replicas of all its sibling tablets co-located.
|
||||
// This is used for determining whether merge can be finalized, since co-location
|
||||
// is a strict requirement for sibling tablets to be merged.
|
||||
@@ -2967,30 +3250,6 @@ public:
|
||||
node_load_map nodes;
|
||||
std::unordered_set<host_id> nodes_to_drain;
|
||||
|
||||
auto ensure_node = [&] (host_id host) {
|
||||
if (nodes.contains(host)) {
|
||||
return;
|
||||
}
|
||||
auto* node = topo.find_node(host);
|
||||
if (!node) {
|
||||
on_internal_error(lblogger, format("Node {} not found in topology", host));
|
||||
}
|
||||
node_load& load = nodes[host];
|
||||
load.id = host;
|
||||
load.node = node;
|
||||
load.shard_count = node->get_shard_count();
|
||||
load.shards.resize(load.shard_count);
|
||||
if (!load.shard_count) {
|
||||
throw std::runtime_error(format("Shard count of {} not found in topology", host));
|
||||
}
|
||||
if (!_db.features().tablet_load_stats_v2) {
|
||||
// This way load calculation will hold tablet count.
|
||||
load.capacity = _target_tablet_size * load.shard_count;
|
||||
} else if (_table_load_stats && _table_load_stats->capacity.contains(host)) {
|
||||
load.capacity = _table_load_stats->capacity.at(host);
|
||||
}
|
||||
};
|
||||
|
||||
_tm->for_each_token_owner([&] (const locator::node& node) {
|
||||
if (!node_filter(node)) {
|
||||
return;
|
||||
@@ -2999,7 +3258,7 @@ public:
|
||||
|| node.get_state() == locator::node::state::being_removed;
|
||||
if (node.get_state() == locator::node::state::normal || is_drained) {
|
||||
if (is_drained) {
|
||||
ensure_node(node.host_id());
|
||||
ensure_node(nodes, node.host_id());
|
||||
lblogger.info("Will drain node {} ({}) from DC {}", node.host_id(), node.get_state(), dc);
|
||||
nodes_to_drain.emplace(node.host_id());
|
||||
nodes[node.host_id()].drained = true;
|
||||
@@ -3007,7 +3266,7 @@ public:
|
||||
// Excluded nodes should not be chosen as targets for migration.
|
||||
lblogger.debug("Ignoring excluded node {}: state={}", node.host_id(), node.get_state());
|
||||
} else {
|
||||
ensure_node(node.host_id());
|
||||
ensure_node(nodes, node.host_id());
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -3040,7 +3299,7 @@ public:
|
||||
r, global_tablet_id{table, tid}));
|
||||
}
|
||||
if (node->left() && node_filter(*node)) {
|
||||
ensure_node(r.host);
|
||||
ensure_node(nodes, r.host);
|
||||
nodes_to_drain.insert(r.host);
|
||||
nodes[r.host].drained = true;
|
||||
}
|
||||
@@ -3242,7 +3501,7 @@ public:
|
||||
plan.merge(co_await make_intranode_plan(nodes, nodes_to_drain));
|
||||
}
|
||||
|
||||
if (_tm->tablets().balancing_enabled() && plan.empty()) {
|
||||
if (_tm->tablets().balancing_enabled() && plan.empty() && !ongoing_rack_list_colocation()) {
|
||||
auto dc_merge_plan = co_await make_merge_colocation_plan(dc, nodes);
|
||||
auto level = dc_merge_plan.tablet_migration_count() > 0 ? seastar::log_level::info : seastar::log_level::debug;
|
||||
lblogger.log(level, "Prepared {} migrations for co-locating sibling tablets in DC {}", dc_merge_plan.tablet_migration_count(), dc);
|
||||
@@ -3264,9 +3523,11 @@ class tablet_allocator_impl : public tablet_allocator::impl
|
||||
locator::load_stats_ptr _load_stats;
|
||||
private:
|
||||
load_balancer make_load_balancer(token_metadata_ptr tm,
|
||||
service::topology* topology,
|
||||
db::system_keyspace* sys_ks,
|
||||
locator::load_stats_ptr table_load_stats,
|
||||
std::unordered_set<host_id> skiplist) {
|
||||
load_balancer lb(_db, tm, std::move(table_load_stats), _load_balancer_stats,
|
||||
load_balancer lb(_db, tm, topology, sys_ks, std::move(table_load_stats), _load_balancer_stats,
|
||||
_db.get_config().target_tablet_size_in_bytes(),
|
||||
_db.get_config().tablets_per_shard_goal(),
|
||||
std::move(skiplist));
|
||||
@@ -3293,8 +3554,8 @@ public:
|
||||
_stopped = true;
|
||||
}
|
||||
|
||||
future<migration_plan> balance_tablets(token_metadata_ptr tm, locator::load_stats_ptr table_load_stats, std::unordered_set<host_id> skiplist) {
|
||||
auto lb = make_load_balancer(tm, table_load_stats ? table_load_stats : _load_stats, std::move(skiplist));
|
||||
future<migration_plan> balance_tablets(token_metadata_ptr tm, service::topology* topology, db::system_keyspace* sys_ks, locator::load_stats_ptr table_load_stats, std::unordered_set<host_id> skiplist) {
|
||||
auto lb = make_load_balancer(tm, topology, sys_ks, table_load_stats ? table_load_stats : _load_stats, std::move(skiplist));
|
||||
co_await coroutine::switch_to(_db.get_streaming_scheduling_group());
|
||||
co_return co_await lb.make_plan();
|
||||
}
|
||||
@@ -3314,7 +3575,7 @@ public:
|
||||
// Allocates new tablets for a table which is not co-located with another table.
|
||||
tablet_map allocate_tablets_for_new_base_table(const tablet_aware_replication_strategy* tablet_rs, const schema& s) {
|
||||
auto tm = _db.get_shared_token_metadata().get();
|
||||
auto lb = make_load_balancer(tm, nullptr, {});
|
||||
auto lb = make_load_balancer(tm, nullptr, nullptr, nullptr, {});
|
||||
auto plan = lb.make_sizing_plan(s.shared_from_this(), tablet_rs).get();
|
||||
auto& table_plan = plan.tables[s.id()];
|
||||
if (table_plan.target_tablet_count_aligned != table_plan.target_tablet_count) {
|
||||
@@ -3328,6 +3589,7 @@ public:
|
||||
|
||||
// Allocate tablets for multiple new tables, which may be co-located with each other, or co-located with an existing base table.
|
||||
void allocate_tablets_for_new_tables(const keyspace_metadata& ksm, const std::vector<schema_ptr>& cfms, utils::chunked_vector<mutation>& muts, api::timestamp_type ts) {
|
||||
utils::get_local_injector().inject("pause_in_allocate_tablets_for_new_table", utils::wait_for_message(std::chrono::minutes(5))).get();
|
||||
locator::replication_strategy_params params(ksm.strategy_options(), ksm.initial_tablets(), ksm.consistency_option());
|
||||
auto tm = _db.get_shared_token_metadata().get();
|
||||
auto rs = abstract_replication_strategy::create_replication_strategy(ksm.strategy_name(), params, tm->get_topology());
|
||||
@@ -3369,7 +3631,7 @@ public:
|
||||
if (s.id() != base_id) {
|
||||
lblogger.debug("Creating tablets for {}.{} id={} with base={}", s.ks_name(), s.cf_name(), s.id(), base_id);
|
||||
muts.emplace_back(colocated_tablet_map_to_mutation(s.id(), s.ks_name(), s.cf_name(), base_id, ts));
|
||||
_db.get_notifier().before_allocate_tablet_map(base_map, s, muts, ts);
|
||||
_db.get_notifier().before_allocate_tablet_map_in_notification(base_map, s, muts, ts);
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -3385,7 +3647,7 @@ public:
|
||||
muts.emplace_back(std::move(m));
|
||||
return make_ready_future<>();
|
||||
}).get();
|
||||
_db.get_notifier().before_allocate_tablet_map(base_map, s, muts, ts);
|
||||
_db.get_notifier().before_allocate_tablet_map_in_notification(base_map, s, muts, ts);
|
||||
|
||||
create_colocated_tablet_maps(base_map);
|
||||
}
|
||||
@@ -3534,8 +3796,8 @@ future<> tablet_allocator::stop() {
|
||||
return impl().stop();
|
||||
}
|
||||
|
||||
future<migration_plan> tablet_allocator::balance_tablets(locator::token_metadata_ptr tm, locator::load_stats_ptr load_stats, std::unordered_set<host_id> skiplist) {
|
||||
return impl().balance_tablets(std::move(tm), std::move(load_stats), std::move(skiplist));
|
||||
future<migration_plan> tablet_allocator::balance_tablets(locator::token_metadata_ptr tm, service::topology* topology, db::system_keyspace* sys_ks, locator::load_stats_ptr load_stats, std::unordered_set<host_id> skiplist) {
|
||||
return impl().balance_tablets(std::move(tm), topology, sys_ks, std::move(load_stats), std::move(skiplist));
|
||||
}
|
||||
|
||||
void tablet_allocator::set_load_stats(locator::load_stats_ptr load_stats) {
|
||||
|
||||
@@ -14,8 +14,14 @@
|
||||
#include "locator/token_metadata_fwd.hh"
|
||||
#include <seastar/core/metrics.hh>
|
||||
|
||||
namespace db {
|
||||
class system_keyspace;
|
||||
}
|
||||
|
||||
namespace service {
|
||||
|
||||
class topology;
|
||||
|
||||
struct load_balancer_dc_stats {
|
||||
uint64_t calls = 0;
|
||||
uint64_t migrations_produced = 0;
|
||||
@@ -133,6 +139,26 @@ struct tablet_repair_plan {
|
||||
}
|
||||
};
|
||||
|
||||
struct tablet_rack_list_colocation_plan {
|
||||
utils::UUID _request_to_resume;
|
||||
|
||||
const utils::UUID& request_to_resume() const noexcept {
|
||||
return _request_to_resume;
|
||||
}
|
||||
|
||||
size_t size() const { return _request_to_resume ? 1 : 0; };
|
||||
|
||||
void merge(tablet_rack_list_colocation_plan&& other) {
|
||||
_request_to_resume = _request_to_resume ? _request_to_resume : other._request_to_resume;
|
||||
}
|
||||
|
||||
void maybe_add_request_to_resume(const utils::UUID& id) {
|
||||
if (!_request_to_resume) {
|
||||
_request_to_resume = id;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class migration_plan {
|
||||
public:
|
||||
using migrations_vector = utils::chunked_vector<tablet_migration_info>;
|
||||
@@ -140,17 +166,19 @@ private:
|
||||
migrations_vector _migrations;
|
||||
table_resize_plan _resize_plan;
|
||||
tablet_repair_plan _repair_plan;
|
||||
tablet_rack_list_colocation_plan _rack_list_colocation_plan;
|
||||
bool _has_nodes_to_drain = false;
|
||||
public:
|
||||
/// Returns true iff there are decommissioning nodes which own some tablet replicas.
|
||||
bool has_nodes_to_drain() const { return _has_nodes_to_drain; }
|
||||
|
||||
const migrations_vector& migrations() const { return _migrations; }
|
||||
bool empty() const { return _migrations.empty() && !_resize_plan.size() && !_repair_plan.size();}
|
||||
size_t size() const { return _migrations.size() + _resize_plan.size() + _repair_plan.size(); }
|
||||
bool empty() const { return _migrations.empty() && !_resize_plan.size() && !_repair_plan.size() && !_rack_list_colocation_plan.size(); }
|
||||
size_t size() const { return _migrations.size() + _resize_plan.size() + _repair_plan.size() + _rack_list_colocation_plan.size(); }
|
||||
size_t tablet_migration_count() const { return _migrations.size(); }
|
||||
size_t resize_decision_count() const { return _resize_plan.size(); }
|
||||
size_t tablet_repair_count() const { return _repair_plan.size(); }
|
||||
size_t tablet_rack_list_colocation_count() const { return _rack_list_colocation_plan.size(); }
|
||||
|
||||
void add(tablet_migration_info info) {
|
||||
_migrations.emplace_back(std::move(info));
|
||||
@@ -167,6 +195,7 @@ public:
|
||||
_has_nodes_to_drain |= other._has_nodes_to_drain;
|
||||
_resize_plan.merge(std::move(other._resize_plan));
|
||||
_repair_plan.merge(std::move(other._repair_plan));
|
||||
_rack_list_colocation_plan.merge(std::move(other._rack_list_colocation_plan));
|
||||
}
|
||||
|
||||
void set_has_nodes_to_drain(bool b) {
|
||||
@@ -185,6 +214,12 @@ public:
|
||||
_repair_plan = std::move(repair);
|
||||
}
|
||||
|
||||
const tablet_rack_list_colocation_plan& rack_list_colocation_plan() const { return _rack_list_colocation_plan; }
|
||||
|
||||
void set_rack_list_colocation_plan(tablet_rack_list_colocation_plan rack_list_colocation_plan) {
|
||||
_rack_list_colocation_plan = std::move(rack_list_colocation_plan);
|
||||
}
|
||||
|
||||
future<std::unordered_set<locator::global_tablet_id>> get_migration_tablet_ids() const;
|
||||
};
|
||||
|
||||
@@ -230,7 +265,7 @@ public:
|
||||
///
|
||||
/// The algorithm takes care of limiting the streaming load on the system, also by taking active migrations into account.
|
||||
///
|
||||
future<migration_plan> balance_tablets(locator::token_metadata_ptr, locator::load_stats_ptr = {}, std::unordered_set<locator::host_id> = {});
|
||||
future<migration_plan> balance_tablets(locator::token_metadata_ptr, service::topology*, db::system_keyspace*, locator::load_stats_ptr = {}, std::unordered_set<locator::host_id> = {});
|
||||
|
||||
void set_load_stats(locator::load_stats_ptr);
|
||||
|
||||
@@ -246,6 +281,12 @@ public:
|
||||
void on_leadership_lost();
|
||||
};
|
||||
|
||||
future<bool> requires_rack_list_colocation(
|
||||
replica::database& db,
|
||||
locator::token_metadata_ptr tmptr,
|
||||
db::system_keyspace* sys_ks,
|
||||
utils::UUID request_id);
|
||||
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "service/task_manager_module.hh"
|
||||
#include "service/topology_state_machine.hh"
|
||||
#include "tasks/task_handler.hh"
|
||||
#include "tasks/virtual_task_hint.hh"
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
@@ -288,4 +289,116 @@ std::set<locator::host_id> task_manager_module::get_nodes() const {
|
||||
return get_task_manager().get_nodes(_ss);
|
||||
}
|
||||
|
||||
namespace topo {
|
||||
|
||||
static tasks::task_manager::task_state get_state(const db::system_keyspace::topology_requests_entry& entry) {
|
||||
if (!entry.id) {
|
||||
return tasks::task_manager::task_state::created;
|
||||
} else if (!entry.done) {
|
||||
return tasks::task_manager::task_state::running;
|
||||
} else if (entry.error == "") {
|
||||
return tasks::task_manager::task_state::done;
|
||||
} else {
|
||||
return tasks::task_manager::task_state::failed;
|
||||
}
|
||||
}
|
||||
|
||||
tasks::task_manager::task_group global_topology_request_virtual_task::get_group() const noexcept {
|
||||
return tasks::task_manager::task_group::global_topology_change_group;
|
||||
}
|
||||
|
||||
future<std::optional<tasks::virtual_task_hint>> global_topology_request_virtual_task::contains(tasks::task_id task_id) const {
|
||||
if (!task_id.uuid().is_timestamp()) {
|
||||
// Task id of node ops operation is always a timestamp.
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
auto hint = std::make_optional<tasks::virtual_task_hint>({});
|
||||
auto entry = co_await _ss._sys_ks.local().get_topology_request_entry_opt(task_id.uuid());
|
||||
if (entry.has_value() && std::holds_alternative<service::global_topology_request>(entry->request_type) &&
|
||||
std::get<service::global_topology_request>(entry->request_type) == global_topology_request::keyspace_rf_change) {
|
||||
co_return hint;
|
||||
}
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
future<tasks::is_abortable> global_topology_request_virtual_task::is_abortable(tasks::virtual_task_hint) const {
|
||||
return make_ready_future<tasks::is_abortable>(tasks::is_abortable::yes);
|
||||
}
|
||||
|
||||
static tasks::task_stats get_task_stats(const db::system_keyspace::topology_requests_entry& entry) {
|
||||
return tasks::task_stats{
|
||||
.task_id = tasks::task_id{entry.id},
|
||||
.type = fmt::to_string(entry.request_type),
|
||||
.kind = tasks::task_kind::cluster,
|
||||
.scope = "keyspace",
|
||||
.state = get_state(entry),
|
||||
.sequence_number = 0,
|
||||
.keyspace = entry.new_keyspace_rf_change_ks_name.value_or(""),
|
||||
.table = "",
|
||||
.entity = "",
|
||||
.shard = 0,
|
||||
.start_time = entry.start_time,
|
||||
.end_time = entry.end_time,
|
||||
};
|
||||
}
|
||||
|
||||
future<std::optional<tasks::task_status>> global_topology_request_virtual_task::get_status(tasks::task_id id, tasks::virtual_task_hint hint) {
|
||||
auto entry = co_await _ss._sys_ks.local().get_topology_request_entry_opt(id.uuid());
|
||||
if (!entry.has_value()) {
|
||||
co_return std::nullopt;
|
||||
}
|
||||
auto task_stats = get_task_stats(*entry);
|
||||
co_return tasks::task_status{
|
||||
.task_id = task_stats.task_id,
|
||||
.type = task_stats.type,
|
||||
.kind = task_stats.kind,
|
||||
.scope = task_stats.scope,
|
||||
.state = task_stats.state,
|
||||
.is_abortable = co_await is_abortable(std::move(hint)),
|
||||
.start_time = task_stats.start_time,
|
||||
.end_time = task_stats.end_time,
|
||||
.error = entry->error,
|
||||
.parent_id = tasks::task_id::create_null_id(),
|
||||
.sequence_number = task_stats.sequence_number,
|
||||
.shard = task_stats.shard,
|
||||
.keyspace = task_stats.keyspace,
|
||||
.table = task_stats.table,
|
||||
.entity = task_stats.entity,
|
||||
.progress_units = "",
|
||||
.progress = tasks::task_manager::task::progress{},
|
||||
.children = utils::chunked_vector<tasks::task_identity>{},
|
||||
};
|
||||
}
|
||||
|
||||
future<std::optional<tasks::task_status>> global_topology_request_virtual_task::wait(tasks::task_id id, tasks::virtual_task_hint hint) {
|
||||
auto entry = co_await get_status(id, hint);
|
||||
if (!entry) {
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
co_await _ss.wait_for_topology_request_completion(id.uuid(), false);
|
||||
co_return co_await get_status(id, std::move(hint));
|
||||
}
|
||||
|
||||
future<> global_topology_request_virtual_task::abort(tasks::task_id id, tasks::virtual_task_hint) noexcept {
|
||||
return _ss.abort_paused_rf_change(id.uuid());
|
||||
}
|
||||
|
||||
future<std::vector<tasks::task_stats>> global_topology_request_virtual_task::get_stats() {
|
||||
db::system_keyspace& sys_ks = _ss._sys_ks.local();
|
||||
co_return std::ranges::to<std::vector<tasks::task_stats>>(co_await sys_ks.get_topology_request_entries({global_topology_request::keyspace_rf_change}, db_clock::now() - get_task_manager().get_user_task_ttl())
|
||||
| std::views::transform([] (const auto& e) {
|
||||
auto& entry = e.second;
|
||||
return get_task_stats(entry);
|
||||
}));
|
||||
}
|
||||
|
||||
task_manager_module::task_manager_module(tasks::task_manager& tm) noexcept
|
||||
: tasks::task_manager::module(tm, "global_topology_requests")
|
||||
{}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -54,4 +54,33 @@ public:
|
||||
|
||||
std::set<locator::host_id> get_nodes() const override;
|
||||
};
|
||||
|
||||
namespace topo {
|
||||
|
||||
class global_topology_request_virtual_task : public tasks::task_manager::virtual_task::impl {
|
||||
private:
|
||||
service::storage_service& _ss;
|
||||
public:
|
||||
global_topology_request_virtual_task(tasks::task_manager::module_ptr module,
|
||||
service::storage_service& ss)
|
||||
: tasks::task_manager::virtual_task::impl(std::move(module))
|
||||
, _ss(ss)
|
||||
{}
|
||||
virtual tasks::task_manager::task_group get_group() const noexcept override;
|
||||
virtual future<std::optional<tasks::virtual_task_hint>> contains(tasks::task_id task_id) const override;
|
||||
virtual future<tasks::is_abortable> is_abortable(tasks::virtual_task_hint hint) const override;
|
||||
|
||||
virtual future<std::optional<tasks::task_status>> get_status(tasks::task_id id, tasks::virtual_task_hint hint) override;
|
||||
virtual future<std::optional<tasks::task_status>> wait(tasks::task_id id, tasks::virtual_task_hint hint) override;
|
||||
virtual future<> abort(tasks::task_id id, tasks::virtual_task_hint hint) noexcept override;
|
||||
virtual future<std::vector<tasks::task_stats>> get_stats() override;
|
||||
};
|
||||
|
||||
class task_manager_module : public tasks::task_manager::module {
|
||||
public:
|
||||
task_manager_module(tasks::task_manager& tm) noexcept;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <fmt/ranges.h>
|
||||
|
||||
@@ -54,6 +55,7 @@
|
||||
#include "service/topology_state_machine.hh"
|
||||
#include "db/view/view_building_coordinator.hh"
|
||||
#include "topology_mutation.hh"
|
||||
#include "utils/UUID.hh"
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "utils/stall_free.hh"
|
||||
@@ -953,7 +955,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
} else {
|
||||
assert(_feature_service.topology_global_request_queue);
|
||||
req_id = _topo_sm._topology.global_requests_queue[0];
|
||||
req_entry = co_await _sys_ks.get_topology_request_entry(req_id, true);
|
||||
req_entry = co_await _sys_ks.get_topology_request_entry(req_id);
|
||||
req = std::get<global_topology_request>(req_entry.request_type);
|
||||
}
|
||||
switch (req) {
|
||||
@@ -997,6 +999,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
|
||||
utils::chunked_vector<canonical_mutation> updates;
|
||||
sstring error;
|
||||
bool needs_colocation = false;
|
||||
if (_db.has_keyspace(ks_name)) {
|
||||
try {
|
||||
auto& ks = _db.find_keyspace(ks_name);
|
||||
@@ -1004,12 +1007,40 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
cql3::statements::ks_prop_defs new_ks_props{std::map<sstring, sstring>{saved_ks_props.begin(), saved_ks_props.end()}};
|
||||
new_ks_props.validate();
|
||||
auto ks_md = new_ks_props.as_ks_metadata_update(ks.metadata(), *tmptr, _db.features(), _db.get_config());
|
||||
_db.validate_keyspace_update(*ks_md);
|
||||
size_t unimportant_init_tablet_count = 2; // must be a power of 2
|
||||
locator::tablet_map new_tablet_map{unimportant_init_tablet_count};
|
||||
|
||||
auto schedule_migrations = [&] () -> future<> {
|
||||
auto tables_with_mvs = ks.metadata()->tables();
|
||||
auto views = ks.metadata()->views();
|
||||
tables_with_mvs.insert(tables_with_mvs.end(), views.begin(), views.end());
|
||||
if (tables_with_mvs.empty()) {
|
||||
co_return;
|
||||
}
|
||||
auto table = tables_with_mvs.front();
|
||||
auto tablet_count = tmptr->tablets().get_tablet_map(table->id()).tablet_count();
|
||||
locator::replication_strategy_params params{ks_md->strategy_options(), tablet_count, ks.metadata()->consistency_option()};
|
||||
auto new_strategy = locator::abstract_replication_strategy::create_replication_strategy("NetworkTopologyStrategy", params, tmptr->get_topology());
|
||||
|
||||
auto check_needs_colocation = [&] () -> future<bool> {
|
||||
const auto& new_replication_strategy_config = new_strategy->get_config_options();
|
||||
const auto& old_replication_strategy_config = ks.metadata()->strategy_options();
|
||||
bool rack_list_conversion = false;
|
||||
for (const auto& [dc, rf_value] : new_replication_strategy_config) {
|
||||
if (std::holds_alternative<locator::rack_list>(rf_value)) {
|
||||
auto it = old_replication_strategy_config.find(dc);
|
||||
if (it != old_replication_strategy_config.end() && std::holds_alternative<sstring>(it->second)) {
|
||||
rack_list_conversion = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
co_return rack_list_conversion ? co_await requires_rack_list_colocation(_db, tmptr, &_sys_ks, req_id) : false;
|
||||
};
|
||||
if (needs_colocation = co_await check_needs_colocation(); needs_colocation) {
|
||||
co_return;
|
||||
}
|
||||
for (const auto& table_or_mv : tables_with_mvs) {
|
||||
if (!tmptr->tablets().is_base_table(table_or_mv->id())) {
|
||||
// Apply the transition only on base tables.
|
||||
@@ -1018,8 +1049,6 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
continue;
|
||||
}
|
||||
auto old_tablets = co_await tmptr->tablets().get_tablet_map(table_or_mv->id()).clone_gently();
|
||||
locator::replication_strategy_params params{ks_md->strategy_options(), old_tablets.tablet_count(), ks.metadata()->consistency_option()};
|
||||
auto new_strategy = locator::abstract_replication_strategy::create_replication_strategy("NetworkTopologyStrategy", params, tmptr->get_topology());
|
||||
new_tablet_map = co_await new_strategy->maybe_as_tablet_aware()->reallocate_tablets(table_or_mv, tmptr, co_await old_tablets.clone_gently());
|
||||
|
||||
replica::tablet_mutation_builder tablet_mutation_builder(guard.write_timestamp(), table_or_mv->id());
|
||||
@@ -1046,6 +1075,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
co_await coroutine::maybe_yield();
|
||||
});
|
||||
}
|
||||
};
|
||||
co_await schedule_migrations();
|
||||
|
||||
auto schema_muts = prepare_keyspace_update_announcement(_db, ks_md, guard.write_timestamp());
|
||||
for (auto& m: schema_muts) {
|
||||
@@ -1061,16 +1092,22 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
error = "Can't ALTER keyspace " + ks_name + ", keyspace doesn't exist";
|
||||
}
|
||||
|
||||
updates.push_back(canonical_mutation(topology_mutation_builder(guard.write_timestamp())
|
||||
.set_transition_state(topology::transition_state::tablet_migration)
|
||||
bool pause_request = needs_colocation && error.empty();
|
||||
topology_mutation_builder tbuilder(guard.write_timestamp());
|
||||
tbuilder.set_transition_state(topology::transition_state::tablet_migration)
|
||||
.set_version(_topo_sm._topology.version + 1)
|
||||
.del_global_topology_request()
|
||||
.del_global_topology_request_id()
|
||||
.drop_first_global_topology_request_id(_topo_sm._topology.global_requests_queue, req_id)
|
||||
.build()));
|
||||
updates.push_back(canonical_mutation(topology_request_tracking_mutation_builder(req_id)
|
||||
.drop_first_global_topology_request_id(_topo_sm._topology.global_requests_queue, req_id);
|
||||
if (pause_request) {
|
||||
rtlogger.info("keyspace_rf_change for keyspace {} postponed for colocation", ks_name);
|
||||
tbuilder.pause_rf_change_request(req_id);
|
||||
} else {
|
||||
updates.push_back(canonical_mutation(topology_request_tracking_mutation_builder(req_id)
|
||||
.done(error)
|
||||
.build()));
|
||||
}
|
||||
updates.push_back(canonical_mutation(tbuilder.build()));
|
||||
|
||||
sstring reason = seastar::format("ALTER tablets KEYSPACE called with options: {}", saved_ks_props);
|
||||
rtlogger.trace("do update {} reason {}", updates, reason);
|
||||
@@ -1334,6 +1371,14 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
.build());
|
||||
}
|
||||
|
||||
void generate_rf_change_resume_update(utils::chunked_vector<canonical_mutation>& out, const group0_guard& guard, utils::UUID request_to_resume) {
|
||||
rtlogger.debug("Generating RF change resume for request id {}", request_to_resume);
|
||||
out.emplace_back(topology_mutation_builder(guard.write_timestamp())
|
||||
.queue_global_topology_request_id(request_to_resume)
|
||||
.resume_rf_change_request(_topo_sm._topology.paused_rf_change_requests, request_to_resume)
|
||||
.build());
|
||||
}
|
||||
|
||||
future<> generate_migration_updates(utils::chunked_vector<canonical_mutation>& out, const group0_guard& guard, const migration_plan& plan) {
|
||||
if (plan.resize_plan().finalize_resize.empty() || plan.has_nodes_to_drain()) {
|
||||
// schedule tablet migration only if there are no pending resize finalisations or if the node is draining.
|
||||
@@ -1341,6 +1386,10 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
co_await coroutine::maybe_yield();
|
||||
generate_migration_update(out, guard, mig);
|
||||
}
|
||||
|
||||
if (auto request_to_resume = plan.rack_list_colocation_plan().request_to_resume(); request_to_resume) {
|
||||
generate_rf_change_resume_update(out, guard, request_to_resume);
|
||||
}
|
||||
}
|
||||
|
||||
auto sched_time = db_clock::now();
|
||||
@@ -1831,7 +1880,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
|
||||
bool has_nodes_to_drain = false;
|
||||
if (!preempt) {
|
||||
auto plan = co_await _tablet_allocator.balance_tablets(get_token_metadata_ptr(), {}, get_dead_nodes());
|
||||
auto plan = co_await _tablet_allocator.balance_tablets(get_token_metadata_ptr(), &_topo_sm._topology, &_sys_ks, {}, get_dead_nodes());
|
||||
has_nodes_to_drain = plan.has_nodes_to_drain();
|
||||
if (!drain || plan.has_nodes_to_drain()) {
|
||||
co_await generate_migration_updates(updates, guard, plan);
|
||||
@@ -1954,7 +2003,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
co_await utils::get_local_injector().inject("tablet_resize_finalization_post_barrier", utils::wait_for_message(std::chrono::minutes(2)));
|
||||
|
||||
auto tm = get_token_metadata_ptr();
|
||||
auto plan = co_await _tablet_allocator.balance_tablets(tm, {}, get_dead_nodes());
|
||||
auto plan = co_await _tablet_allocator.balance_tablets(tm, &_topo_sm._topology, &_sys_ks, {}, get_dead_nodes());
|
||||
|
||||
utils::chunked_vector<canonical_mutation> updates;
|
||||
updates.reserve(plan.resize_plan().finalize_resize.size() * 2 + 1);
|
||||
@@ -2034,7 +2083,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
// We should perform TRUNCATE only if the session is still valid. It could be cleared if a previous truncate
|
||||
// handler performed the truncate and cleared the session, but crashed before finalizing the request
|
||||
if (_topo_sm._topology.session) {
|
||||
const auto topology_requests_entry = co_await _sys_ks.get_topology_request_entry(global_request_id, true);
|
||||
const auto topology_requests_entry = co_await _sys_ks.get_topology_request_entry(global_request_id);
|
||||
const table_id& table_id = topology_requests_entry.truncate_table_id;
|
||||
lw_shared_ptr<replica::table> table = _db.get_tables_metadata().get_table_if_exists(table_id);
|
||||
|
||||
@@ -2623,6 +2672,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
while (utils::get_local_injector().enter("topology_coordinator_pause_after_streaming")) {
|
||||
co_await sleep_abortable(std::chrono::milliseconds(10), _as);
|
||||
}
|
||||
const bool removenode_with_left_token_ring = _feature_service.removenode_with_left_token_ring;
|
||||
auto node = get_node_to_work_on(std::move(guard));
|
||||
bool barrier_failed = false;
|
||||
// In this state writes goes to old and new replicas but reads start to be done from new replicas
|
||||
@@ -2677,7 +2727,9 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
break;
|
||||
case node_state::removing: {
|
||||
co_await utils::get_local_injector().inject("delay_node_removal", utils::wait_for_message(std::chrono::minutes(5)));
|
||||
node = retake_node(co_await remove_from_group0(std::move(node.guard), node.id), node.id);
|
||||
if (!removenode_with_left_token_ring) {
|
||||
node = retake_node(co_await remove_from_group0(std::move(node.guard), node.id), node.id);
|
||||
}
|
||||
}
|
||||
[[fallthrough]];
|
||||
case node_state::decommissioning: {
|
||||
@@ -2685,7 +2737,10 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
node_state next_state;
|
||||
utils::chunked_vector<canonical_mutation> muts;
|
||||
muts.reserve(2);
|
||||
if (node.rs->state == node_state::decommissioning) {
|
||||
if (removenode_with_left_token_ring || node.rs->state == node_state::decommissioning) {
|
||||
// Both decommission and removenode go through left_token_ring state
|
||||
// to ensure a global barrier is executed before the request is marked as done.
|
||||
// This ensures all nodes have observed the topology change.
|
||||
next_state = node.rs->state;
|
||||
builder.set_transition_state(topology::transition_state::left_token_ring);
|
||||
} else {
|
||||
@@ -2760,6 +2815,16 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
case topology::transition_state::left_token_ring: {
|
||||
auto node = get_node_to_work_on(std::move(guard));
|
||||
|
||||
// Need to be captured as the node variable might become invalid (e.g. moved out) at particular points.
|
||||
const auto node_rs_state = node.rs->state;
|
||||
|
||||
const bool is_removenode = node_rs_state == node_state::removing;
|
||||
|
||||
if (is_removenode && !_feature_service.removenode_with_left_token_ring) {
|
||||
on_internal_error(
|
||||
rtlogger, "removenode operation can only enter the left_token_ring state when REMOVENODE_WITH_LEFT_TOKEN_RING feature is enabled");
|
||||
}
|
||||
|
||||
auto finish_left_token_ring_transition = [&](node_to_work_on& node) -> future<> {
|
||||
// Remove the node from group0 here - in general, it won't be able to leave on its own
|
||||
// because we'll ban it as soon as we tell it to shut down.
|
||||
@@ -2779,9 +2844,16 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
muts.push_back(builder.build());
|
||||
co_await remove_view_build_statuses_on_left_node(muts, node.guard, node.id);
|
||||
co_await db::view::view_builder::generate_mutations_on_node_left(_db, _sys_ks, node.guard.write_timestamp(), locator::host_id(node.id.uuid()), muts);
|
||||
auto str = node.rs->state == node_state::decommissioning
|
||||
? ::format("finished decommissioning node {}", node.id)
|
||||
: ::format("finished rollback of {} after {} failure", node.id, node.rs->state);
|
||||
auto str = std::invoke([&]() {
|
||||
switch (node_rs_state) {
|
||||
case node_state::decommissioning:
|
||||
return ::format("finished decommissioning node {}", node.id);
|
||||
case node_state::removing:
|
||||
return ::format("finished removing node {}", node.id);
|
||||
default:
|
||||
return ::format("finished rollback of {} after {} failure", node.id, node.rs->state);
|
||||
}
|
||||
});
|
||||
co_await update_topology_state(take_guard(std::move(node)), std::move(muts), std::move(str));
|
||||
};
|
||||
|
||||
@@ -2794,6 +2866,11 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
}
|
||||
|
||||
if (node.id == _raft.id()) {
|
||||
// Removed node must be dead, so it shouldn't enter here (it can't coordinate its own removal).
|
||||
if (is_removenode) {
|
||||
on_internal_error(rtlogger, "removenode operation cannot be coordinated by the removed node itself");
|
||||
}
|
||||
|
||||
// Someone else needs to coordinate the rest of the decommission process,
|
||||
// because the decommissioning node is going to shut down in the middle of this state.
|
||||
rtlogger.info("coordinator is decommissioning; giving up leadership");
|
||||
@@ -2807,8 +2884,13 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
|
||||
bool barrier_failed = false;
|
||||
// Wait until other nodes observe the new token ring and stop sending writes to this node.
|
||||
auto excluded_nodes = get_excluded_nodes_for_topology_request(node);
|
||||
try {
|
||||
node = retake_node(co_await global_token_metadata_barrier(std::move(node.guard), get_excluded_nodes_for_topology_request(node)), node.id);
|
||||
// Removed node is added to ignored nodes, so it should be automatically excluded.
|
||||
if (is_removenode && !excluded_nodes.contains(node.id)) {
|
||||
on_internal_error(rtlogger, "removenode operation must have the removed node in excluded_nodes");
|
||||
}
|
||||
node = retake_node(co_await global_token_metadata_barrier(std::move(node.guard), std::move(excluded_nodes)), node.id);
|
||||
} catch (term_changed_error&) {
|
||||
throw;
|
||||
} catch (group0_concurrent_modification&) {
|
||||
@@ -2825,15 +2907,17 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
}
|
||||
|
||||
if (barrier_failed) {
|
||||
// If barrier above failed it means there may be unfinished writes to a decommissioned node.
|
||||
// If barrier above failed it means there may be unfinished writes to a decommissioned node,
|
||||
// or some nodes might not have observed the new topology yet (one purpose of the barrier
|
||||
// is to make sure all nodes observed the new topology before completing the request).
|
||||
// Lets wait for the ring delay for those writes to complete and new topology to propagate
|
||||
// before continuing.
|
||||
co_await sleep_abortable(_ring_delay, _as);
|
||||
node = retake_node(co_await start_operation(), node.id);
|
||||
}
|
||||
|
||||
// Make decommissioning node a non voter before reporting operation completion below.
|
||||
// Otherwise the decommissioned node may see the completion and exit before it is removed from
|
||||
// Make decommissioning/removed node a non voter before reporting operation completion below.
|
||||
// Otherwise the node may see the completion and exit before it is removed from
|
||||
// the config at which point the removal from the config will hang if the cluster had only two
|
||||
// nodes before the decommission.
|
||||
co_await _voter_handler.on_node_removed(node.id, _as);
|
||||
@@ -2844,7 +2928,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
|
||||
co_await update_topology_state(take_guard(std::move(node)), {rtbuilder.build()}, "report request completion in left_token_ring state");
|
||||
|
||||
// Tell the node to shut down.
|
||||
// For decommission/rollback: Tell the node to shut down.
|
||||
// This is done to improve user experience when there are no failures.
|
||||
// In the next state (`node_state::left`), the node will be banned by the rest of the cluster,
|
||||
// so there's no guarantee that it would learn about entering that state even if it was still
|
||||
@@ -2853,15 +2937,19 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
// There is the possibility that the node will never get the message
|
||||
// and decommission will hang on that node.
|
||||
// This is fine for the rest of the cluster - we will still remove, ban the node and continue.
|
||||
//
|
||||
// For removenode: The node is already dead, no need to send shutdown command.
|
||||
auto node_id = node.id;
|
||||
bool shutdown_failed = false;
|
||||
try {
|
||||
node = co_await exec_direct_command(std::move(node), raft_topology_cmd::command::barrier);
|
||||
} catch (...) {
|
||||
rtlogger.warn("failed to tell node {} to shut down - it may hang."
|
||||
" It's safe to shut it down manually now. (Exception: {})",
|
||||
node.id, std::current_exception());
|
||||
shutdown_failed = true;
|
||||
if (!is_removenode) {
|
||||
try {
|
||||
node = co_await exec_direct_command(std::move(node), raft_topology_cmd::command::barrier);
|
||||
} catch (...) {
|
||||
rtlogger.warn("failed to tell node {} to shut down - it may hang."
|
||||
" It's safe to shut it down manually now. (Exception: {})",
|
||||
node.id, std::current_exception());
|
||||
shutdown_failed = true;
|
||||
}
|
||||
}
|
||||
if (shutdown_failed) {
|
||||
node = retake_node(co_await start_operation(), node_id);
|
||||
@@ -3458,7 +3546,7 @@ future<bool> topology_coordinator::maybe_start_tablet_migration(group0_guard gua
|
||||
}
|
||||
|
||||
auto tm = get_token_metadata_ptr();
|
||||
auto plan = co_await _tablet_allocator.balance_tablets(tm, {}, get_dead_nodes());
|
||||
auto plan = co_await _tablet_allocator.balance_tablets(tm, &_topo_sm._topology, &_sys_ks, {}, get_dead_nodes());
|
||||
if (plan.empty()) {
|
||||
rtlogger.debug("Tablet load balancer did not make any plan");
|
||||
co_return false;
|
||||
|
||||
@@ -256,6 +256,20 @@ topology_mutation_builder& topology_mutation_builder::drop_first_global_topology
|
||||
}
|
||||
}
|
||||
|
||||
topology_mutation_builder& topology_mutation_builder::pause_rf_change_request(const utils::UUID& id) {
|
||||
return apply_set("paused_rf_change_requests", collection_apply_mode::update, std::vector<data_value>{id});
|
||||
}
|
||||
|
||||
topology_mutation_builder& topology_mutation_builder::resume_rf_change_request(const std::unordered_set<utils::UUID>& values, const utils::UUID& id) {
|
||||
if (values.contains(id)) {
|
||||
auto new_values = values;
|
||||
new_values.erase(id);
|
||||
return apply_set("paused_rf_change_requests", collection_apply_mode::overwrite, new_values | std::views::transform([] (const auto& id) { return data_value{id}; }));
|
||||
} else {
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
|
||||
topology_mutation_builder& topology_mutation_builder::set_upgrade_state(topology::upgrade_state_type value) {
|
||||
return apply_atomic("upgrade_state", ::format("{}", value));
|
||||
}
|
||||
|
||||
@@ -129,6 +129,8 @@ public:
|
||||
topology_mutation_builder& del_global_topology_request_id();
|
||||
topology_mutation_builder& queue_global_topology_request_id(const utils::UUID& value);
|
||||
topology_mutation_builder& drop_first_global_topology_request_id(const std::vector<utils::UUID>&, const utils::UUID&);
|
||||
topology_mutation_builder& pause_rf_change_request(const utils::UUID&);
|
||||
topology_mutation_builder& resume_rf_change_request(const std::unordered_set<utils::UUID>&, const utils::UUID&);
|
||||
topology_node_mutation_builder& with_node(raft::server_id);
|
||||
canonical_mutation build() { return canonical_mutation{std::move(_m)}; }
|
||||
};
|
||||
|
||||
@@ -180,6 +180,10 @@ struct topology {
|
||||
// The KS options to be used when executing the scheduled ALTER KS statement
|
||||
std::optional<std::unordered_map<sstring, sstring>> new_keyspace_rf_change_data;
|
||||
|
||||
// The ids of RF change requests that are paused because they require tablet co-location.
|
||||
// It may happen during altering from numerical RF to rack list.
|
||||
std::unordered_set<utils::UUID> paused_rf_change_requests;
|
||||
|
||||
// The IDs of the committed yet unpublished CDC generations sorted by timestamps.
|
||||
std::vector<cdc::generation_id_v2> unpublished_cdc_generations;
|
||||
|
||||
|
||||
@@ -27,7 +27,6 @@ enum class component_type {
|
||||
TemporaryTOC,
|
||||
TemporaryStatistics,
|
||||
Scylla,
|
||||
TemporaryScylla,
|
||||
Rows,
|
||||
Partitions,
|
||||
TemporaryHashes,
|
||||
@@ -77,8 +76,6 @@ struct fmt::formatter<sstables::component_type> : fmt::formatter<string_view> {
|
||||
return formatter<string_view>::format("TemporaryStatistics", ctx);
|
||||
case Scylla:
|
||||
return formatter<string_view>::format("Scylla", ctx);
|
||||
case TemporaryScylla:
|
||||
return formatter<string_view>::format("TemporaryScylla", ctx);
|
||||
case Partitions:
|
||||
return formatter<string_view>::format("Partitions", ctx);
|
||||
case Rows:
|
||||
|
||||
@@ -632,10 +632,6 @@ private:
|
||||
std::unique_ptr<file_writer> close_writer(std::unique_ptr<file_writer>& w);
|
||||
|
||||
void close_data_writer();
|
||||
void close_index_writer();
|
||||
void close_rows_writer();
|
||||
void close_partitions_writer();
|
||||
|
||||
void ensure_tombstone_is_written() {
|
||||
if (!_tombstone_written) {
|
||||
consume(tombstone());
|
||||
@@ -948,16 +944,17 @@ void writer::init_file_writers() {
|
||||
_sst._schema->get_compressor_params(),
|
||||
std::move(compressor)), _sst.get_filename());
|
||||
}
|
||||
|
||||
if (_sst.has_component(component_type::Index)) {
|
||||
out = _sst._storage->make_data_or_index_sink(_sst, component_type::Index).get();
|
||||
_index_writer = std::make_unique<crc32_digest_file_writer>(std::move(out), _sst.sstable_buffer_size, _sst.index_filename());
|
||||
_index_writer = std::make_unique<file_writer>(output_stream<char>(std::move(out)), _sst.index_filename());
|
||||
}
|
||||
if (_sst.has_component(component_type::Partitions) && _sst.has_component(component_type::Rows)) {
|
||||
out = _sst._storage->make_data_or_index_sink(_sst, component_type::Rows).get();
|
||||
_rows_writer = std::make_unique<crc32_digest_file_writer>(std::move(out), _sst.sstable_buffer_size, component_name(_sst, component_type::Rows));
|
||||
_rows_writer = std::make_unique<file_writer>(output_stream<char>(std::move(out)), component_name(_sst, component_type::Rows));
|
||||
_bti_row_index_writer = trie::bti_row_index_writer(*_rows_writer);
|
||||
out = _sst._storage->make_data_or_index_sink(_sst, component_type::Partitions).get();
|
||||
_partitions_writer = std::make_unique<crc32_digest_file_writer>(std::move(out), _sst.sstable_buffer_size, component_name(_sst, component_type::Partitions));
|
||||
_partitions_writer = std::make_unique<file_writer>(output_stream<char>(std::move(out)), component_name(_sst, component_type::Partitions));
|
||||
_bti_partition_index_writer = trie::bti_partition_index_writer(*_partitions_writer);
|
||||
}
|
||||
if (_delayed_filter) {
|
||||
@@ -985,41 +982,6 @@ void writer::close_data_writer() {
|
||||
}
|
||||
}
|
||||
|
||||
void writer::close_index_writer() {
|
||||
if (_index_writer) {
|
||||
auto writer = close_writer(_index_writer);
|
||||
auto chksum_wr = static_cast<crc32_digest_file_writer*>(writer.get());
|
||||
_sst.get_components_digests().index_digest = chksum_wr->full_checksum();
|
||||
}
|
||||
}
|
||||
|
||||
void writer::close_partitions_writer() {
|
||||
if (_partitions_writer) {
|
||||
_sst._partitions_db_footer = std::move(*_bti_partition_index_writer).finish(
|
||||
_sst.get_version(),
|
||||
_first_key.value(),
|
||||
_last_key.value());
|
||||
auto writer = close_writer(_partitions_writer);
|
||||
auto chksum_wr = static_cast<crc32_digest_file_writer*>(writer.get());
|
||||
_sst.get_components_digests().partitions_digest = chksum_wr->full_checksum();
|
||||
}
|
||||
}
|
||||
|
||||
void writer::close_rows_writer() {
|
||||
if (_rows_writer) {
|
||||
// Append some garbage padding to the file just to ensure that it's never empty.
|
||||
// (Otherwise it would be empty if the sstable contains only small partitions).
|
||||
// This is a hack to work around some bad interactions between zero-sized files
|
||||
// and object storage. (It seems that e.g. minio considers a zero-sized file
|
||||
// upload to be a no-op, which breaks some assumptions).
|
||||
uint32_t garbage = seastar::cpu_to_be(0x13371337);
|
||||
_rows_writer->write(reinterpret_cast<const char*>(&garbage), sizeof(garbage));
|
||||
auto writer = close_writer(_rows_writer);
|
||||
auto chksum_wr = static_cast<crc32_digest_file_writer*>(writer.get());
|
||||
_sst.get_components_digests().rows_digest = chksum_wr->full_checksum();
|
||||
}
|
||||
}
|
||||
|
||||
void writer::consume_new_partition(const dht::decorated_key& dk) {
|
||||
_c_stats.start_offset = _data_writer->offset();
|
||||
_prev_row_start = _data_writer->offset();
|
||||
@@ -1668,10 +1630,27 @@ void writer::consume_end_of_stream() {
|
||||
_collector.add_compression_ratio(_sst._components->compression.compressed_file_length(), _sst._components->compression.uncompressed_file_length());
|
||||
}
|
||||
|
||||
close_index_writer();
|
||||
if (_index_writer) {
|
||||
close_writer(_index_writer);
|
||||
}
|
||||
|
||||
close_partitions_writer();
|
||||
close_rows_writer();
|
||||
if (_partitions_writer) {
|
||||
_sst._partitions_db_footer = std::move(*_bti_partition_index_writer).finish(
|
||||
_sst.get_version(),
|
||||
_first_key.value(),
|
||||
_last_key.value());
|
||||
close_writer(_partitions_writer);
|
||||
}
|
||||
if (_rows_writer) {
|
||||
// Append some garbage padding to the file just to ensure that it's never empty.
|
||||
// (Otherwise it would be empty if the sstable contains only small partitions).
|
||||
// This is a hack to work around some bad interactions between zero-sized files
|
||||
// and object storage. (It seems that e.g. minio considers a zero-sized file
|
||||
// upload to be a no-op, which breaks some assumptions).
|
||||
uint32_t garbage = seastar::cpu_to_be(0x13371337);
|
||||
_rows_writer->write(reinterpret_cast<const char*>(&garbage), sizeof(garbage));
|
||||
close_writer(_rows_writer);
|
||||
}
|
||||
|
||||
if (_hashes_writer) {
|
||||
close_writer(_hashes_writer);
|
||||
|
||||
@@ -44,7 +44,6 @@ sstable_version_constants::component_map_t sstable_version_constants::create_com
|
||||
{ component_type::Filter, "Filter.db" },
|
||||
{ component_type::Statistics, "Statistics.db" },
|
||||
{ component_type::Scylla, "Scylla.db" },
|
||||
{ component_type::TemporaryScylla, "Scylla.db.tmp" },
|
||||
{ component_type::TemporaryTOC, TEMPORARY_TOC_SUFFIX },
|
||||
{ component_type::TemporaryStatistics, "Statistics.db.tmp" }
|
||||
};
|
||||
|
||||
@@ -956,22 +956,16 @@ future<file_writer> sstable::make_component_file_writer(component_type c, file_o
|
||||
});
|
||||
}
|
||||
|
||||
future<std::unique_ptr<crc32_digest_file_writer>> sstable::make_digests_component_file_writer(component_type c, file_output_stream_options options, open_flags oflags) noexcept {
|
||||
return _storage->make_component_sink(*this, c, oflags, std::move(options)).then([this, comp = component_name(*this, c)] (data_sink sink) mutable {
|
||||
return std::make_unique<crc32_digest_file_writer>(std::move(sink), sstable_buffer_size, comp);
|
||||
});
|
||||
}
|
||||
|
||||
void sstable::open_sstable(const sstring& origin) {
|
||||
_origin = origin;
|
||||
generate_toc();
|
||||
_storage->open(*this);
|
||||
}
|
||||
|
||||
void sstable::write_toc(std::unique_ptr<crc32_digest_file_writer> w) {
|
||||
void sstable::write_toc(file_writer w) {
|
||||
sstlog.debug("Writing TOC file {} ", toc_filename());
|
||||
|
||||
do_write_simple(*w, [&] (version_types v, file_writer& w) {
|
||||
do_write_simple(std::move(w), [&] (version_types v, file_writer& w) {
|
||||
for (auto&& key : _recognized_components) {
|
||||
// new line character is appended to the end of each component name.
|
||||
auto value = sstable_version_constants::get_component_map(v).at(key) + "\n";
|
||||
@@ -979,8 +973,6 @@ void sstable::write_toc(std::unique_ptr<crc32_digest_file_writer> w) {
|
||||
write(v, w, b);
|
||||
}
|
||||
});
|
||||
|
||||
_components_digests.toc_digest = w->full_checksum();
|
||||
}
|
||||
|
||||
void sstable::write_crc(const checksum& c) {
|
||||
@@ -997,7 +989,6 @@ void sstable::write_digest(uint32_t full_checksum) {
|
||||
auto digest = to_sstring<bytes>(full_checksum);
|
||||
write(v, w, digest);
|
||||
}, buffer_size);
|
||||
_components_digests.data_digest = full_checksum;
|
||||
}
|
||||
|
||||
thread_local std::array<std::vector<int>, downsampling::BASE_SAMPLING_LEVEL> downsampling::_sample_pattern_cache;
|
||||
@@ -1054,7 +1045,7 @@ future<> sstable::read_simple(T& component) {
|
||||
});
|
||||
}
|
||||
|
||||
void sstable::do_write_simple(file_writer& writer,
|
||||
void sstable::do_write_simple(file_writer&& writer,
|
||||
noncopyable_function<void (version_types, file_writer&)> write_component) {
|
||||
write_component(_version, writer);
|
||||
_metadata_size_on_disk += writer.offset();
|
||||
@@ -1069,7 +1060,7 @@ void sstable::do_write_simple(component_type type,
|
||||
file_output_stream_options options;
|
||||
options.buffer_size = buffer_size;
|
||||
auto w = make_component_file_writer(type, std::move(options)).get();
|
||||
do_write_simple(w, std::move(write_component));
|
||||
do_write_simple(std::move(w), std::move(write_component));
|
||||
}
|
||||
|
||||
template <component_type Type, typename T>
|
||||
@@ -1079,30 +1070,10 @@ void sstable::write_simple(const T& component) {
|
||||
}, sstable_buffer_size);
|
||||
}
|
||||
|
||||
uint32_t sstable::do_write_simple_with_digest(component_type type,
|
||||
noncopyable_function<void (version_types version, file_writer& writer)> write_component, unsigned buffer_size) {
|
||||
auto file_path = filename(type);
|
||||
sstlog.debug("Writing {} file {}", sstable_version_constants::get_component_map(_version).at(type), file_path);
|
||||
|
||||
file_output_stream_options options;
|
||||
options.buffer_size = buffer_size;
|
||||
auto w = make_digests_component_file_writer(type, std::move(options)).get();
|
||||
do_write_simple(*w, std::move(write_component));
|
||||
return w->full_checksum();
|
||||
}
|
||||
|
||||
template <component_type Type, typename T>
|
||||
uint32_t sstable::write_simple_with_digest(const T& component) {
|
||||
return do_write_simple_with_digest(Type, [&component] (version_types v, file_writer& w) {
|
||||
write(v, w, component);
|
||||
}, sstable_buffer_size);
|
||||
}
|
||||
|
||||
template future<> sstable::read_simple<component_type::Filter>(sstables::filter& f);
|
||||
template void sstable::write_simple<component_type::Filter>(const sstables::filter& f);
|
||||
|
||||
template void sstable::write_simple<component_type::Summary>(const sstables::summary_ka&);
|
||||
template uint32_t sstable::write_simple_with_digest<component_type::Summary>(const sstables::summary_ka&);
|
||||
|
||||
future<> sstable::read_compression() {
|
||||
// FIXME: If there is no compression, we should expect a CRC file to be present.
|
||||
@@ -1121,8 +1092,7 @@ void sstable::write_compression() {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t digest = write_simple_with_digest<component_type::CompressionInfo>(_components->compression);
|
||||
_components_digests.compression_digest = digest;
|
||||
write_simple<component_type::CompressionInfo>(_components->compression);
|
||||
}
|
||||
|
||||
void sstable::validate_partitioner() {
|
||||
@@ -1347,8 +1317,7 @@ future<> sstable::read_partitions_db_footer() {
|
||||
}
|
||||
|
||||
void sstable::write_statistics() {
|
||||
auto digest = write_simple_with_digest<component_type::Statistics>(_components->statistics);
|
||||
_components_digests.statistics_digest = digest;
|
||||
write_simple<component_type::Statistics>(_components->statistics);
|
||||
}
|
||||
|
||||
void sstable::mark_as_being_repaired(const service::session_id& id) {
|
||||
@@ -1371,25 +1340,13 @@ int64_t sstable::update_repaired_at(int64_t repaired_at) {
|
||||
void sstable::rewrite_statistics() {
|
||||
sstlog.debug("Rewriting statistics component of sstable {}", get_filename());
|
||||
|
||||
auto lock = get_units(_mutate_sem, 1).get();
|
||||
file_output_stream_options options;
|
||||
options.buffer_size = sstable_buffer_size;
|
||||
auto w = make_digests_component_file_writer(component_type::TemporaryStatistics, std::move(options),
|
||||
auto w = make_component_file_writer(component_type::TemporaryStatistics, std::move(options),
|
||||
open_flags::wo | open_flags::create | open_flags::truncate).get();
|
||||
write(_version, *w, _components->statistics);
|
||||
w->close();
|
||||
|
||||
// When rewriting statistics, we also need to update the scylla component
|
||||
// because it contains the digest of the statistics component.
|
||||
if (has_scylla_component()) {
|
||||
_components_digests.statistics_digest = w->full_checksum();
|
||||
_components->scylla_metadata->data.set<scylla_metadata_type::ComponentsDigests>(components_digests{_components_digests});
|
||||
sstlog.debug("Rewriting scylla component of sstable {}", get_filename());
|
||||
write_simple<component_type::TemporaryScylla>(*_components->scylla_metadata);
|
||||
|
||||
// rename() guarantees atomicity when renaming a file into place.
|
||||
sstable_write_io_check(rename_file, fmt::to_string(filename(component_type::TemporaryScylla)), fmt::to_string(filename(component_type::Scylla))).get();
|
||||
}
|
||||
|
||||
write(_version, w, _components->statistics);
|
||||
w.close();
|
||||
// rename() guarantees atomicity when renaming a file into place.
|
||||
sstable_write_io_check(rename_file, fmt::to_string(filename(component_type::TemporaryStatistics)), fmt::to_string(filename(component_type::Statistics))).get();
|
||||
}
|
||||
@@ -1583,8 +1540,7 @@ void sstable::write_filter() {
|
||||
|
||||
auto&& bs = f->bits();
|
||||
auto filter_ref = sstables::filter_ref(f->num_hashes(), bs.get_storage());
|
||||
uint32_t digest = write_simple_with_digest<component_type::Filter>(filter_ref);
|
||||
_components_digests.filter_digest = digest;
|
||||
write_simple<component_type::Filter>(filter_ref);
|
||||
}
|
||||
|
||||
void sstable::maybe_rebuild_filter_from_index(uint64_t num_partitions) {
|
||||
@@ -2043,8 +1999,6 @@ sstable::read_scylla_metadata() noexcept {
|
||||
}
|
||||
return read_simple<component_type::Scylla>(*_components->scylla_metadata).then([this] {
|
||||
_features = _components->scylla_metadata->get_features();
|
||||
_components_digests = _components->scylla_metadata->get_components_digests();
|
||||
_components->digest = _components_digests.data_digest;
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -2134,7 +2088,6 @@ sstable::write_scylla_metadata(shard_id shard, struct run_identifier identifier,
|
||||
sstable_schema.columns.elements.push_back(sstable_column_description{to_sstable_column_kind(col.kind), {col.name()}, {to_bytes(col.type->name())}});
|
||||
}
|
||||
_components->scylla_metadata->data.set<scylla_metadata_type::Schema>(std::move(sstable_schema));
|
||||
_components->scylla_metadata->data.set<scylla_metadata_type::ComponentsDigests>(components_digests(_components_digests));
|
||||
|
||||
write_simple<component_type::Scylla>(*_components->scylla_metadata);
|
||||
}
|
||||
@@ -2536,15 +2489,19 @@ std::vector<std::pair<component_type, sstring>> sstable::all_components() const
|
||||
}
|
||||
|
||||
future<> sstable::snapshot(const sstring& dir) const {
|
||||
return _storage->snapshot(*this, dir, storage::absolute_path::yes);
|
||||
auto lock = co_await get_units(_mutate_sem, 1);
|
||||
co_await _storage->snapshot(*this, dir, storage::absolute_path::yes);
|
||||
}
|
||||
|
||||
future<> sstable::change_state(sstable_state to, delayed_commit_changes* delay_commit) {
|
||||
auto lock = co_await get_units(_mutate_sem, 1);
|
||||
co_await _storage->change_state(*this, to, _generation, delay_commit);
|
||||
_state = to;
|
||||
}
|
||||
|
||||
future<> sstable::pick_up_from_upload(sstable_state to, generation_type new_generation) {
|
||||
// just in case, not really needed as the sstable is not yet in use while in the upload dir
|
||||
auto lock = co_await get_units(_mutate_sem, 1);
|
||||
co_await _storage->change_state(*this, to, new_generation, nullptr);
|
||||
_generation = std::move(new_generation);
|
||||
_state = to;
|
||||
@@ -3118,31 +3075,6 @@ void sstable::set_sstable_level(uint32_t new_level) {
|
||||
s.sstable_level = new_level;
|
||||
}
|
||||
|
||||
std::optional<uint32_t> sstable::get_component_digest(component_type c) const {
|
||||
switch (c) {
|
||||
case component_type::Index:
|
||||
return _components_digests.index_digest;
|
||||
case component_type::Summary:
|
||||
return _components_digests.summary_digest;
|
||||
case component_type::TOC:
|
||||
return _components_digests.toc_digest;
|
||||
case component_type::CompressionInfo:
|
||||
return _components_digests.compression_digest;
|
||||
case component_type::Filter:
|
||||
return _components_digests.filter_digest;
|
||||
case component_type::Partitions:
|
||||
return _components_digests.partitions_digest;
|
||||
case component_type::Rows:
|
||||
return _components_digests.rows_digest;
|
||||
case component_type::Data:
|
||||
return _components_digests.data_digest;
|
||||
case component_type::Statistics:
|
||||
return _components_digests.statistics_digest;
|
||||
default:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
future<> sstable::mutate_sstable_level(uint32_t new_level) {
|
||||
if (!has_component(component_type::Statistics)) {
|
||||
return make_ready_future<>();
|
||||
@@ -3479,6 +3411,9 @@ utils::hashed_key sstable::make_hashed_key(const schema& s, const partition_key&
|
||||
|
||||
future<>
|
||||
sstable::unlink(storage::sync_dir sync) noexcept {
|
||||
// Serialize with other calls to unlink or potentially ongoing mutations.
|
||||
auto lock = co_await get_units(_mutate_sem, 1);
|
||||
|
||||
_unlinked = true;
|
||||
_on_delete(*this);
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "sstables/writer.hh"
|
||||
#include "version.hh"
|
||||
#include "shared_sstable.hh"
|
||||
#include "open_info.hh"
|
||||
@@ -629,7 +628,9 @@ private:
|
||||
size_t _total_memory_reclaimed{0};
|
||||
bool _unlinked{false};
|
||||
|
||||
components_digests _components_digests;
|
||||
// The mutate semaphore is used to serialize operations like rewrite_statistics
|
||||
// with linking or moving the sstable between directories.
|
||||
mutable named_semaphore _mutate_sem{1, named_semaphore_exception_factory{"sstable mutate"}};
|
||||
public:
|
||||
bool has_component(component_type f) const;
|
||||
sstables_manager& manager() { return _manager; }
|
||||
@@ -650,18 +651,12 @@ private:
|
||||
|
||||
template <component_type Type, typename T>
|
||||
void write_simple(const T& comp);
|
||||
void do_write_simple(file_writer& writer,
|
||||
void do_write_simple(file_writer&& writer,
|
||||
noncopyable_function<void (version_types, file_writer&)> write_component);
|
||||
void do_write_simple(component_type type,
|
||||
noncopyable_function<void (version_types version, file_writer& writer)> write_component,
|
||||
unsigned buffer_size);
|
||||
|
||||
template <component_type Type, typename T>
|
||||
uint32_t write_simple_with_digest(const T& comp);
|
||||
uint32_t do_write_simple_with_digest(component_type type,
|
||||
noncopyable_function<void (version_types version, file_writer& writer)> write_component,
|
||||
unsigned buffer_size);
|
||||
|
||||
void write_crc(const checksum& c);
|
||||
void write_digest(uint32_t full_checksum);
|
||||
|
||||
@@ -672,9 +667,6 @@ private:
|
||||
future<file_writer> make_component_file_writer(component_type c, file_output_stream_options options,
|
||||
open_flags oflags = open_flags::wo | open_flags::create | open_flags::exclusive) noexcept;
|
||||
|
||||
future<std::unique_ptr<crc32_digest_file_writer>> make_digests_component_file_writer(component_type c, file_output_stream_options options,
|
||||
open_flags oflags = open_flags::wo | open_flags::create | open_flags::exclusive) noexcept;
|
||||
|
||||
void generate_toc();
|
||||
void open_sstable(const sstring& origin);
|
||||
|
||||
@@ -705,8 +697,7 @@ private:
|
||||
future<> read_summary() noexcept;
|
||||
|
||||
void write_summary() {
|
||||
uint32_t digest = write_simple_with_digest<component_type::Summary>(_components->summary);
|
||||
_components_digests.summary_digest = digest;
|
||||
write_simple<component_type::Summary>(_components->summary);
|
||||
}
|
||||
|
||||
// To be called when we try to load an SSTable that lacks a Summary. Could
|
||||
@@ -836,7 +827,7 @@ private:
|
||||
|
||||
future<> open_or_create_data(open_flags oflags, file_open_options options = {}) noexcept;
|
||||
// runs in async context (called from storage::open)
|
||||
void write_toc(std::unique_ptr<crc32_digest_file_writer> w);
|
||||
void write_toc(file_writer w);
|
||||
static future<uint32_t> read_digest_from_file(file f);
|
||||
static future<lw_shared_ptr<checksum>> read_checksum_from_file(file f);
|
||||
public:
|
||||
@@ -1026,12 +1017,6 @@ public:
|
||||
return _components->digest;
|
||||
}
|
||||
|
||||
components_digests& get_components_digests() {
|
||||
return _components_digests;
|
||||
}
|
||||
|
||||
std::optional<uint32_t> get_component_digest(component_type c) const;
|
||||
|
||||
// Gets ratio of droppable tombstone. A tombstone is considered droppable here
|
||||
// for cells and tombstones expired before the time point "GC before", which
|
||||
// is the point before which expiring data can be purged.
|
||||
|
||||
@@ -204,13 +204,13 @@ void filesystem_storage::open(sstable& sst) {
|
||||
open_flags::create |
|
||||
open_flags::exclusive,
|
||||
options).get();
|
||||
auto w = std::make_unique<crc32_digest_file_writer>(std::move(sink), sst.sstable_buffer_size, component_name(sst, component_type::TemporaryTOC));
|
||||
auto w = file_writer(output_stream<char>(std::move(sink)), component_name(sst, component_type::TemporaryTOC));
|
||||
|
||||
bool toc_exists = file_exists(fmt::to_string(sst.filename(component_type::TOC))).get();
|
||||
if (toc_exists) {
|
||||
// TOC will exist at this point if write_components() was called with
|
||||
// the generation of a sstable that exists.
|
||||
w->close();
|
||||
w.close();
|
||||
remove_file(fmt::to_string(sst.filename(component_type::TemporaryTOC))).get();
|
||||
throw std::runtime_error(format("SSTable write failed due to existence of TOC file for generation {} of {}.{}", sst._generation, sst._schema->ks_name(), sst._schema->cf_name()));
|
||||
}
|
||||
@@ -670,10 +670,15 @@ void object_storage_base::open(sstable& sst) {
|
||||
sst.manager().sstables_registry().create_entry(owner(), status_creating, sst._state, std::move(desc)).get();
|
||||
|
||||
memory_data_sink_buffers bufs;
|
||||
auto out = data_sink(std::make_unique<memory_data_sink>(bufs));
|
||||
auto w = std::make_unique<crc32_digest_file_writer>(std::move(out), sst.sstable_buffer_size, component_name(sst, component_type::TOC));
|
||||
|
||||
sst.write_toc(std::move(w));
|
||||
sst.write_toc(
|
||||
file_writer(
|
||||
output_stream<char>(
|
||||
data_sink(
|
||||
std::make_unique<memory_data_sink>(bufs)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
put_object(make_object_name(sst, component_type::TOC), std::move(bufs)).get();
|
||||
}
|
||||
|
||||
|
||||
@@ -547,7 +547,6 @@ enum class scylla_metadata_type : uint32_t {
|
||||
ExtTimestampStats = 9,
|
||||
SSTableIdentifier = 10,
|
||||
Schema = 11,
|
||||
ComponentsDigests = 12,
|
||||
};
|
||||
|
||||
// UUID is used for uniqueness across nodes, such that an imported sstable
|
||||
@@ -574,24 +573,6 @@ struct sstable_identifier_type {
|
||||
auto describe_type(sstable_version_types v, Describer f) { return f(value); }
|
||||
};
|
||||
|
||||
// Component digests stored in scylla metadata to track integrity of individual components
|
||||
struct components_digests {
|
||||
std::optional<uint32_t> data_digest;
|
||||
std::optional<uint32_t> compression_digest;
|
||||
std::optional<uint32_t> filter_digest;
|
||||
std::optional<uint32_t> statistics_digest;
|
||||
std::optional<uint32_t> summary_digest;
|
||||
std::optional<uint32_t> index_digest;
|
||||
std::optional<uint32_t> toc_digest;
|
||||
std::optional<uint32_t> partitions_digest;
|
||||
std::optional<uint32_t> rows_digest;
|
||||
|
||||
template <typename Describer>
|
||||
auto describe_type(sstable_version_types v, Describer f) {
|
||||
return f(data_digest,compression_digest, filter_digest, statistics_digest, summary_digest, index_digest, toc_digest, partitions_digest, rows_digest);
|
||||
}
|
||||
};
|
||||
|
||||
// Types of large data statistics.
|
||||
//
|
||||
// Note: For extensibility, never reuse an identifier,
|
||||
@@ -675,8 +656,7 @@ struct scylla_metadata {
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::ScyllaVersion, scylla_version>,
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::ExtTimestampStats, ext_timestamp_stats>,
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::SSTableIdentifier, sstable_identifier>,
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::Schema, sstable_schema>,
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::ComponentsDigests, components_digests>
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::Schema, sstable_schema>
|
||||
> data;
|
||||
|
||||
sstable_enabled_features get_features() const {
|
||||
@@ -711,13 +691,6 @@ struct scylla_metadata {
|
||||
auto* sid = data.get<scylla_metadata_type::SSTableIdentifier, scylla_metadata::sstable_identifier>();
|
||||
return sid ? sid->value : sstable_id::create_null_id();
|
||||
}
|
||||
const components_digests get_components_digests() const {
|
||||
auto cd = data.get<scylla_metadata_type::ComponentsDigests, components_digests>();
|
||||
if (!cd) {
|
||||
return {};
|
||||
}
|
||||
return *cd;
|
||||
}
|
||||
|
||||
template <typename Describer>
|
||||
auto describe_type(sstable_version_types v, Describer f) { return f(data); }
|
||||
|
||||
@@ -65,7 +65,7 @@ serialized_size(sstable_version_types v, const T& object) {
|
||||
return size;
|
||||
}
|
||||
|
||||
template <typename ChecksumType, bool calculate_chunk_checksums>
|
||||
template <typename ChecksumType>
|
||||
requires ChecksumUtils<ChecksumType>
|
||||
class checksummed_file_data_sink_impl : public data_sink_impl {
|
||||
data_sink _out;
|
||||
@@ -92,9 +92,7 @@ public:
|
||||
|
||||
per_chunk_checksum = ChecksumType::checksum(per_chunk_checksum, buf.begin() + offset, size);
|
||||
_full_checksum = checksum_combine_or_feed<ChecksumType>(_full_checksum, per_chunk_checksum, buf.begin() + offset, size);
|
||||
if constexpr (calculate_chunk_checksums) {
|
||||
_c.checksums.push_back(per_chunk_checksum);
|
||||
}
|
||||
_c.checksums.push_back(per_chunk_checksum);
|
||||
}
|
||||
}
|
||||
return _out.put(std::move(bufs));
|
||||
@@ -114,29 +112,29 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ChecksumType, bool calculate_chunk_checksums>
|
||||
template <typename ChecksumType>
|
||||
requires ChecksumUtils<ChecksumType>
|
||||
class checksummed_file_data_sink : public data_sink {
|
||||
public:
|
||||
checksummed_file_data_sink(data_sink out, struct checksum& cinfo, uint32_t& full_file_checksum)
|
||||
: data_sink(std::make_unique<checksummed_file_data_sink_impl<ChecksumType, calculate_chunk_checksums>>(std::move(out), cinfo, full_file_checksum)) {}
|
||||
: data_sink(std::make_unique<checksummed_file_data_sink_impl<ChecksumType>>(std::move(out), cinfo, full_file_checksum)) {}
|
||||
};
|
||||
|
||||
template <typename ChecksumType, bool calculate_chunk_checksums>
|
||||
template <typename ChecksumType>
|
||||
requires ChecksumUtils<ChecksumType>
|
||||
inline
|
||||
output_stream<char> make_checksummed_file_output_stream(data_sink out, struct checksum& cinfo, uint32_t& full_file_checksum) {
|
||||
return output_stream<char>(checksummed_file_data_sink<ChecksumType, calculate_chunk_checksums>(std::move(out), cinfo, full_file_checksum));
|
||||
return output_stream<char>(checksummed_file_data_sink<ChecksumType>(std::move(out), cinfo, full_file_checksum));
|
||||
}
|
||||
|
||||
template <typename ChecksumType, bool calculate_chunk_checksums>
|
||||
template <typename ChecksumType>
|
||||
requires ChecksumUtils<ChecksumType>
|
||||
class checksummed_file_writer : public file_writer {
|
||||
checksum _c;
|
||||
uint32_t _full_checksum;
|
||||
public:
|
||||
checksummed_file_writer(data_sink out, size_t buffer_size, component_name c)
|
||||
: file_writer(make_checksummed_file_output_stream<ChecksumType, calculate_chunk_checksums>(std::move(out), _c, _full_checksum), std::move(c))
|
||||
: file_writer(make_checksummed_file_output_stream<ChecksumType>(std::move(out), _c, _full_checksum), std::move(c))
|
||||
, _c(uint32_t(std::min(size_t(DEFAULT_CHUNK_SIZE), buffer_size)), {})
|
||||
, _full_checksum(ChecksumType::init_checksum()) {}
|
||||
|
||||
@@ -154,10 +152,8 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
using adler32_checksummed_file_writer = checksummed_file_writer<adler32_utils, true>;
|
||||
using crc32_checksummed_file_writer = checksummed_file_writer<crc32_utils, true>;
|
||||
|
||||
using crc32_digest_file_writer = checksummed_file_writer<crc32_utils, false>;
|
||||
using adler32_checksummed_file_writer = checksummed_file_writer<adler32_utils>;
|
||||
using crc32_checksummed_file_writer = checksummed_file_writer<crc32_utils>;
|
||||
|
||||
template <typename T, typename W>
|
||||
requires Writer<W>
|
||||
|
||||
@@ -112,6 +112,7 @@ public:
|
||||
// Each virtual task needs to have its group.
|
||||
topology_change_group,
|
||||
tablets_group,
|
||||
global_topology_change_group,
|
||||
};
|
||||
|
||||
class task : public enable_lw_shared_from_this<task> {
|
||||
|
||||
5
test.py
5
test.py
@@ -228,7 +228,7 @@ def parse_cmd_line() -> argparse.Namespace:
|
||||
scylla_additional_options = parser.add_argument_group('Additional options for Scylla tests')
|
||||
scylla_additional_options.add_argument('--x-log2-compaction-groups', action="store", default="0", type=int,
|
||||
help="Controls number of compaction groups to be used by Scylla tests. Value of 3 implies 8 groups.")
|
||||
scylla_additional_options.add_argument('--extra-scylla-cmdline-options', action="store", default=[], type=str,
|
||||
scylla_additional_options.add_argument('--extra-scylla-cmdline-options', action="store", default="", type=str,
|
||||
help="Passing extra scylla cmdline options for all tests. Options should be space separated:"
|
||||
"'--logger-log-level raft=trace --default-log-level error'")
|
||||
|
||||
@@ -279,9 +279,6 @@ def parse_cmd_line() -> argparse.Namespace:
|
||||
args.tmpdir = os.path.abspath(args.tmpdir)
|
||||
prepare_dirs(tempdir_base=pathlib.Path(args.tmpdir), modes=args.modes, gather_metrics=args.gather_metrics, save_log_on_success=args.save_log_on_success)
|
||||
|
||||
if args.extra_scylla_cmdline_options:
|
||||
args.extra_scylla_cmdline_options = args.extra_scylla_cmdline_options.split()
|
||||
|
||||
return args
|
||||
|
||||
|
||||
|
||||
@@ -152,7 +152,7 @@ def test_batch_write_nonduplicate_multiple_tables(test_table_s, test_table_s_2):
|
||||
p = random_string()
|
||||
# The batch_writer() function used in previous tests can't write to more
|
||||
# than one table. So we use the lower level interface boto3 gives us.
|
||||
reply = test_table_s.meta.client.batch_write_item(RequestItems = {
|
||||
test_table_s.meta.client.batch_write_item(RequestItems = {
|
||||
test_table_s.name: [{'PutRequest': {'Item': {'p': p, 'a': 'hi'}}}],
|
||||
test_table_s_2.name: [{'PutRequest': {'Item': {'p': p, 'b': 'hello'}}}]
|
||||
})
|
||||
@@ -222,7 +222,7 @@ def test_batch_write_multiple_tables(test_table_s, test_table):
|
||||
# We use the low-level batch_write_item API for lack of a more convenient
|
||||
# API (the batch_writer() API can only write to one table). At least it
|
||||
# spares us the need to encode the key's types...
|
||||
reply = test_table.meta.client.batch_write_item(RequestItems = {
|
||||
test_table.meta.client.batch_write_item(RequestItems = {
|
||||
test_table.name: [{'PutRequest': {'Item': {'p': p1, 'c': c1, 'a': 'hi'}}}],
|
||||
test_table_s.name: [{'PutRequest': {'Item': {'p': p2, 'b': 'hello'}}}]
|
||||
})
|
||||
@@ -537,9 +537,8 @@ def test_batch_get_item_full_failure(scylla_only, dynamodb, rest_api, test_table
|
||||
for i in range(count):
|
||||
batch.put_item(Item={
|
||||
'p': p, 'c': i, 'content': content})
|
||||
responses = []
|
||||
to_read = { test_table_sn.name: {'Keys': [{'p': p, 'c': c} for c in range(count)], 'ConsistentRead': True } }
|
||||
# The error injection is permanent, so it will fire for each batch read.
|
||||
with scylla_inject_error(rest_api, "alternator_batch_get_item", one_shot=False):
|
||||
with pytest.raises(ClientError, match="InternalServerError"):
|
||||
reply = test_table_sn.meta.client.batch_get_item(RequestItems = to_read)
|
||||
test_table_sn.meta.client.batch_get_item(RequestItems = to_read)
|
||||
|
||||
@@ -376,7 +376,7 @@ def test_rbac_updateitem_read(dynamodb, cql, test_table_s):
|
||||
assert ret['Attributes'] == {'p': p, 'v': v1}
|
||||
# Just MODIFY permission, not SELECT permission, also allows
|
||||
# us to do a read-modify-write expression:
|
||||
ret = authorized(lambda: tab.update_item(Key={'p': p},
|
||||
authorized(lambda: tab.update_item(Key={'p': p},
|
||||
UpdateExpression='SET v = v + :val',
|
||||
ExpressionAttributeValues={':val': 1}))
|
||||
assert {'p': p, 'v': v2 + 1} == test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
|
||||
@@ -903,7 +903,6 @@ def test_rbac_tagresource(dynamodb, cql):
|
||||
arn = table.meta.client.describe_table(TableName=table.name)['Table']['TableArn']
|
||||
with new_role(cql) as (role, key):
|
||||
with new_dynamodb(dynamodb, role, key) as d:
|
||||
tab = d.Table(table.name)
|
||||
# Without ALTER permission, TagResource and UntagResource
|
||||
# are refused
|
||||
tags = [{'Key': 'hello', 'Value': 'dog'},
|
||||
|
||||
@@ -80,18 +80,18 @@ def test_table_sn_with_data(test_table_sn):
|
||||
def test_filter_expression_partition_key_1(test_table_sn_with_data):
|
||||
table, p, items = test_table_sn_with_data
|
||||
with pytest.raises(ClientError, match='ValidationException.*Condition'):
|
||||
got_items = full_query(table, FilterExpression='p=:p', ExpressionAttributeValues={':p': p})
|
||||
full_query(table, FilterExpression='p=:p', ExpressionAttributeValues={':p': p})
|
||||
|
||||
def test_filter_expression_partition_key_2(test_table_sn_with_data):
|
||||
table, p, items = test_table_sn_with_data
|
||||
with pytest.raises(ClientError, match='ValidationException.* p'):
|
||||
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='p=:p', ExpressionAttributeValues={':p': p})
|
||||
full_query(table, KeyConditionExpression='p=:p', FilterExpression='p=:p', ExpressionAttributeValues={':p': p})
|
||||
|
||||
# FilterExpression is also not allowed on the sort key.
|
||||
def test_filter_expression_sort_key(test_table_sn_with_data):
|
||||
table, p, items = test_table_sn_with_data
|
||||
with pytest.raises(ClientError, match='ValidationException.* key '):
|
||||
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='c=:c',
|
||||
full_query(table, KeyConditionExpression='p=:p', FilterExpression='c=:c',
|
||||
ExpressionAttributeValues={':p': p, ':c': 3})
|
||||
|
||||
# Test the "=" operator on different types of attributes (numeric, string,
|
||||
@@ -387,7 +387,6 @@ def test_filter_expression_map_contains(test_table_sn_with_data):
|
||||
assert(got_items == expected_items)
|
||||
# One value from a map:
|
||||
i = next(iter(items[2]['m']))
|
||||
v = items[2]['m'][i]
|
||||
got_items = full_query(table, KeyConditionExpression='p=:p', FilterExpression='contains(m, :i)',
|
||||
ExpressionAttributeValues={':p': p, ':i': i})
|
||||
#The following could have made sense, but it's what DynamoDB does:
|
||||
|
||||
@@ -125,7 +125,6 @@ def test_basic_string_more_update(test_table):
|
||||
val1 = random_string()
|
||||
val2 = random_string()
|
||||
val3 = random_string()
|
||||
val4 = random_string()
|
||||
test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'a3': {'Value': val1, 'Action': 'PUT'}})
|
||||
test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'a1': {'Value': val1, 'Action': 'PUT'}})
|
||||
test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'a2': {'Value': val2, 'Action': 'PUT'}})
|
||||
|
||||
@@ -304,7 +304,7 @@ def test_wcu_batch_write_item(test_table_s, metrics):
|
||||
with check_increases_operation(metrics, ['PutItem'], 'scylla_alternator_wcu_total', 3):
|
||||
p1 = random_string()
|
||||
p2 = random_string()
|
||||
response = test_table_s.meta.client.batch_write_item(RequestItems = {
|
||||
test_table_s.meta.client.batch_write_item(RequestItems = {
|
||||
test_table_s.name: [{'PutRequest': {'Item': {'p': p1, 'a': 'hi'}}}, {'PutRequest': {'Item': {'p': p2, 'a': 'a' * KB}}}]
|
||||
})
|
||||
|
||||
|
||||
@@ -369,7 +369,6 @@ def test_query_exclusivestartkey(test_table_sn):
|
||||
# The ExclusiveStartKey option must indicate both partition key and
|
||||
# sort key. Note that the Python driver further converts this map
|
||||
# into the correct format for the request (including the key types).
|
||||
exclusivestartkey = { 'p': p, 'c': start }
|
||||
got_items = test_table_sn.query(
|
||||
KeyConditions={'p': { 'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}},
|
||||
ExclusiveStartKey= { 'p': p, 'c': start },
|
||||
|
||||
@@ -35,14 +35,12 @@ def test_invalid_consumed_capacity_type(test_table_sb):
|
||||
c = random_bytes()
|
||||
test_table_sb.put_item(Item={'p': p, 'c': c, 'att': val})
|
||||
with pytest.raises(ClientError):
|
||||
response = test_table_sb.get_item(Key={'p': p, 'c': c}, ConsistentRead=True, ReturnConsumedCapacity='DUMMY')
|
||||
test_table_sb.get_item(Key={'p': p, 'c': c}, ConsistentRead=True, ReturnConsumedCapacity='DUMMY')
|
||||
|
||||
# A missing Item, count as zero length item which require 1 or 0.5 RCU depends on the consistency
|
||||
def test_missing_get_item(test_table):
|
||||
p = random_string()
|
||||
c = random_string()
|
||||
val = random_string()
|
||||
val2 = random_string()
|
||||
response = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True, ReturnConsumedCapacity='TOTAL')
|
||||
assert 'ConsumedCapacity' in response
|
||||
consumed_capacity = response['ConsumedCapacity']
|
||||
@@ -225,7 +223,6 @@ def test_simple_delete_item(test_table_sb):
|
||||
# we will get 1 WCU
|
||||
def test_delete_missing_item(test_table_sb):
|
||||
p = random_string()
|
||||
val = random_string()
|
||||
c = random_bytes()
|
||||
response = test_table_sb.delete_item(Key={'p': p, 'c': c}, ReturnConsumedCapacity='TOTAL')
|
||||
assert 'ConsumedCapacity' in response
|
||||
|
||||
@@ -99,7 +99,7 @@ def test_put_item_returnvalues_on_condition_check_failure(test_table_s):
|
||||
p = random_string()
|
||||
# Failed conditional on non existing item doesn't return values.
|
||||
with pytest.raises(test_table_s.meta.client.exceptions.ConditionalCheckFailedException) as err:
|
||||
ret=test_table_s.put_item(Item={'p': p, 's': 'cat'},
|
||||
test_table_s.put_item(Item={'p': p, 's': 'cat'},
|
||||
ReturnValuesOnConditionCheckFailure='ALL_OLD',
|
||||
ConditionExpression='s = :v1',
|
||||
ExpressionAttributeValues={':v1' : 'dog'})
|
||||
@@ -175,7 +175,7 @@ def test_delete_item_returnvalues_on_condition_check_failure(test_table_s):
|
||||
p = random_string()
|
||||
# Delete of non existing item doesn't return values.
|
||||
with pytest.raises(test_table_s.meta.client.exceptions.ConditionalCheckFailedException) as err:
|
||||
ret=test_table_s.delete_item(Key={'p': p},
|
||||
test_table_s.delete_item(Key={'p': p},
|
||||
ReturnValuesOnConditionCheckFailure='ALL_OLD',
|
||||
ConditionExpression='s = :v1',
|
||||
ExpressionAttributeValues={':v1' : 'dog'})
|
||||
@@ -566,7 +566,7 @@ def test_update_item_returnvalues_on_condition_check_failure(test_table_s):
|
||||
p = random_string()
|
||||
# Modification of non existing item doesn't return values.
|
||||
with pytest.raises(test_table_s.meta.client.exceptions.ConditionalCheckFailedException) as err:
|
||||
ret=test_table_s.update_item(Key={'p': p},
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
ReturnValuesOnConditionCheckFailure='ALL_OLD',
|
||||
ConditionExpression='s = :v1',
|
||||
UpdateExpression='SET s = :v2',
|
||||
|
||||
@@ -220,7 +220,6 @@ def test_scan_with_key_equality_filtering(dynamodb, filled_test_table):
|
||||
# without returning items at all.
|
||||
def test_scan_select(filled_test_table):
|
||||
test_table, items = filled_test_table
|
||||
got_items = full_scan(test_table)
|
||||
# By default, a scan returns all the items, with all their attributes:
|
||||
# query returns all attributes:
|
||||
got_items = full_scan(test_table)
|
||||
|
||||
@@ -135,7 +135,7 @@ def test_list_streams_create(dynamodb, dynamodbstreams):
|
||||
def test_list_streams_alter(dynamodb, dynamodbstreams):
|
||||
for type in stream_types:
|
||||
with create_stream_test_table(dynamodb, StreamViewType=None) as table:
|
||||
res = table.update(StreamSpecification={'StreamEnabled': True, 'StreamViewType': type});
|
||||
table.update(StreamSpecification={'StreamEnabled': True, 'StreamViewType': type});
|
||||
wait_for_active_stream(dynamodbstreams, table)
|
||||
|
||||
def test_list_streams_paged(dynamodb, dynamodbstreams):
|
||||
@@ -273,7 +273,7 @@ def test_describe_stream_create_time(dynamodb, dynamodbstreams):
|
||||
|
||||
def test_describe_nonexistent_stream(dynamodb, dynamodbstreams):
|
||||
with pytest.raises(ClientError, match='ResourceNotFoundException' if is_local_java(dynamodbstreams) else 'ValidationException'):
|
||||
streams = dynamodbstreams.describe_stream(StreamArn='sdfadfsdfnlfkajakfgjalksfgklasjklasdjfklasdfasdfgasf')
|
||||
dynamodbstreams.describe_stream(StreamArn='sdfadfsdfnlfkajakfgjalksfgklasjklasdjfklasdfasdfgasf')
|
||||
|
||||
def test_describe_stream_with_nonexistent_last_shard(dynamodb, dynamodbstreams):
|
||||
with create_stream_test_table(dynamodb, StreamViewType='KEYS_ONLY') as table:
|
||||
@@ -313,7 +313,7 @@ def test_get_shard_iterator(dynamodb, dynamodbstreams):
|
||||
for type in ['AT_SEQUENCE_NUMBER', 'AFTER_SEQUENCE_NUMBER']:
|
||||
# must have seq in these modes
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
iter = dynamodbstreams.get_shard_iterator(
|
||||
dynamodbstreams.get_shard_iterator(
|
||||
StreamArn=arn, ShardId=shard_id, ShardIteratorType=type
|
||||
)
|
||||
|
||||
@@ -326,7 +326,7 @@ def test_get_shard_iterator(dynamodb, dynamodbstreams):
|
||||
|
||||
# bad arn
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
iter = dynamodbstreams.get_shard_iterator(
|
||||
dynamodbstreams.get_shard_iterator(
|
||||
StreamArn='sdfadsfsdfsdgdfsgsfdabadfbabdadsfsdfsdfsdfsdfsdfsdfdfdssdffbdfdf', ShardId=shard_id, ShardIteratorType=type, SequenceNumber=seq
|
||||
)
|
||||
# bad shard id
|
||||
@@ -735,7 +735,6 @@ def compare_events(expected_events, output, mode, expected_region):
|
||||
assert not 'NewImage' in record
|
||||
if expected_old_image == None:
|
||||
assert not 'OldImage' in record
|
||||
pass
|
||||
else:
|
||||
old_image = {x:deserializer.deserialize(y) for (x,y) in record['OldImage'].items()}
|
||||
assert expected_old_image == old_image
|
||||
@@ -1642,7 +1641,6 @@ def test_table_stream_with_result(dynamodb, dynamodbstreams):
|
||||
# doing an UpdateTable to a table - because before this wait finishes we are
|
||||
# not allowed to update the same table again or delete it.
|
||||
def wait_for_status_active(table):
|
||||
start_time = time.time()
|
||||
for i in range(60):
|
||||
desc = table.meta.client.describe_table(TableName=table.name)
|
||||
if desc['Table']['TableStatus'] == 'ACTIVE':
|
||||
@@ -1919,15 +1917,15 @@ def test_get_records_too_high_limit(test_table_ss_keys_only, dynamodbstreams):
|
||||
shard_id = shard['ShardId']
|
||||
iter = dynamodbstreams.get_shard_iterator(StreamArn=arn, ShardId=shard_id, ShardIteratorType='LATEST')['ShardIterator']
|
||||
# Limit=1000 should be allowed:
|
||||
response = dynamodbstreams.get_records(ShardIterator=iter, Limit=1000)
|
||||
dynamodbstreams.get_records(ShardIterator=iter, Limit=1000)
|
||||
# Limit=1001 should NOT be allowed
|
||||
with pytest.raises(ClientError, match='ValidationException.*[Ll]imit'):
|
||||
response = dynamodbstreams.get_records(ShardIterator=iter, Limit=1001)
|
||||
dynamodbstreams.get_records(ShardIterator=iter, Limit=1001)
|
||||
# Limit must be >= 0:
|
||||
with pytest.raises(ClientError, match='ValidationException.*[Ll]imit'):
|
||||
response = dynamodbstreams.get_records(ShardIterator=iter, Limit=0)
|
||||
dynamodbstreams.get_records(ShardIterator=iter, Limit=0)
|
||||
with pytest.raises(ClientError, match='ValidationException.*[Ll]imit'):
|
||||
response = dynamodbstreams.get_records(ShardIterator=iter, Limit=-1)
|
||||
dynamodbstreams.get_records(ShardIterator=iter, Limit=-1)
|
||||
|
||||
# padded_name() creates a unique name of given length by taking the
|
||||
# output of unique_table_name() and padding it with extra 'x' characters:
|
||||
|
||||
@@ -56,7 +56,6 @@ def test_page_break_over_range_tombstone_asan(scylla_only, dynamodb, rest_api, c
|
||||
while True:
|
||||
response = client.scan(TableName=qualified_name, Limit=10, **args)
|
||||
pos = response.get('LastEvaluatedKey', None)
|
||||
cnt = 0
|
||||
for i in response['Items']:
|
||||
if i['cf_id'] == 'eee7eb26-a372-4eb4-aeaa-72f224cf0000':
|
||||
items_found.append(i['schema_version'])
|
||||
@@ -101,10 +100,9 @@ def test_fetch_from_system_tables(scylla_only, dynamodb, rest_api):
|
||||
def test_block_access_to_non_system_tables_with_virtual_interface(scylla_only, test_table_s, dynamodb):
|
||||
client = dynamodb.meta.client
|
||||
with pytest.raises(ClientError, match='ResourceNotFoundException.*{}'.format(internal_prefix)):
|
||||
tables_response = client.scan(TableName="{}alternator_{}.{}".format(internal_prefix, test_table_s.name, test_table_s.name))
|
||||
client.scan(TableName="{}alternator_{}.{}".format(internal_prefix, test_table_s.name, test_table_s.name))
|
||||
|
||||
def test_block_creating_tables_with_reserved_prefix(scylla_only, dynamodb):
|
||||
client = dynamodb.meta.client
|
||||
for wrong_name_postfix in ['', 'a', 'xxx', 'system_auth.roles', 'table_name']:
|
||||
with pytest.raises(ClientError, match=internal_prefix):
|
||||
dynamodb.create_table(TableName=internal_prefix+wrong_name_postfix,
|
||||
@@ -200,7 +198,6 @@ def test_write_to_config(scylla_only, dynamodb):
|
||||
# Same test as above, just using the scylla_config_temporary() utility
|
||||
# function (also validating its correctness)
|
||||
def test_scylla_config_temporary(scylla_only, dynamodb):
|
||||
tbl = '.scylla.alternator.system.config'
|
||||
parameter = 'query_tombstone_page_limit'
|
||||
old_val = scylla_config_read(dynamodb, parameter)
|
||||
new_val = old_val + "1"
|
||||
|
||||
@@ -1021,7 +1021,7 @@ def test_transact_get_items_projection_expression(test_table_s):
|
||||
def test_transact_get_items_unused_expressionattributenames(test_table_s):
|
||||
p = random_string()
|
||||
with pytest.raises(ClientError, match='ValidationException.*unused.*#qq'):
|
||||
ret = test_table_s.meta.client.transact_get_items(TransactItems=[
|
||||
test_table_s.meta.client.transact_get_items(TransactItems=[
|
||||
{ 'Get': {
|
||||
'TableName': test_table_s.name,
|
||||
'Key': {'p': p},
|
||||
@@ -1034,7 +1034,7 @@ def test_transact_get_items_unused_expressionattributenames(test_table_s):
|
||||
def test_transact_get_items_missing_expressionattributenames(test_table_s):
|
||||
p = random_string()
|
||||
with pytest.raises(ClientError, match='ValidationException.*#zz'):
|
||||
ret = test_table_s.meta.client.transact_get_items(TransactItems=[
|
||||
test_table_s.meta.client.transact_get_items(TransactItems=[
|
||||
{ 'Get': {
|
||||
'TableName': test_table_s.name,
|
||||
'Key': {'p': p},
|
||||
@@ -1071,7 +1071,6 @@ def test_transact_get_items_100(test_table_s):
|
||||
# A transaction with 100 read actions is the limit, and 101 are not allowed:
|
||||
@pytest.mark.xfail(reason="#5064 - transactions not yet supported")
|
||||
def test_transact_get_items_101(test_table_s):
|
||||
p = random_string()
|
||||
with pytest.raises(ClientError, match='ValidationException.*[tT]ransactItems.*100'):
|
||||
test_table_s.meta.client.transact_get_items(TransactItems=[
|
||||
{ 'Get': {
|
||||
|
||||
@@ -638,12 +638,10 @@ def test_ttl_expiration_lsi_key(dynamodb, waits_for_expiration):
|
||||
assert response['TimeToLiveSpecification'] == ttl_spec
|
||||
p = random_string()
|
||||
c = random_string()
|
||||
l = random_string()
|
||||
# expiration one minute in the past, so item should expire ASAP.
|
||||
expiration = int(time.time()) - 60
|
||||
table.put_item(Item={'p': p, 'c': c, 'l': expiration})
|
||||
start_time = time.time()
|
||||
gsi_was_alive = False
|
||||
while time.time() < start_time + max_duration:
|
||||
if 'Item' not in table.get_item(Key={'p': p, 'c': c}):
|
||||
# test is done - and successful:
|
||||
@@ -787,7 +785,7 @@ def test_ttl_expiration_long(dynamodb, waits_for_expiration):
|
||||
AttributeDefinitions=[ { 'AttributeName': 'p', 'AttributeType': 'N' },
|
||||
{ 'AttributeName': 'c', 'AttributeType': 'N' }]) as table:
|
||||
ttl_spec = {'AttributeName': 'expiration', 'Enabled': True}
|
||||
response = table.meta.client.update_time_to_live(TableName=table.name,
|
||||
table.meta.client.update_time_to_live(TableName=table.name,
|
||||
TimeToLiveSpecification=ttl_spec)
|
||||
with table.batch_writer() as batch:
|
||||
for p in range(N):
|
||||
|
||||
@@ -244,7 +244,7 @@ def get_region(dynamodb):
|
||||
# will trigger a test to be skipped if it cannot be executed.
|
||||
@contextmanager
|
||||
def scylla_inject_error(rest_api, err, one_shot=False):
|
||||
response = requests.post(f'{rest_api}/v2/error_injection/injection/{err}?one_shot={one_shot}')
|
||||
requests.post(f'{rest_api}/v2/error_injection/injection/{err}?one_shot={one_shot}')
|
||||
response = requests.get(f'{rest_api}/v2/error_injection/injection')
|
||||
print("Enabled error injections:", response.content.decode('utf-8'))
|
||||
if response.content.decode('utf-8') == "[]":
|
||||
@@ -253,7 +253,7 @@ def scylla_inject_error(rest_api, err, one_shot=False):
|
||||
yield
|
||||
finally:
|
||||
print("Disabling error injection", err)
|
||||
response = requests.delete(f'{rest_api}/v2/error_injection/injection/{err}')
|
||||
requests.delete(f'{rest_api}/v2/error_injection/injection/{err}')
|
||||
|
||||
# Send a message to the Scylla log. E.g., we can write a message to the log
|
||||
# indicating that a test has started, which will make it easier to see which
|
||||
@@ -306,7 +306,6 @@ def wait_for_gsi_gone(table, gsi_name):
|
||||
if 'GlobalSecondaryIndexes' in desc['Table']:
|
||||
index_desc = [x for x in desc['Table']['GlobalSecondaryIndexes'] if x['IndexName'] == gsi_name]
|
||||
if len(index_desc) != 0:
|
||||
index_status = index_desc[0]['IndexStatus']
|
||||
time.sleep(0.1)
|
||||
continue
|
||||
return
|
||||
|
||||
@@ -1055,14 +1055,16 @@ SEASTAR_TEST_CASE(test_rack_list_rejected_when_feature_not_enabled) {
|
||||
BOOST_REQUIRE_EQUAL(replication_factor_data(opts.at(loc.dc)).count(), 1);
|
||||
BOOST_REQUIRE(describe(e, "test2").contains(fmt::format("'{}': '1'", loc.dc)));
|
||||
|
||||
// When feature is enabled, rack list is accepted.
|
||||
e.get_feature_service().local().rack_list_rf.enable();
|
||||
e.execute_cql(create_stmt).get();
|
||||
|
||||
// Altering numeric RF to rack list is not supported yet.
|
||||
// Altering to rack list is not allowed when feature is disabled.
|
||||
BOOST_REQUIRE_THROW(e.execute_cql(fmt::format("ALTER KEYSPACE test2 WITH REPLICATION = {{'class': 'NetworkTopologyStrategy',"
|
||||
" '{}': ['{}']}}", loc.dc, loc.rack)).get(),
|
||||
exceptions::configuration_exception);
|
||||
|
||||
// When feature is enabled, rack list is accepted.
|
||||
e.get_feature_service().local().rack_list_rf.enable();
|
||||
e.execute_cql(create_stmt).get();
|
||||
e.execute_cql(fmt::format("ALTER KEYSPACE test2 WITH REPLICATION = {{'class': 'NetworkTopologyStrategy',"
|
||||
" '{}': ['{}']}}", loc.dc, loc.rack)).get();
|
||||
}, cfg);
|
||||
}
|
||||
|
||||
|
||||
@@ -1663,7 +1663,7 @@ SEASTAR_TEST_CASE(test_reader_concurrency_semaphore_memory_limit_engages) {
|
||||
db_cfg.reader_concurrency_semaphore_kill_limit_multiplier.set(4, utils::config_file::config_source::CommandLine);
|
||||
|
||||
return do_with_cql_env_thread([] (cql_test_env& env) {
|
||||
auto tbl = create_memory_limit_table(env, 64);
|
||||
auto tbl = create_memory_limit_table(env, 54);
|
||||
|
||||
auto& db = env.local_db();
|
||||
auto& semaphore = db.get_reader_concurrency_semaphore();
|
||||
|
||||
@@ -105,6 +105,28 @@ SEASTAR_THREAD_TEST_CASE(test_learn_schema_with_cdc) {
|
||||
BOOST_REQUIRE(s->cdc_schema()->registry_entry());
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_learn_loaded_schema_with_cdc) {
|
||||
dummy_init dummy;
|
||||
auto s_cdc = schema_builder("ks", "cdc_cf")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("val", bytes_type)
|
||||
.build();
|
||||
auto s = schema_builder("ks", "cf")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("val", bytes_type)
|
||||
.with_cdc_schema(s_cdc)
|
||||
.build();
|
||||
|
||||
local_schema_registry().get_or_load(s->version(), [s] (table_schema_version) {
|
||||
return make_ready_future<extended_frozen_schema>(s);
|
||||
}).get();
|
||||
|
||||
s = local_schema_registry().learn(s);
|
||||
|
||||
BOOST_REQUIRE(s->registry_entry());
|
||||
BOOST_REQUIRE(s->cdc_schema()->registry_entry());
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_async_loading) {
|
||||
return seastar::async([] {
|
||||
dummy_init dummy;
|
||||
|
||||
@@ -15,14 +15,11 @@
|
||||
#include <seastar/core/smp.hh>
|
||||
#include <seastar/util/closeable.hh>
|
||||
|
||||
#include "sstables/checksum_utils.hh"
|
||||
#include <seastar/util/short_streams.hh>
|
||||
#include "sstables/generation_type.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "sstables/key.hh"
|
||||
#include "sstables/open_info.hh"
|
||||
#include "sstables/version.hh"
|
||||
#include "test/lib/random_schema.hh"
|
||||
#include "test/lib/sstable_utils.hh"
|
||||
#include "test/lib/reader_concurrency_semaphore.hh"
|
||||
#include "test/lib/scylla_test_case.hh"
|
||||
@@ -35,7 +32,6 @@
|
||||
#include "partition_slice_builder.hh"
|
||||
#include "sstables/sstable_mutation_reader.hh"
|
||||
#include "sstables/binary_search.hh"
|
||||
#include "test/lib/random_utils.hh"
|
||||
|
||||
#include <boost/range/combine.hpp>
|
||||
|
||||
@@ -883,101 +879,3 @@ BOOST_AUTO_TEST_CASE(test_parse_path_bad) {
|
||||
BOOST_CHECK_THROW(parse_path(path), std::exception);
|
||||
}
|
||||
}
|
||||
|
||||
using compress_sstable = tests::random_schema_specification::compress_sstable;
|
||||
static future<> test_component_digest_persistence(component_type component, sstable::version_types version, compress_sstable compress = compress_sstable::no, bool rewrite_statistics = false) {
|
||||
return test_env::do_with_async([component, version, compress, rewrite_statistics] (test_env& env) mutable {
|
||||
auto random_spec = tests::make_random_schema_specification(
|
||||
"ks",
|
||||
std::uniform_int_distribution<size_t>(1, 4),
|
||||
std::uniform_int_distribution<size_t>(2, 4),
|
||||
std::uniform_int_distribution<size_t>(2, 8),
|
||||
std::uniform_int_distribution<size_t>(2, 8),
|
||||
compress);
|
||||
auto random_schema = tests::random_schema{tests::random::get_int<uint32_t>(), *random_spec};
|
||||
auto schema = random_schema.schema();
|
||||
|
||||
const auto muts = tests::generate_random_mutations(random_schema, 2).get();
|
||||
auto sst_original = make_sstable_containing(env.make_sstable(schema, version), muts);
|
||||
|
||||
auto& components = sstables::test(sst_original).get_components();
|
||||
bool has_component = components.find(component) != components.end();
|
||||
BOOST_REQUIRE(has_component);
|
||||
|
||||
auto toc_path = fmt::to_string(sst_original->toc_filename());
|
||||
auto entry_desc = sstables::parse_path(toc_path, schema->ks_name(), schema->cf_name());
|
||||
auto dir_path = std::filesystem::path(toc_path).parent_path().string();
|
||||
|
||||
std::optional<uint32_t> original_digest;
|
||||
if (rewrite_statistics) {
|
||||
original_digest = sst_original->get_component_digest(component);
|
||||
BOOST_REQUIRE(original_digest.has_value());
|
||||
|
||||
sst_original->mutate_sstable_level(10).get();
|
||||
|
||||
auto new_digest = sst_original->get_component_digest(component);
|
||||
BOOST_REQUIRE(new_digest.has_value());
|
||||
|
||||
BOOST_REQUIRE(original_digest.value() != new_digest.value());
|
||||
}
|
||||
|
||||
sst_original = nullptr;
|
||||
|
||||
auto sst_reopened = env.make_sstable(schema, dir_path, entry_desc.generation, entry_desc.version, entry_desc.format);
|
||||
sst_reopened->load(schema->get_sharder()).get();
|
||||
|
||||
auto loaded_digest = sst_reopened->get_component_digest(component);
|
||||
BOOST_REQUIRE(loaded_digest.has_value());
|
||||
|
||||
auto f = open_file_dma(sstables::test(sst_reopened).filename(component).native(), open_flags::ro).get();
|
||||
auto stream = make_file_input_stream(f);
|
||||
auto close_stream = deferred_close(stream);
|
||||
auto component_data = util::read_entire_stream_contiguous(stream).get();
|
||||
auto calculated_digest = crc32_utils::checksum(component_data.begin(), component_data.size());
|
||||
BOOST_REQUIRE_EQUAL(calculated_digest, loaded_digest.value());
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_index) {
|
||||
return test_component_digest_persistence(component_type::Index, sstable::version_types::me);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_partitions) {
|
||||
return test_component_digest_persistence(component_type::Partitions, sstable::version_types::ms);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_rows) {
|
||||
return test_component_digest_persistence(component_type::Rows, sstable::version_types::ms);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_summary) {
|
||||
return test_component_digest_persistence(component_type::Summary, sstable::version_types::me);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_filter) {
|
||||
return test_component_digest_persistence(component_type::Filter, sstable::version_types::me);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_compression) {
|
||||
return test_component_digest_persistence(component_type::CompressionInfo, sstable::version_types::me, compress_sstable::yes);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_toc) {
|
||||
return test_component_digest_persistence(component_type::TOC, sstable::version_types::me);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_statistics) {
|
||||
return test_component_digest_persistence(component_type::Statistics, sstable::version_types::me);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_statistics_rewrite) {
|
||||
return test_component_digest_persistence(component_type::Statistics, sstable::version_types::me, compress_sstable::no, true);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_data) {
|
||||
return test_component_digest_persistence(component_type::Data, sstable::version_types::me);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_digest_persistence_data_compressed) {
|
||||
return test_component_digest_persistence(component_type::Data, sstable::version_types::me, compress_sstable::yes);
|
||||
}
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
|
||||
|
||||
|
||||
#include "utils/UUID.hh"
|
||||
#include <boost/test/tools/old/interface.hpp>
|
||||
#include <seastar/core/shard_id.hh>
|
||||
#include <seastar/coroutine/as_future.hh>
|
||||
#include <source_location>
|
||||
@@ -446,6 +448,36 @@ SEASTAR_THREAD_TEST_CASE(test_invalid_colocated_tables) {
|
||||
.get();
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_paused_rf_change_requests_persistence) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
topology_builder topo(e);
|
||||
|
||||
auto topology = e.get_system_keyspace().local().load_topology_state({}).get();
|
||||
|
||||
// Check scheduled_rf_change_requests.
|
||||
std::unordered_set<utils::UUID> current_requests;
|
||||
auto new_id1 = utils::make_random_uuid();
|
||||
topo.pause_rf_change_request(new_id1);
|
||||
current_requests.insert(new_id1);
|
||||
auto new_id2 = utils::make_random_uuid();
|
||||
topo.pause_rf_change_request(new_id2);
|
||||
current_requests.insert(new_id2);
|
||||
topology = e.get_system_keyspace().local().load_topology_state({}).get();
|
||||
BOOST_REQUIRE_EQUAL(current_requests.size(), topology.paused_rf_change_requests.size());
|
||||
for (const auto& request : current_requests) {
|
||||
BOOST_REQUIRE(topology.paused_rf_change_requests.contains(request));
|
||||
}
|
||||
|
||||
topo.resume_rf_change_request(current_requests, new_id1);
|
||||
current_requests.erase(new_id1);
|
||||
topology = e.get_system_keyspace().local().load_topology_state({}).get();
|
||||
BOOST_REQUIRE_EQUAL(current_requests.size(), topology.paused_rf_change_requests.size());
|
||||
for (const auto& request : current_requests) {
|
||||
BOOST_REQUIRE(topology.paused_rf_change_requests.contains(request));
|
||||
}
|
||||
}, tablet_cql_test_config());
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_tablet_metadata_persistence_with_colocated_tables) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
auto h1 = host_id(utils::UUID_gen::get_time_UUID());
|
||||
@@ -1611,7 +1643,7 @@ future<> handle_resize_finalize(cql_test_env& e, group0_guard& guard, const migr
|
||||
|
||||
// Reflects the plan in a given token metadata as if the migrations were fully executed.
|
||||
static
|
||||
future<> apply_plan(token_metadata& tm, const migration_plan& plan) {
|
||||
future<> apply_plan(token_metadata& tm, const migration_plan& plan, service::topology& topology) {
|
||||
for (auto&& mig : plan.migrations()) {
|
||||
co_await tm.tablets().mutate_tablet_map_async(mig.tablet.table, [&] (tablet_map& tmap) {
|
||||
auto tinfo = tmap.get_tablet_info(mig.tablet.tablet);
|
||||
@@ -1622,6 +1654,9 @@ future<> apply_plan(token_metadata& tm, const migration_plan& plan) {
|
||||
});
|
||||
}
|
||||
co_await apply_resize_plan(tm, plan);
|
||||
if (auto request_id = plan.rack_list_colocation_plan().request_to_resume(); request_id) {
|
||||
topology.paused_rf_change_requests.erase(request_id);
|
||||
}
|
||||
}
|
||||
|
||||
// Reflects the plan in a given token metadata as if the migrations were started but not yet executed.
|
||||
@@ -1662,13 +1697,15 @@ void do_rebalance_tablets(cql_test_env& e,
|
||||
{
|
||||
auto& talloc = e.get_tablet_allocator().local();
|
||||
auto& stm = e.shared_token_metadata().local();
|
||||
auto& sys_ks = e.get_system_keyspace().local();
|
||||
auto& topology = e.get_topology_state_machine().local()._topology;
|
||||
|
||||
// Sanity limit to avoid infinite loops.
|
||||
// The x10 factor is arbitrary, it's there to account for more complex schedules than direct migration.
|
||||
auto max_iterations = 1 + get_tablet_count(stm.get()->tablets()) * 10;
|
||||
|
||||
for (size_t i = 0; i < max_iterations; ++i) {
|
||||
auto plan = talloc.balance_tablets(stm.get(), load_stats ? load_stats->get() : nullptr, skiplist).get();
|
||||
auto plan = talloc.balance_tablets(stm.get(), &topology, &sys_ks, load_stats ? load_stats->get() : nullptr, skiplist).get();
|
||||
if (plan.empty()) {
|
||||
return;
|
||||
}
|
||||
@@ -1676,7 +1713,7 @@ void do_rebalance_tablets(cql_test_env& e,
|
||||
return;
|
||||
}
|
||||
stm.mutate_token_metadata([&] (token_metadata& tm) {
|
||||
return apply_plan(tm, plan);
|
||||
return apply_plan(tm, plan, e.get_topology_state_machine().local()._topology);
|
||||
}).get();
|
||||
|
||||
if (auto_split && load_stats) {
|
||||
@@ -1734,7 +1771,7 @@ void rebalance_tablets(cql_test_env& e,
|
||||
static
|
||||
void rebalance_tablets_as_in_progress(tablet_allocator& talloc, shared_token_metadata& stm, shared_load_stats& stats) {
|
||||
while (true) {
|
||||
auto plan = talloc.balance_tablets(stm.get(), stats.get()).get();
|
||||
auto plan = talloc.balance_tablets(stm.get(), nullptr, nullptr, stats.get()).get();
|
||||
if (plan.empty()) {
|
||||
break;
|
||||
}
|
||||
@@ -1885,7 +1922,7 @@ SEASTAR_THREAD_TEST_CASE(test_no_conflicting_migrations_in_the_plan) {
|
||||
auto& stm = e.shared_token_metadata().local();
|
||||
auto& talloc = e.get_tablet_allocator().local();
|
||||
talloc.set_load_stats(topo.get_load_stats());
|
||||
migration_plan plan = talloc.balance_tablets(stm.get()).get();
|
||||
migration_plan plan = talloc.balance_tablets(stm.get(), nullptr, nullptr).get();
|
||||
|
||||
BOOST_REQUIRE(!plan.empty());
|
||||
std::set<global_tablet_id> tablets;
|
||||
@@ -1976,7 +2013,7 @@ SEASTAR_THREAD_TEST_CASE(test_no_conflicting_internode_and_intra_merge_colocatio
|
||||
auto& stm = e.shared_token_metadata().local();
|
||||
auto& talloc = e.get_tablet_allocator().local();
|
||||
talloc.set_load_stats(topo.get_load_stats());
|
||||
migration_plan plan = talloc.balance_tablets(stm.get()).get();
|
||||
migration_plan plan = talloc.balance_tablets(stm.get(), nullptr, nullptr).get();
|
||||
|
||||
// The plan should contain non-conflicting migrations.
|
||||
BOOST_REQUIRE(!plan.empty());
|
||||
@@ -1989,6 +2026,101 @@ SEASTAR_THREAD_TEST_CASE(test_no_conflicting_internode_and_intra_merge_colocatio
|
||||
}, cfg).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_rack_list_conversion) {
|
||||
do_with_cql_env_thread([] (auto& e) {
|
||||
topology_builder topo(e);
|
||||
|
||||
unsigned shard_count = 1;
|
||||
auto dc1 = topo.dc();
|
||||
auto rack1 = topo.rack();
|
||||
[[maybe_unused]] auto host1 = topo.add_node(node_state::normal, shard_count);
|
||||
[[maybe_unused]] auto host2 = topo.add_node(node_state::normal, shard_count);
|
||||
auto rack2 = topo.start_new_rack();
|
||||
[[maybe_unused]] auto host3 = topo.add_node(node_state::normal, shard_count);
|
||||
[[maybe_unused]] auto host4 = topo.add_node(node_state::normal, shard_count);
|
||||
auto rack3 = topo.start_new_rack();
|
||||
[[maybe_unused]] auto host5 = topo.add_node(node_state::normal, shard_count);
|
||||
[[maybe_unused]] auto host6 = topo.add_node(node_state::normal, shard_count);
|
||||
auto dc2 = topo.start_new_dc().dc;
|
||||
[[maybe_unused]] auto host7 = topo.add_node(node_state::normal, shard_count);
|
||||
[[maybe_unused]] auto host8 = topo.add_node(node_state::normal, shard_count);
|
||||
|
||||
auto ks_name = add_keyspace(e, {{dc1, 2}}, 4);
|
||||
auto table1 = add_table(e, ks_name).get();
|
||||
|
||||
// rack1: host1: A D host2: C
|
||||
// rack2: host3: A host4: B
|
||||
// rack3: host5: C host6: B D
|
||||
tablet_id A{0}, B{0};
|
||||
mutate_tablets(e, [&] (tablet_metadata& tmeta) -> future<> {
|
||||
tablet_map tmap(4);
|
||||
auto tid = tmap.first_tablet();
|
||||
A = tid;
|
||||
tmap.set_tablet(tid, tablet_info { // A
|
||||
tablet_replica_set {
|
||||
tablet_replica{host1, 0},
|
||||
tablet_replica{host3, 0},
|
||||
}
|
||||
});
|
||||
tid = *tmap.next_tablet(tid);
|
||||
B = tid;
|
||||
tmap.set_tablet(tid, tablet_info { // B
|
||||
tablet_replica_set {
|
||||
tablet_replica{host4, 0},
|
||||
tablet_replica{host6, 0},
|
||||
}
|
||||
});
|
||||
tid = *tmap.next_tablet(tid);
|
||||
tmap.set_tablet(tid, tablet_info { // C
|
||||
tablet_replica_set {
|
||||
tablet_replica{host2, 0},
|
||||
tablet_replica{host5, 0},
|
||||
}
|
||||
});
|
||||
tid = *tmap.next_tablet(tid);
|
||||
tmap.set_tablet(tid, tablet_info { // D
|
||||
tablet_replica_set {
|
||||
tablet_replica{host1, 0},
|
||||
tablet_replica{host6, 0},
|
||||
}
|
||||
});
|
||||
tmeta.set_tablet_map(table1, std::move(tmap));
|
||||
co_return;
|
||||
});
|
||||
|
||||
auto id = utils::UUID_gen::get_time_UUID();
|
||||
// Build the map literal for CQL
|
||||
auto rf_change_data_cql = format("{{'replication:class': 'NetworkTopologyStrategy', 'replication:{}:0': '{}', 'replication:{}:1': '{}'}}",
|
||||
dc1, rack1.rack, dc1, rack3.rack);
|
||||
|
||||
e.execute_cql(format("INSERT INTO system.topology_requests (id, request_type, done, new_keyspace_rf_change_ks_name, new_keyspace_rf_change_data) VALUES ({}, 'keyspace_rf_change', False, '{}', {})",
|
||||
id, ks_name, rf_change_data_cql)).get();
|
||||
auto& stm = e.shared_token_metadata().local();
|
||||
auto& talloc = e.get_tablet_allocator().local();
|
||||
talloc.set_load_stats(topo.get_load_stats());
|
||||
auto& sys_ks = e.get_system_keyspace().local();
|
||||
auto& topology = e.get_topology_state_machine().local()._topology;
|
||||
topology.paused_rf_change_requests.insert(id);
|
||||
migration_plan plan = talloc.balance_tablets(stm.get(), &topology, &sys_ks).get();
|
||||
|
||||
BOOST_REQUIRE(!plan.empty());
|
||||
// A : host3 -> host5 / host6
|
||||
// B : host4 -> host1 / host2
|
||||
for (auto& mig : plan.migrations()) {
|
||||
testlog.info("Rack list colocation migration: {}", mig);
|
||||
BOOST_REQUIRE(mig.kind == locator::tablet_transition_kind::migration);
|
||||
BOOST_REQUIRE(mig.src.host == host3 || mig.src.host == host4);
|
||||
if (mig.src.host == host3) {
|
||||
BOOST_REQUIRE(mig.tablet.tablet == A);
|
||||
BOOST_REQUIRE(mig.dst.host == host5 || mig.dst.host == host6);
|
||||
} else {
|
||||
BOOST_REQUIRE(mig.tablet.tablet == B);
|
||||
BOOST_REQUIRE(mig.dst.host == host1 || mig.dst.host == host2);
|
||||
}
|
||||
}
|
||||
}).get();
|
||||
}
|
||||
|
||||
// Throws if tablets have more than 1 replica in a given rack.
|
||||
// Run in seastar thread.
|
||||
void check_no_rack_overload(const token_metadata& tm) {
|
||||
@@ -2035,6 +2167,63 @@ void check_rack_list(const locator::topology& topo, const tablet_map& tmap, sstr
|
||||
}).get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_rack_list_conversion_with_two_replicas_in_rack) {
|
||||
do_with_cql_env_thread([] (auto& e) {
|
||||
topology_builder topo(e);
|
||||
|
||||
unsigned shard_count = 1;
|
||||
auto dc1 = topo.dc();
|
||||
auto rack1 = topo.rack();
|
||||
[[maybe_unused]] auto host1 = topo.add_node(node_state::normal, shard_count);
|
||||
[[maybe_unused]] auto host2 = topo.add_node(node_state::normal, shard_count);
|
||||
auto rack2 = topo.start_new_rack();
|
||||
[[maybe_unused]] auto host3 = topo.add_node(node_state::normal, shard_count);
|
||||
[[maybe_unused]] auto host4 = topo.add_node(node_state::normal, shard_count);
|
||||
auto rack3 = topo.start_new_rack();
|
||||
[[maybe_unused]] auto host5 = topo.add_node(node_state::normal, shard_count);
|
||||
[[maybe_unused]] auto host6 = topo.add_node(node_state::normal, shard_count);
|
||||
|
||||
auto ks_name = add_keyspace(e, {{dc1, 2}}, 2);
|
||||
auto table1 = add_table(e, ks_name).get();
|
||||
|
||||
tablet_id A{0}, B{0};
|
||||
mutate_tablets(e, [&] (tablet_metadata& tmeta) -> future<> {
|
||||
tablet_map tmap(2);
|
||||
auto tid = tmap.first_tablet();
|
||||
A = tid;
|
||||
tmap.set_tablet(tid, tablet_info {
|
||||
tablet_replica_set {
|
||||
tablet_replica{host1, 0},
|
||||
tablet_replica{host2, 0},
|
||||
}
|
||||
});
|
||||
tid = *tmap.next_tablet(tid);
|
||||
B = tid;
|
||||
tmap.set_tablet(tid, tablet_info {
|
||||
tablet_replica_set {
|
||||
tablet_replica{host5, 0},
|
||||
tablet_replica{host6, 0},
|
||||
}
|
||||
});
|
||||
tmeta.set_tablet_map(table1, std::move(tmap));
|
||||
co_return;
|
||||
});
|
||||
|
||||
auto id = utils::UUID_gen::get_time_UUID();
|
||||
// Build the map literal for CQL
|
||||
auto rf_change_data_cql = format("{{'replication:class': 'NetworkTopologyStrategy', 'replication:{}:0': '{}', 'replication:{}:1': '{}'}}",
|
||||
dc1, rack1.rack, dc1, rack2.rack);
|
||||
|
||||
e.execute_cql(format("INSERT INTO system.topology_requests (id, request_type, done, new_keyspace_rf_change_ks_name, new_keyspace_rf_change_data) VALUES ({}, 'keyspace_rf_change', False, '{}', {})",
|
||||
id, ks_name, rf_change_data_cql)).get();
|
||||
auto& stm = e.shared_token_metadata().local();
|
||||
auto& topology = e.get_topology_state_machine().local()._topology;
|
||||
topology.paused_rf_change_requests.insert(id);
|
||||
rebalance_tablets(e);
|
||||
check_rack_list(stm.get()->get_topology(), stm.get()->tablets().get_tablet_map(table1), dc1, {rack1.rack, rack2.rack});
|
||||
}).get();
|
||||
}
|
||||
|
||||
struct alter_result {
|
||||
tablet_map new_tablet_map;
|
||||
replication_strategy_config_options opts;
|
||||
@@ -2940,14 +3129,14 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_shuffle_mode) {
|
||||
rebalance_tablets(e, &topo.get_shared_load_stats());
|
||||
|
||||
auto& stm = e.shared_token_metadata().local();
|
||||
BOOST_REQUIRE(e.get_tablet_allocator().local().balance_tablets(stm.get(), topo.get_load_stats()).get().empty());
|
||||
BOOST_REQUIRE(e.get_tablet_allocator().local().balance_tablets(stm.get(), nullptr, nullptr, topo.get_load_stats()).get().empty());
|
||||
|
||||
utils::get_local_injector().enable("tablet_allocator_shuffle");
|
||||
auto disable_injection = seastar::defer([&] {
|
||||
utils::get_local_injector().disable("tablet_allocator_shuffle");
|
||||
});
|
||||
|
||||
BOOST_REQUIRE(!e.get_tablet_allocator().local().balance_tablets(stm.get(), topo.get_load_stats()).get().empty());
|
||||
BOOST_REQUIRE(!e.get_tablet_allocator().local().balance_tablets(stm.get(), nullptr, nullptr,topo.get_load_stats()).get().empty());
|
||||
}).get();
|
||||
}
|
||||
#endif
|
||||
@@ -3073,7 +3262,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_disabling) {
|
||||
});
|
||||
|
||||
{
|
||||
auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), topo.get_load_stats()).get();
|
||||
auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), nullptr, nullptr, topo.get_load_stats()).get();
|
||||
BOOST_REQUIRE(!plan.empty());
|
||||
}
|
||||
|
||||
@@ -3084,7 +3273,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_disabling) {
|
||||
}).get();
|
||||
|
||||
{
|
||||
auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), topo.get_load_stats()).get();
|
||||
auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), nullptr, nullptr, topo.get_load_stats()).get();
|
||||
BOOST_REQUIRE(plan.empty());
|
||||
}
|
||||
|
||||
@@ -3094,7 +3283,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_disabling) {
|
||||
}).get();
|
||||
|
||||
{
|
||||
auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), topo.get_load_stats()).get();
|
||||
auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), nullptr, nullptr, topo.get_load_stats()).get();
|
||||
BOOST_REQUIRE(plan.empty());
|
||||
}
|
||||
|
||||
@@ -3105,7 +3294,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_disabling) {
|
||||
}).get();
|
||||
|
||||
{
|
||||
auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), topo.get_load_stats()).get();
|
||||
auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), nullptr, nullptr, topo.get_load_stats()).get();
|
||||
BOOST_REQUIRE(!plan.empty());
|
||||
}
|
||||
|
||||
@@ -3115,7 +3304,7 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancer_disabling) {
|
||||
}).get();
|
||||
|
||||
{
|
||||
auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), topo.get_load_stats()).get();
|
||||
auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), nullptr, nullptr, topo.get_load_stats()).get();
|
||||
BOOST_REQUIRE(!plan.empty());
|
||||
}
|
||||
}).get();
|
||||
@@ -3147,7 +3336,7 @@ SEASTAR_THREAD_TEST_CASE(test_drained_node_is_not_balanced_internally) {
|
||||
co_return;
|
||||
});
|
||||
|
||||
migration_plan plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), topo.get_load_stats()).get();
|
||||
migration_plan plan = e.get_tablet_allocator().local().balance_tablets(stm.get(), nullptr, nullptr, topo.get_load_stats()).get();
|
||||
BOOST_REQUIRE(plan.has_nodes_to_drain());
|
||||
for (auto&& mig : plan.migrations()) {
|
||||
BOOST_REQUIRE(mig.kind != tablet_transition_kind::intranode_migration);
|
||||
@@ -4751,7 +4940,7 @@ SEASTAR_THREAD_TEST_CASE(test_ensure_node_for_load_sketch) {
|
||||
|
||||
auto& talloc = e.get_tablet_allocator().local();
|
||||
auto& stm = e.shared_token_metadata().local();
|
||||
talloc.balance_tablets(stm.get(), topo.get_shared_load_stats().get()).get();
|
||||
talloc.balance_tablets(stm.get(), nullptr, nullptr, topo.get_shared_load_stats().get()).get();
|
||||
}).get();
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
#
|
||||
# Copyright (C) 2025-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
#
|
||||
|
||||
import time
|
||||
import asyncio
|
||||
import logging
|
||||
import pytest
|
||||
from test.pylib.rest_client import read_barrier, get_host_api_address
|
||||
from test.pylib.util import unique_name, wait_for_cql_and_get_hosts
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.cluster.auth_cluster import extra_scylla_config_options as auth_config
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def __test_attach_service_level_to_user(request, manager: ManagerClient, is_raft: bool):
|
||||
user = f"test_user_{unique_name()}"
|
||||
|
||||
# Start nodes with correct topology
|
||||
if is_raft:
|
||||
servers = await manager.servers_add(3, config=auth_config)
|
||||
else:
|
||||
conf = {**auth_config, 'force_gossip_topology_changes': True, 'tablets_mode_for_new_keyspaces': 'disabled'}
|
||||
servers = [await manager.server_add(config=conf) for _ in range(3)]
|
||||
|
||||
cql = manager.get_cql()
|
||||
logging.info("Waiting until driver connects to every server")
|
||||
hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
|
||||
ips = [get_host_api_address(host) for host in hosts]
|
||||
|
||||
logging.info("Creating User")
|
||||
await cql.run_async(f"CREATE ROLE {user} WITH login = true AND password='{user}' AND superuser = true")
|
||||
|
||||
connections = await cql.run_async(f"SELECT username, scheduling_group, shard_id FROM system.clients WHERE client_type='cql' AND username='{user}' ALLOW FILTERING")
|
||||
|
||||
verify_service_level = lambda sl : all([conn.scheduling_group == sl for conn in connections])
|
||||
assert verify_service_level("default"), "All connections should be in default service level"
|
||||
|
||||
logging.info("Creating service levels")
|
||||
sls = ["sl" + unique_name() for _ in range(2)]
|
||||
for i, sl in enumerate(sls):
|
||||
await cql.run_async(f"CREATE SERVICE LEVEL {sl} WITH shares = {100 * (i+1)}")
|
||||
|
||||
logging.info("Attach Service Levels to user")
|
||||
for sl in sls:
|
||||
await cql.run_async(f"ATTACH SERVICE LEVEL {sl} TO {user}")
|
||||
|
||||
#if we are not using raft we have to switch the tenant and wait for it to take effect
|
||||
if not is_raft:
|
||||
for ip in ips:
|
||||
await manager.api.client.post('/service_levels/switch_tenants', host=ip)
|
||||
# Switching tenants may be blocked if a connection is waiting for a request (see 'generic_server::connection::process_until_tenant_switch()').
|
||||
# Execute enough cheap statements, so that connection on each shard will process at one statement and update its tenant.
|
||||
for _ in range(100):
|
||||
read_barrier(manager.api, ip)
|
||||
|
||||
assert verify_service_level(sl), f"All connections should be in {sl} service level"
|
||||
await cql.run_async(f"DETACH SERVICE LEVEL FROM {user}")
|
||||
|
||||
await cql.run_async(f"DROP ROLE {user}")
|
||||
for sl in sls:
|
||||
await cql.run_async(f"DROP SERVICE LEVEL {sl}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_attach_service_level_with_raft(request, manager: ManagerClient):
|
||||
await __test_attach_service_level_to_user(request, manager, is_raft=True)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_attach_service_level_with_gossip(request, manager: ManagerClient):
|
||||
await __test_attach_service_level_to_user(request, manager, is_raft=False)
|
||||
@@ -604,18 +604,14 @@ async def test_driver_service_creation_failure(manager: ManagerClient) -> None:
|
||||
service_level_names = [sl.service_level for sl in service_levels]
|
||||
assert "driver" not in service_level_names
|
||||
|
||||
def get_processed_tasks_for_group(metrics, group):
|
||||
res = metrics.get("scylla_scheduler_tasks_processed", {'group': group})
|
||||
if res is None:
|
||||
return 0
|
||||
return res
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def _verify_tasks_processed_metrics(manager, server, used_group, unused_group, func):
|
||||
number_of_requests = 1000
|
||||
number_of_requests = 3000
|
||||
|
||||
def get_processed_tasks_for_group(metrics, group):
|
||||
res = metrics.get("scylla_scheduler_tasks_processed", {'group': group})
|
||||
logger.info(f"group={group}, tasks_processed={res}")
|
||||
|
||||
if res is None:
|
||||
return 0
|
||||
return res
|
||||
@@ -627,8 +623,10 @@ async def _verify_tasks_processed_metrics(manager, server, used_group, unused_gr
|
||||
await asyncio.gather(*[asyncio.to_thread(func) for i in range(number_of_requests)])
|
||||
|
||||
metrics = await manager.metrics.query(server.ip_addr)
|
||||
assert get_processed_tasks_for_group(metrics, used_group) - initial_tasks_processed_by_used_group > number_of_requests
|
||||
assert get_processed_tasks_for_group(metrics, unused_group) - initial_tasks_processed_by_unused_group < number_of_requests
|
||||
tasks_processed_by_used_group = get_processed_tasks_for_group(metrics, used_group)
|
||||
tasks_processed_by_unused_group = get_processed_tasks_for_group(metrics, unused_group)
|
||||
assert tasks_processed_by_used_group - initial_tasks_processed_by_used_group > number_of_requests
|
||||
assert tasks_processed_by_unused_group - initial_tasks_processed_by_unused_group < number_of_requests
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_driver_service_level_not_used_for_user_queries(manager: ManagerClient) -> None:
|
||||
|
||||
@@ -52,6 +52,18 @@ KNOWN_LOG_LEVELS = {
|
||||
"OFF": "info",
|
||||
}
|
||||
|
||||
# Captures the aggregate metric before the "[READ ..., WRITE ...]" block.
|
||||
STRESS_SUMMARY_PATTERN = re.compile(r'^\s*([\d\.\,]+\d?)\s*\[.*')
|
||||
|
||||
# Extracts the READ metric number inside the "[READ ..., WRITE ...]" block.
|
||||
STRESS_READ_PATTERN = re.compile(r'.*READ:\s*([\d\.\,]+\d?)[^\d].*')
|
||||
|
||||
# Extracts the WRITE metric number inside the "[READ ..., WRITE ...]" block.
|
||||
STRESS_WRITE_PATTERN = re.compile(r'.*WRITE:\s*([\d\.\,]+\d?)[^\d].*')
|
||||
|
||||
# Splits a "key : value" line into key and value.
|
||||
STRESS_KEY_VALUE_PATTERN = re.compile(r'^\s*([^:]+)\s*:\s*(\S.*)\s*$')
|
||||
|
||||
|
||||
class NodeError(Exception):
|
||||
def __init__(self, msg: str, process: int | None = None):
|
||||
@@ -528,6 +540,15 @@ class ScyllaNode:
|
||||
return self.cluster.manager.server_get_workdir(server_id=self.server_id)
|
||||
|
||||
def stress(self, stress_options: list[str], **kwargs):
|
||||
"""
|
||||
Run `cassandra-stress` against this node.
|
||||
This method does not do any result parsing.
|
||||
|
||||
:param stress_options: List of options to pass to `cassandra-stress`.
|
||||
:param kwargs: Additional arguments to pass to `subprocess.Popen()`.
|
||||
:return: Named tuple with `stdout`, `stderr`, and `rc` (return code).
|
||||
"""
|
||||
|
||||
cmd_args = ["cassandra-stress"] + stress_options
|
||||
|
||||
if not any(opt in cmd_args for opt in ("-d", "-node", "-cloudconf")):
|
||||
@@ -549,6 +570,73 @@ class ScyllaNode:
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
|
||||
|
||||
def _set_stress_val(self, key, val, res):
|
||||
"""
|
||||
Normalize a stress result string and populate aggregate/read/write metrics.
|
||||
|
||||
Removes comma-thousands separators from numbers, converts to float,
|
||||
stores the aggregate metric under `key`.
|
||||
If the value contains a "[READ ..., WRITE ...]" block, also stores the
|
||||
read and write metrics under `key:read` and `key:write`.
|
||||
|
||||
:param key: The metric name
|
||||
:param val: The metric value string
|
||||
:param res: The dictionary to populate
|
||||
"""
|
||||
|
||||
def parse_num(s):
|
||||
return float(s.replace(',', ''))
|
||||
|
||||
if "[" in val:
|
||||
p = STRESS_SUMMARY_PATTERN
|
||||
m = p.match(val)
|
||||
if m:
|
||||
res[key] = parse_num(m.group(1))
|
||||
p = STRESS_READ_PATTERN
|
||||
m = p.match(val)
|
||||
if m:
|
||||
res[key + ":read"] = parse_num(m.group(1))
|
||||
p = STRESS_WRITE_PATTERN
|
||||
m = p.match(val)
|
||||
if m:
|
||||
res[key + ":write"] = parse_num(m.group(1))
|
||||
else:
|
||||
try:
|
||||
res[key] = parse_num(val)
|
||||
except ValueError:
|
||||
res[key] = val
|
||||
|
||||
|
||||
def stress_object(self, stress_options=None, ignore_errors=None, **kwargs):
|
||||
"""
|
||||
Run stress test and return results as a structured metrics dictionary.
|
||||
|
||||
Runs `stress()`, finds the `Results:` section in `stdout`, and then
|
||||
processes each `key : value` line, putting it into a dictionary.
|
||||
|
||||
:param stress_options: List of stress options to pass to `stress()`.
|
||||
:param ignore_errors: Deprecated (no effect).
|
||||
:param kwargs: Additional arguments to pass to `stress()`.
|
||||
:return: Dictionary of stress test results.
|
||||
"""
|
||||
if ignore_errors:
|
||||
self.warning("passing `ignore_errors` to stress_object() is deprecated")
|
||||
ret = self.stress(stress_options, **kwargs)
|
||||
p = STRESS_KEY_VALUE_PATTERN
|
||||
res = {}
|
||||
start = False
|
||||
for line in (s.strip() for s in ret.stdout.splitlines()):
|
||||
if start:
|
||||
m = p.match(line)
|
||||
if m:
|
||||
self._set_stress_val(m.group(1).strip().lower(), m.group(2).strip(), res)
|
||||
else:
|
||||
if line == 'Results:':
|
||||
start = True
|
||||
return res
|
||||
|
||||
|
||||
def flush(self, ks: str | None = None, table: str | None = None, **kwargs) -> None:
|
||||
cmd = ["flush"]
|
||||
if ks:
|
||||
|
||||
690
test/cluster/dtest/schema_management_test.py
Normal file
690
test/cluster/dtest/schema_management_test.py
Normal file
@@ -0,0 +1,690 @@
|
||||
#
|
||||
# Copyright (C) 2015-present The Apache Software Foundation
|
||||
# Copyright (C) 2025-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
#
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import string
|
||||
import threading
|
||||
import time
|
||||
from concurrent import futures
|
||||
from typing import NamedTuple
|
||||
|
||||
import pytest
|
||||
from cassandra import AlreadyExists, ConsistencyLevel, InvalidRequest
|
||||
from cassandra.concurrent import execute_concurrent_with_args
|
||||
from cassandra.query import SimpleStatement, dict_factory
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from dtest_class import Tester, create_cf, create_ks, read_barrier
|
||||
from tools.assertions import assert_all, assert_invalid
|
||||
from tools.cluster_topology import generate_cluster_topology
|
||||
from tools.data import create_c1c2_table, insert_c1c2, query_c1c2, rows_to_list
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class TestSchemaManagement(Tester):
|
||||
def prepare(self, racks_num: int, has_config: bool = True):
|
||||
cluster = self.cluster
|
||||
cluster_topology = generate_cluster_topology(rack_num=racks_num)
|
||||
|
||||
if has_config:
|
||||
config = {
|
||||
"ring_delay_ms": 5000,
|
||||
}
|
||||
cluster.set_configuration_options(values=config)
|
||||
|
||||
cluster.populate(cluster_topology)
|
||||
cluster.start(wait_other_notice=True)
|
||||
|
||||
return cluster
|
||||
|
||||
|
||||
def test_prepared_statements_work_after_node_restart_after_altering_schema_without_changing_columns(self):
|
||||
cluster = self.prepare(racks_num=3)
|
||||
|
||||
[node1, node2, node3] = cluster.nodelist()
|
||||
|
||||
session = self.patient_cql_connection(node1)
|
||||
|
||||
logger.debug("Creating schema...")
|
||||
create_ks(session, "ks", 3)
|
||||
session.execute(
|
||||
"""
|
||||
CREATE TABLE users (
|
||||
id int,
|
||||
firstname text,
|
||||
lastname text,
|
||||
PRIMARY KEY (id)
|
||||
);
|
||||
"""
|
||||
)
|
||||
|
||||
insert_statement = session.prepare("INSERT INTO users (id, firstname, lastname) VALUES (?, 'A', 'B')")
|
||||
insert_statement.consistency_level = ConsistencyLevel.ALL
|
||||
session.execute(insert_statement, [0])
|
||||
|
||||
logger.debug("Altering schema")
|
||||
session.execute("ALTER TABLE users WITH comment = 'updated'")
|
||||
|
||||
logger.debug("Restarting node2")
|
||||
node2.stop(gently=True)
|
||||
node2.start(wait_for_binary_proto=True)
|
||||
|
||||
logger.debug("Restarting node3")
|
||||
node3.stop(gently=True)
|
||||
node3.start(wait_for_binary_proto=True, wait_other_notice=True)
|
||||
|
||||
n_partitions = 20
|
||||
for i in range(n_partitions):
|
||||
session.execute(insert_statement, [i])
|
||||
|
||||
rows = session.execute("SELECT * FROM users")
|
||||
res = sorted(rows)
|
||||
assert len(res) == n_partitions
|
||||
for i in range(n_partitions):
|
||||
expected = [i, "A", "B"]
|
||||
assert list(res[i]) == expected, f"Expected {expected}, got {res[i]}"
|
||||
|
||||
def test_dropping_keyspace_with_many_columns(self):
|
||||
"""
|
||||
Exploits https://github.com/scylladb/scylla/issues/1484
|
||||
"""
|
||||
cluster = self.prepare(racks_num=1, has_config=False)
|
||||
|
||||
node1 = cluster.nodelist()[0]
|
||||
session = self.patient_cql_connection(node1)
|
||||
|
||||
session.execute("CREATE KEYSPACE testxyz WITH replication = { 'class' : 'NetworkTopologyStrategy', 'replication_factor' : 1 }")
|
||||
for i in range(8):
|
||||
session.execute(f"CREATE TABLE testxyz.test_{i} (k int, c int, PRIMARY KEY (k),)")
|
||||
session.execute("drop keyspace testxyz")
|
||||
|
||||
for node in cluster.nodelist():
|
||||
s = self.patient_cql_connection(node)
|
||||
s.execute("CREATE KEYSPACE testxyz WITH replication = { 'class' : 'NetworkTopologyStrategy', 'replication_factor' : 1 }")
|
||||
s.execute("drop keyspace testxyz")
|
||||
|
||||
def test_multiple_create_table_in_parallel(self):
    """
    Run multiple create table statements via different nodes
    1. Create a cluster of 3 nodes
    2. Run create table with different table names in parallel - check all complete
    3. Run create table with the same table name in parallel - check if they complete
    """
    logger.debug("1. Create a cluster of 3 nodes")
    nodes_count = 3
    cluster = self.prepare(racks_num=nodes_count)
    # One exclusive session per node so each CREATE TABLE goes through a different coordinator
    sessions = [self.patient_exclusive_cql_connection(node) for node in cluster.nodelist()]
    ks = "ks"
    create_ks(sessions[0], ks, nodes_count)

    def create_table(session, table_name):
        # Helper run concurrently from the thread pools below
        create_statement = f"CREATE TABLE {ks}.{table_name} (p int PRIMARY KEY, c0 text, c1 text, c2 text, c3 text, c4 text, c5 text, c6 text, c7 text, c8 text, c9 text);"
        logger.debug(f"create_statement {create_statement}")
        session.execute(create_statement)

    logger.debug("2. Run create table with different table names in parallel - check all complete")
    step2_tables = [f"t{i}" for i in range(nodes_count)]
    with ThreadPoolExecutor(max_workers=nodes_count) as executor:
        list(executor.map(create_table, sessions, step2_tables))

    # Every new table must be usable for writes and reads at CL=ALL
    for table in step2_tables:
        sessions[0].execute(SimpleStatement(f"INSERT INTO {ks}.{table} (p) VALUES (1)", consistency_level=ConsistencyLevel.ALL))
        rows = sessions[0].execute(SimpleStatement(f"SELECT * FROM {ks}.{table}", consistency_level=ConsistencyLevel.ALL))
        assert len(rows_to_list(rows)) == 1, f"Expected 1 row but got rows:{rows} instead"

    logger.debug("3. Run create table with the same table name in parallel - check if they complete")
    step3_table = "test"
    step3_tables = [step3_table for i in range(nodes_count)]
    with ThreadPoolExecutor(max_workers=nodes_count) as executor:
        res_futures = [executor.submit(create_table, *args) for args in zip(sessions, step3_tables)]
        for res_future in res_futures:
            try:
                res_future.result()
            except AlreadyExists as e:
                # Concurrent identical CREATEs race; losers legitimately see AlreadyExists
                logger.info(f"expected cassandra.AlreadyExists error {e}")

    sessions[0].execute(SimpleStatement(f"INSERT INTO {ks}.{step3_table} (p) VALUES (1)", consistency_level=ConsistencyLevel.ALL))
    sessions[0].execute(f"SELECT * FROM {ks}.{step3_table}")
    rows = sessions[0].execute(SimpleStatement(f"SELECT * FROM {ks}.{step3_table}", consistency_level=ConsistencyLevel.ALL))
    assert len(rows_to_list(rows)) == 1, f"Expected 1 row but got rows:{rows} instead"
|
||||
|
||||
@pytest.mark.parametrize("case", ("write", "read", "mixed"))
def test_alter_table_in_parallel_to_read_and_write(self, case):
    """
    Create a table and write into while altering the table
    1. Create a cluster of 3 nodes and populate a table
    2. Run write/read/read_and_write" statement in a loop
    3. Alter table while inserts are running
    """
    logger.debug("1. Create a cluster of 3 nodes and populate a table")
    cluster = self.prepare(racks_num=3)
    col_number = 20

    [node1, node2, node3] = cluster.nodelist()
    session = self.patient_exclusive_cql_connection(node1)

    def run_stress(stress_type, col=col_number - 2):
        # Default column count is col_number - 2: after the ALTER below drops two
        # columns, later stress runs must match the reduced schema
        node2.stress_object([stress_type, "n=10000", "cl=QUORUM", "-schema", "replication(factor=3)", "-col", f"n=FIXED({col})", "-rate", "threads=1"])

    logger.debug("Populate")
    run_stress("write", col_number)

    with ThreadPoolExecutor(max_workers=1) as executor:
        logger.debug(f"2. Run {case} statement in a loop")
        statement_future = executor.submit(functools.partial(run_stress, case))

        logger.debug(f"let's {case} statement work some time")
        time.sleep(2)

        logger.debug("3. Alter table while inserts are running")
        alter_statement = f'ALTER TABLE keyspace1.standard1 DROP ("C{col_number - 1}", "C{col_number - 2}")'
        logger.debug(f"alter_statement {alter_statement}")
        alter_result = session.execute(alter_statement)
        logger.debug(alter_result.all())

        logger.debug(f"wait till {case} statement finished")
        statement_future.result()

    # key column + (col_number - 2) remaining data columns == col_number - 1 columns
    rows = session.execute(SimpleStatement("SELECT * FROM keyspace1.standard1 LIMIT 1;", consistency_level=ConsistencyLevel.ALL))
    assert len(rows_to_list(rows)[0]) == col_number - 1, f"Expected {col_number - 1} columns but got rows:{rows} instead"

    logger.debug("read and check data")
    run_stress("read")
|
||||
|
||||
@pytest.mark.skip("unimplemented")
def commitlog_replays_after_schema_change(self):
    """
    Commitlog can be replayed even though schema has been changed
    1. Create a table and insert data
    2. Alter table
    3. Kill node
    4. Boot node and verify that commitlog have been replayed and that all data is restored
    """
    # Placeholder scenario — intentionally unimplemented (note: the name lacks the
    # test_ prefix, so pytest would not collect it even without the skip marker)
    raise NotImplementedError
|
||||
|
||||
@pytest.mark.parametrize("case", ("create_table", "alter_table", "drop_table"))
def test_update_schema_while_node_is_killed(self, case):
    """
    Check that a node that is killed during a table creation/alter/drop is able
    to rejoin and to synch on schema.

    :param case: which DDL operation runs concurrently with the node kill
    """

    logger.debug("1. Create a cluster and insert data")
    cluster = self.prepare(racks_num=3)

    [node1, node2, node3] = cluster.nodelist()

    session = self.patient_cql_connection(node1)

    def create_table_case():
        # Create ks.cf and populate it with 10 rows
        try:
            logger.debug("Creating table")
            create_c1c2_table(session)
            logger.debug("Populating")
            insert_c1c2(session, n=10)
        except AlreadyExists:
            # the CQL command can be called multiple times in case of retries
            pass

    def alter_table_case():
        try:
            session.execute("ALTER TABLE ks.cf ADD (c3 text);", timeout=180)
        except InvalidRequest as exc:
            # the CQL command can be called multiple times in case of retries
            assert "Invalid column name c3" in str(exc)

    def drop_table_case():
        try:
            session.execute("DROP TABLE cf;", timeout=180)
        except InvalidRequest as exc:
            # the CQL command can be called multiple times in case of retries
            assert "Cannot drop non existing table" in str(exc)

    logger.debug("Creating keyspace")
    create_ks(session, "ks", 3)
    if case != "create_table":
        # alter/drop need the table to exist before the concurrent DDL runs
        create_table_case()

    case_map = {
        "create_table": create_table_case,
        "alter_table": alter_table_case,
        "drop_table": drop_table_case,
    }
    with ThreadPoolExecutor(max_workers=1) as executor:
        logger.debug(f"2. kill node during {case}")
        kill_node_future = executor.submit(node2.stop, gently=False, wait_other_notice=True)
        case_map[case]()
        kill_node_future.result()

    logger.debug("3. Start the stopped node2")
    node2.start(wait_for_binary_proto=True)

    # Reconnect through the restarted node and wait for it to catch up on schema
    session = self.patient_exclusive_cql_connection(node2)
    read_barrier(session)

    def create_or_alter_table_expected_result(col_mun):
        # Verify the column count of ks.cf and that all 10 populated rows are readable
        rows = session.execute(SimpleStatement("SELECT * FROM ks.cf LIMIT 1;", consistency_level=ConsistencyLevel.QUORUM))
        assert len(rows_to_list(rows)[0]) == col_mun, f"Expected {col_mun} columns but got rows:{rows} instead"
        for key in range(10):
            query_c1c2(session=session, key=key, consistency=ConsistencyLevel.QUORUM)

    expected_case_result_map = {
        "create_table": functools.partial(create_or_alter_table_expected_result, 3),
        "alter_table": functools.partial(create_or_alter_table_expected_result, 4),
        # Fixed: verify the table that was actually dropped (ks.cf), not the unrelated
        # name "test1" — querying a never-existing table made this check pass trivially
        # regardless of whether the DROP propagated
        "drop_table": functools.partial(assert_invalid, session, "SELECT * FROM ks.cf"),
    }
    logger.debug("verify that commitlog has been replayed and that all data is restored")
    expected_case_result_map[case]()
|
||||
|
||||
@pytest.mark.parametrize("is_gently_stop", [True, False])
def test_nodes_rejoining_a_cluster_synch_on_schema(self, is_gently_stop):
    """
    Nodes rejoining the cluster synch on schema changes
    1. Create a cluster and insert data
    2. Stop a node
    3. Alter table
    4. Insert additional data
    5. Start the stopped node
    6. Verify the stopped node synchs on the updated schema

    :param is_gently_stop: whether the node is stopped gracefully or killed
    """

    logger.debug("1. Create a cluster and insert data")
    cluster = self.prepare(racks_num=3)

    [node1, node2, node3] = cluster.nodelist()

    session = self.patient_cql_connection(node1)

    logger.debug("Creating schema")
    create_ks(session, "ks", 3)
    # NOTE(review): create_c1c2_table and create_cf both appear to target a table
    # named 'cf' — confirm these two helpers do not conflict
    create_c1c2_table(session)
    create_cf(session, "cf", key_name="p", key_type="int", columns={"v": "text"})

    logger.debug("Populating")
    insert_c1c2(session, n=10, consistency=ConsistencyLevel.ALL)

    logger.debug("2 Stop a node1")
    node1.stop(gently=is_gently_stop, wait_other_notice=True)

    logger.debug("3 Alter table")
    # node1 is down, so drive the schema change through node2
    session = self.patient_cql_connection(node2)
    session.execute("ALTER TABLE ks.cf ADD (c3 text);", timeout=180)

    logger.debug("4 Insert additional data")
    session.execute(SimpleStatement("INSERT INTO ks.cf (key, c1, c2, c3) VALUES ('test', 'test', 'test', 'test')", consistency_level=ConsistencyLevel.QUORUM))

    logger.debug("5. Start the stopped node1")
    node1.start(wait_for_binary_proto=True)

    logger.debug("6. Verify the stopped node synchs on the updated schema")
    session = self.patient_exclusive_cql_connection(node1)
    # Barrier ensures node1 has applied the schema changes made while it was down
    read_barrier(session)

    rows = session.execute(SimpleStatement("SELECT * FROM ks.cf WHERE key='test'", consistency_level=ConsistencyLevel.ALL))
    expected = [["test", "test", "test", "test"]]
    assert rows_to_list(rows) == expected, f"Expected {expected} but got {rows} instead"
    for key in range(10):
        query_c1c2(session=session, key=key, consistency=ConsistencyLevel.ALL)
|
||||
|
||||
def test_reads_schema_recreated_while_node_down(self):
    """
    Drop and re-create a table with a different column set while one node is
    down; after that node restarts, CL=ALL reads must see the new, empty table.
    """
    cluster = self.prepare(racks_num=3)

    [node1, node2, node3] = cluster.nodelist()

    session = self.patient_cql_connection(node1)

    logger.debug("Creating schema")
    create_ks(session, "ks", 3)
    session.execute("CREATE TABLE cf (p int PRIMARY KEY, v text);")

    logger.debug("Populating")
    session.execute(SimpleStatement("INSERT INTO cf (p, v) VALUES (1, '1')", consistency_level=ConsistencyLevel.ALL))

    logger.debug("Stopping node2")
    node2.stop(gently=True)

    logger.debug("Re-creating schema")
    # node2 is down while cf is dropped and re-created with a different schema
    session.execute("DROP TABLE cf;")
    session.execute("CREATE TABLE cf (p int PRIMARY KEY, v1 bigint, v2 text);")

    logger.debug("Restarting node2")
    node2.start(wait_for_binary_proto=True)
    session2 = self.patient_cql_connection(node2)
    # Ensure node2 has caught up on the schema changes made while it was down
    read_barrier(session2)

    # The re-created table must be empty: the pre-drop row must not resurface
    rows = session.execute(SimpleStatement("SELECT * FROM cf", consistency_level=ConsistencyLevel.ALL))
    assert rows_to_list(rows) == [], f"Expected an empty result set, got {rows}"
|
||||
|
||||
def test_writes_schema_recreated_while_node_down(self):
    """
    Drop and re-create a table (same column set) while one node is down; after
    that node restarts, CL=ALL writes must succeed and only post-recreate data
    must be visible.
    """
    cluster = self.prepare(racks_num=3)

    [node1, node2, node3] = cluster.nodelist()

    session = self.patient_cql_connection(node1)

    logger.debug("Creating schema")
    create_ks(session, "ks", 3)
    session.execute("CREATE TABLE cf (p int PRIMARY KEY, v text);")

    logger.debug("Populating")
    session.execute(SimpleStatement("INSERT INTO cf (p, v) VALUES (1, '1')", consistency_level=ConsistencyLevel.ALL))

    logger.debug("Stopping node2")
    node2.stop(gently=True, wait_other_notice=True)

    logger.debug("Re-creating schema")
    # node2 is down while cf is dropped and re-created
    session.execute("DROP TABLE cf;")
    session.execute("CREATE TABLE cf (p int PRIMARY KEY, v text);")

    logger.debug("Restarting node2")
    node2.start(wait_for_binary_proto=True)
    session2 = self.patient_cql_connection(node2)
    # Ensure node2 has caught up on the schema changes made while it was down
    read_barrier(session2)

    # CL=ALL requires node2 to accept the write against the re-created table
    session.execute(SimpleStatement("INSERT INTO cf (p, v) VALUES (2, '2')", consistency_level=ConsistencyLevel.ALL))

    # Only the post-recreate row must exist; the pre-drop row must not resurface
    rows = session.execute(SimpleStatement("SELECT * FROM cf", consistency_level=ConsistencyLevel.ALL))
    expected = [[2, "2"]]
    assert rows_to_list(rows) == expected, f"Expected {expected}, got {rows_to_list(rows)}"
|
||||
|
||||
|
||||
class TestLargePartitionAlterSchema(Tester):
    # Issue scylladb/scylla: #5135:
    #
    # Issue: Cache reads may miss some writes if schema alter followed by a read happened concurrently with preempted
    # partition entry update
    # Affects only tables with multi-row partitions, which are the only ones that can experience the update of partition
    # entry being preempted.
    #
    # The scenario in which the problem could have happened has to involve:
    # - a large partition with many rows, large enough for preemption (every 0.5ms) to happen during the scan of the partition.
    # - appending writes to the partition (not overwrites)
    # - scans of the partition
    # - schema alter of that table. The issue is exposed only by adding or dropping a column, such that the added/dropped
    #   column lands in the middle (in alphabetical order) of the old column set.
    #
    # Memtable flush has to happen after a schema alter concurrently with a read.
    #
    # The bug could result in cache corruption which manifests as some past writes being missing (not visible to reads).

    # Number of distinct partition keys written by populate()
    PARTITIONS = 50
    # Payload stored in both text columns of every row
    STRING_VALUE = string.ascii_lowercase
|
||||
|
||||
def prepare(self, cluster_topology: dict[str, dict[str, int]], rf: int):
    """
    Start the cluster (first call only) with the given topology and create the
    test schema.

    :param cluster_topology: topology mapping passed to cluster.populate()
    :param rf: replication factor for the test keyspace
    :return: a CQL session connected to the first node
    """
    if not self.cluster.nodelist():
        # Populate/start only once; subsequent calls reuse the running cluster
        self.cluster.populate(cluster_topology)
        self.cluster.start(wait_other_notice=True)

    node1 = self.cluster.nodelist()[0]
    session = self.patient_cql_connection(node=node1)
    self.create_schema(session=session, rf=rf)

    return session
|
||||
|
||||
def create_schema(self, session, rf):
    """
    Create keyspace 'ks' with the given replication factor and the
    multi-row-partition table lp_table used by the tests in this class.
    """
    logger.debug("Creating schema")
    create_ks(session=session, name="ks", rf=rf)

    session.execute(
        """
        CREATE TABLE lp_table (
            pk int,
            ck1 int,
            val1 text,
            val2 text,
            PRIMARY KEY (pk, ck1)
        );
        """
    )
|
||||
|
||||
def populate(self, session, data, ck_start, ck_end=None, stop_populating: threading.Event = None):
    """
    Insert rows (pk, ck, STRING_VALUE, STRING_VALUE) into lp_table for every pk
    in range(PARTITIONS), advancing ck from ck_start until ck_end is reached or
    stop_populating is set. Each written row is also appended to `data`.

    :param session: CQL session used for the prepared inserts
    :param data: list accumulating the expected rows (mutated in place)
    :param ck_start: first clustering key value to write
    :param ck_end: exclusive upper bound for ck; None means "run until stopped"
    :param stop_populating: event that terminates an unbounded population loop
    :return: the (mutated) `data` list
    """
    ck = ck_start

    def _populate_loop():
        # Lazy generator of insert parameters, consumed by
        # execute_concurrent_with_args below; updates the enclosing ck counter.
        nonlocal ck
        while True:
            if stop_populating is not None and stop_populating.is_set():
                return
            if ck_end is not None and ck >= ck_end:
                return
            for pk in range(self.PARTITIONS):
                row = [pk, ck, self.STRING_VALUE, self.STRING_VALUE]
                data.append(row)
                yield tuple(row)
            ck += 1

    logger.debug(f"Start populate DB: {self.PARTITIONS} partitions with {ck_end - ck_start if ck_end else 'infinite'} records in each partition")

    parameters = _populate_loop()

    stmt = session.prepare("INSERT INTO lp_table (pk, ck1, val1, val2) VALUES (?, ?, ?, ?)")

    execute_concurrent_with_args(session=session, statement=stmt, parameters=parameters, concurrency=100)
    # Bug fix: compute the count only after the generator has been fully consumed.
    # Previously this was computed before execute_concurrent_with_args ran, so the
    # "Finish populate" message always reported 0 records written.
    records_written = ck - ck_start
    logger.debug(f"Finish populate DB: {self.PARTITIONS} partitions with {records_written} records in each partition")
    return data
|
||||
|
||||
def read(self, session, ck_max, stop_reading: threading.Event = None):
    """
    Point-read every (pk, ck1) pair with ck1 < ck_max across all partitions.

    With a stop_reading event supplied, sweeps repeatedly until the event is
    set; with stop_reading=None a single full sweep is performed.
    """
    def _read_loop():
        while True:
            for ck in range(ck_max):
                for pk in range(self.PARTITIONS):
                    if stop_reading is not None and stop_reading.is_set():
                        return
                    session.execute(f"select * from lp_table where pk = {pk} and ck1 = {ck}")
            if stop_reading is None:
                # One full sweep is enough when no stop event is supplied
                return

    logger.debug(f"Start reading..")
    _read_loop()
    logger.debug(f"Finish reading..")
|
||||
|
||||
def add_column(self, session, column_name, column_type):
    """Add a column of the given CQL type to lp_table via ALTER TABLE."""
    statement = f"ALTER TABLE lp_table ADD {column_name} {column_type}"
    logger.debug(f"Add {column_name} column")
    session.execute(statement)
|
||||
|
||||
def drop_column(self, session, column_name):
    """Drop the named column from lp_table via ALTER TABLE."""
    statement = f"ALTER TABLE lp_table DROP {column_name}"
    logger.debug(f"Drop {column_name} column")
    session.execute(statement)
|
||||
|
||||
def test_large_partition_with_add_column(self):
    """
    Reproducer for scylladb/scylla#5135 (ADD COLUMN variant): alter the table
    while large partitions are concurrently written and read, flush, then
    verify no previously written rows are missing.
    """
    cluster_topology = generate_cluster_topology()
    session = self.prepare(cluster_topology, rf=1)
    data = self.populate(session=session, data=[], ck_start=0, ck_end=10)

    threads = []
    timeout = 300
    ck_end = 5000
    if self.cluster.scylla_mode == "debug":
        # Debug builds are much slower — shrink the read range and extend the deadline
        timeout = 900
        ck_end = 500
    with ThreadPoolExecutor(max_workers=2) as executor:
        stop_populating = threading.Event()
        stop_reading = threading.Event()
        # Insert new rows in background
        threads.append(executor.submit(self.populate, session=session, data=data, ck_start=10, ck_end=None, stop_populating=stop_populating))
        threads.append(executor.submit(self.read, session=session, ck_max=ck_end, stop_reading=stop_reading))
        # Wait for running load
        time.sleep(10)
        self.add_column(session, "new_clmn", "int")

        # Memtable flush has to happen after a schema alter concurrently with a read
        logger.debug("Flush data")
        self.cluster.nodelist()[0].flush()

        # Stop populating and reading soon after flush
        time.sleep(1)
        logger.debug("Stop populating and reading")
        stop_populating.set()
        stop_reading.set()

    for future in futures.as_completed(threads, timeout=timeout):
        try:
            future.result()
        except Exception as exc:  # noqa: BLE001
            pytest.fail(f"Generated an exception: {exc}")

    # Add 'null' values for the new column `new_clmn` in the expected data
    for i, _ in enumerate(data):
        data[i].append(None)

    assert_all(session, f"select pk, ck1, val1, val2, new_clmn from lp_table", data, ignore_order=True, print_result_on_failure=False)
|
||||
|
||||
def test_large_partition_with_drop_column(self):
    """
    Reproducer for scylladb/scylla#5135 (DROP COLUMN variant): alter the table
    while large partitions are concurrently written and read, then flush.
    Background workers may legitimately fail with "Unknown identifier val1"
    once the column is gone; any other failure fails the test.
    """
    cluster_topology = generate_cluster_topology()
    session = self.prepare(cluster_topology, rf=1)
    data = self.populate(session=session, data=[], ck_start=0, ck_end=10)

    threads = []
    timeout = 300
    ck_end = 5000
    if self.cluster.scylla_mode == "debug":
        # Debug builds are much slower — shrink the read range and extend the deadline
        timeout = 900
        ck_end = 500
    with ThreadPoolExecutor(max_workers=2) as executor:
        stop_populating = threading.Event()
        stop_reading = threading.Event()
        # Insert new rows in background
        threads.append(executor.submit(self.populate, session=session, data=data, ck_start=10, ck_end=None, stop_populating=stop_populating))
        threads.append(executor.submit(self.read, session=session, ck_max=ck_end, stop_reading=stop_reading))
        # Wait for running load
        time.sleep(10)
        self.drop_column(session=session, column_name="val1")

        # Memtable flush has to happen after a schema alter concurrently with a read
        logger.debug("Flush data")
        self.cluster.nodelist()[0].flush()

        # Stop populating and reading soon after flush
        time.sleep(1)
        logger.debug("Stop populating and reading")
        stop_populating.set()
        stop_reading.set()

    result = []
    for future in futures.as_completed(threads, timeout=timeout):
        try:
            result.append(future.result())
        except Exception as exc:  # noqa: BLE001
            # "Unknown identifier val1" is expected error
            if not len(exc.args) or "Unknown identifier val1" not in exc.args[0]:
                pytest.fail(f"Generated an exception: {exc}")
|
||||
|
||||
|
||||
class HistoryVerifier:
    def __init__(self, table_name="table1", keyspace_name="lwt_load_ks"):
        """
        Initialize parameters for further verification of schema history.
        :param table_name: table whose schema we change and whose schema history we verify
        :param keyspace_name: keyspace containing that table
        """

        self.table_name = table_name
        self.keyspace_name = keyspace_name
        # Schema versions seen so far, in order of appearance (stringified UUIDs)
        self.versions = []
        # version -> {column_name: type} for regular columns of that version
        self.versions_dict = {}
        # The most recent DDL query, kept only for assertion messages
        self.query = ""

    def verify(self, session, expected_current_diff, expected_prev_diff, query):
        """
        Verify current schema history entry by comparing to previous schema entry.
        :param session: python cql session
        :param expected_current_diff: difference of current schema from previous schema
        :param expected_prev_diff: difference of previous schema from current schema
        :param query: The query that created new schema
        """

        def get_table_id(session, keyspace_name, table_name):
            # Resolve the table's UUID from system_schema.tables
            assert keyspace_name, f"Input kesyspcase should have value, keyspace_name={keyspace_name}"
            assert table_name, f"Input table_name should have value, table_name={table_name}"
            query = "select keyspace_name,table_name,id from system_schema.tables"
            query += f" WHERE keyspace_name='{keyspace_name}' AND table_name='{table_name}'"
            current_rows = session.execute(query).current_rows
            assert len(current_rows) == 1, f"Not found table description, ks={keyspace_name} table_name={table_name}"
            res = current_rows[0]
            return res["id"]

        def read_schema_history_table(session, cf_id):
            """
            read system.scylla_table_schema_history and verify current version diff from previous vesion
            :param session: python cql session
            :param cf_id: uuid of the table we changed it's schema
            """

            query = f"select * from system.scylla_table_schema_history WHERE cf_id={cf_id}"
            res = session.execute(query).current_rows
            # Exactly one schema version must be new since the last verify() call
            new_versions = list({
                entry["schema_version"]
                for entry in res
                if str(entry["schema_version"]) not in self.versions
            })
            msg = f"Expect 1, got len(new_versions)={len(new_versions)}"
            assert len(new_versions) == 1, msg
            current_version = str(new_versions[0])
            logger.debug(f"New schema_version {current_version} after executing '{self.query}'")
            # Collect the regular columns belonging to the new version
            columns_list = (
                {"column_name": entry["column_name"], "type": entry["type"]}
                for entry in res
                if entry["kind"] == "regular" and current_version == str(entry["schema_version"])
            )
            self.versions_dict[current_version] = {}
            for item in columns_list:
                self.versions_dict[current_version][item["column_name"]] = item["type"]

            self.versions.append(current_version)
            if len(self.versions) > 1:
                # Compare the new version's column set against the previous one
                # as sets of (column_name, type) pairs, in both directions
                current_id = self.versions[-1]
                previous_id = self.versions[-2]
                set_current = set(self.versions_dict[current_id].items())
                set_previous = set(self.versions_dict[previous_id].items())
                current_diff = set_current - set_previous
                previous_diff = set_previous - set_current
                msg1 = f"Expect diff(new schema,old schema) to be {expected_current_diff} got {current_diff}"
                msg2 = f" query is '{self.query}' versions={current_id},{previous_id}"
                if current_diff != expected_current_diff:
                    logger.debug(msg1 + msg2)
                assert current_diff == expected_current_diff, msg1 + msg2
                msg1 = f"Expect diff(old schema,new schema) to be {expected_prev_diff} got {previous_diff}"
                assert previous_diff == expected_prev_diff, msg1 + msg2

        self.query = query
        cf_id = get_table_id(session, keyspace_name=self.keyspace_name, table_name=self.table_name)
        read_schema_history_table(session, cf_id)
|
||||
|
||||
|
||||
class DDL(NamedTuple):
    """A DDL statement plus the schema-history diffs it is expected to produce."""

    # The CQL statement to execute
    ddl_command: str
    # Expected (column_name, type) pairs added in the new schema version; None skips the check
    expected_current_diff: set | None
    # Expected (column_name, type) pairs removed from the previous version; None skips the check
    expected_prev_diff: set | None
|
||||
|
||||
|
||||
class TestSchemaHistory(Tester):
    def prepare(self):
        """Start a 3-rack/3-node cluster and create the lwt_load_ks keyspace (rf=3)."""
        cluster = self.cluster
        # in case support tablets and rf-rack-valid-keyspaces
        # create cluster with 3 racks with 1 node in each rack
        cluster_topology = generate_cluster_topology(rack_num=3)
        rf = 3
        cluster.populate(cluster_topology).start(wait_other_notice=True)
        # dict_factory so schema-history rows can be accessed by column name
        self.session = self.patient_cql_connection(self.cluster.nodelist()[0], row_factory=dict_factory)
        create_ks(self.session, "lwt_load_ks", rf)

    def test_schema_history_alter_table(self):
        """test schema history changes following alter table cql commands"""
        self.prepare()
        verifier = HistoryVerifier(table_name="table2")
        # Each DDL carries the expected regular-column delta (added / removed
        # (name, type) pairs) between the new schema version and the previous one
        queries_and_expected_diffs = [
            DDL(ddl_command="CREATE TABLE IF NOT EXISTS lwt_load_ks.table2 (pk int PRIMARY KEY, v int, int_col int)", expected_current_diff=None, expected_prev_diff=None),
            DDL(ddl_command="ALTER TABLE lwt_load_ks.table2 ALTER v TYPE varint", expected_current_diff={("v", "varint")}, expected_prev_diff={("v", "int")}),
            DDL(ddl_command="ALTER TABLE lwt_load_ks.table2 ADD (v2 int, v3 int)", expected_current_diff={("v2", "int"), ("v3", "int")}, expected_prev_diff=set()),
            DDL(ddl_command="ALTER TABLE lwt_load_ks.table2 ALTER int_col TYPE varint", expected_current_diff={("int_col", "varint")}, expected_prev_diff={("int_col", "int")}),
            DDL(ddl_command="ALTER TABLE lwt_load_ks.table2 DROP int_col", expected_current_diff=set(), expected_prev_diff={("int_col", "varint")}),
            DDL(ddl_command="ALTER TABLE lwt_load_ks.table2 ADD int_col bigint", expected_current_diff={("int_col", "bigint")}, expected_prev_diff=set()),
            DDL(ddl_command="ALTER TABLE lwt_load_ks.table2 DROP (int_col,v)", expected_current_diff=set(), expected_prev_diff={("int_col", "bigint"), ("v", "varint")}),
        ]
        for ddl in queries_and_expected_diffs:
            self.session.execute(ddl.ddl_command)
            verifier.verify(self.session, ddl.expected_current_diff, ddl.expected_prev_diff, query=ddl.ddl_command)
|
||||
@@ -218,6 +218,18 @@ def assert_row_count_in_select_less(
|
||||
assert count < max_rows_expected, f'Expected a row count < of {max_rows_expected} in query "{query}", but got {count}'
|
||||
|
||||
|
||||
def assert_length_equal(object_with_length, expected_length):
    """
    Assert an object has a specific length.
    @param object_with_length The object whose length will be checked
    @param expected_length The expected length of the object

    Examples:
    assert_length_equal(res, nb_counter)
    """
    actual_length = len(object_with_length)
    assert actual_length == expected_length, f"Expected {object_with_length} to have length {expected_length}, but instead is of length {actual_length}"
|
||||
|
||||
|
||||
def assert_lists_equal_ignoring_order(list1, list2, sort_key=None):
|
||||
"""
|
||||
asserts that the contents of the two provided lists are equal
|
||||
|
||||
@@ -14,6 +14,7 @@ from cassandra.query import SimpleStatement
|
||||
from cassandra.concurrent import execute_concurrent_with_args
|
||||
|
||||
from test.cluster.dtest.dtest_class import create_cf
|
||||
from test.cluster.dtest.tools import assertions
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -51,6 +52,27 @@ def insert_c1c2( # noqa: PLR0913
|
||||
execute_concurrent_with_args(session, statement, [[f"k{k}"] for k in keys], concurrency=concurrency)
|
||||
|
||||
|
||||
def query_c1c2(  # noqa: PLR0913
    session,
    key,
    consistency=ConsistencyLevel.QUORUM,
    tolerate_missing=False,
    must_be_missing=False,
    c1_value="value1",
    c2_value="value2",
    ks="ks",
    cf="cf",
):
    """
    Read back row 'k{key}' from {ks}.{cf} and validate its c1/c2 columns.

    By default exactly one row with the expected values must exist; with
    tolerate_missing=True absence is accepted, and with must_be_missing=True
    the row is required to be absent.
    """
    statement = SimpleStatement(f"SELECT c1, c2 FROM {ks}.{cf} WHERE key='k{key}'", consistency_level=consistency)
    fetched = list(session.execute(statement))
    if must_be_missing:
        assertions.assert_length_equal(fetched, 0)
    if not (tolerate_missing or must_be_missing):
        assertions.assert_length_equal(fetched, 1)
        row = fetched[0]
        assert len(row) == 2 and row[0] == c1_value and row[1] == c2_value, row
|
||||
|
||||
|
||||
def rows_to_list(rows):
    """Convert an iterable of result rows into a list of plain lists."""
    converted = []
    for row in rows:
        converted.append(list(row))
    return converted
|
||||
|
||||
@@ -181,11 +181,14 @@ async def test_random_failures(manager: ManagerClient,
|
||||
LOGGER.info("Found following message in the coordinator's log:\n\t%s", matches[-1][0])
|
||||
await manager.server_stop(server_id=s_info.server_id)
|
||||
|
||||
BANNED_NOTIFICATION = "received notification of being banned from the cluster from"
|
||||
STARTUP_FAILED_PATTERN = f"init - Startup failed:|{BANNED_NOTIFICATION}"
|
||||
|
||||
if s_info in await manager.running_servers():
|
||||
LOGGER.info("Wait until the new node initialization completes or fails.")
|
||||
await server_log.wait_for("init - (Startup failed:|Scylla version .* initialization completed)", timeout=120)
|
||||
await server_log.wait_for(f"init - (Startup failed:|Scylla version .* initialization completed)|{BANNED_NOTIFICATION}", timeout=120)
|
||||
|
||||
if await server_log.grep("init - Startup failed:"):
|
||||
if await server_log.grep(STARTUP_FAILED_PATTERN):
|
||||
LOGGER.info("Check that the new node is dead.")
|
||||
expected_statuses = [psutil.STATUS_DEAD]
|
||||
else:
|
||||
@@ -216,7 +219,7 @@ async def test_random_failures(manager: ManagerClient,
|
||||
else:
|
||||
if s_info in await manager.running_servers():
|
||||
LOGGER.info("The new node is dead. Check if it failed to startup.")
|
||||
assert await server_log.grep("init - Startup failed:")
|
||||
assert await server_log.grep(STARTUP_FAILED_PATTERN)
|
||||
await manager.server_stop(server_id=s_info.server_id) # remove the node from the list of running servers
|
||||
|
||||
LOGGER.info("Try to remove the dead new node from the cluster.")
|
||||
|
||||
@@ -26,6 +26,7 @@ skip_in_release:
|
||||
- test_raft_cluster_features
|
||||
- test_cluster_features
|
||||
- dtest/limits_test
|
||||
- dtest/schema_management_test
|
||||
skip_in_debug:
|
||||
- test_shutdown_hang
|
||||
- test_replace
|
||||
|
||||
294
test/cluster/test_client_routes.py
Normal file
294
test/cluster/test_client_routes.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# Copyright (C) 2025-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
import asyncio
|
||||
import pytest
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from test.cluster.conftest import skip_mode
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.rest_client import HTTPError
|
||||
from test.pylib.util import wait_for
|
||||
from test.cluster.util import trigger_snapshot
|
||||
|
||||
from cassandra.protocol import EventMessage
|
||||
import cassandra.protocol
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
CLIENT_ROUTES_CHANGE_EVENT_NAME = "CLIENT_ROUTES_CHANGE"
|
||||
|
||||
async def wait_for_expected_client_routes_size(cql, expected_routes_size):
    """Poll system.client_routes until it holds exactly expected_routes_size rows, with a 10-second deadline."""
    async def expected_client_routes_size(cql, expected_size):
        # wait_for retries while this returns a falsy value; returning the rows stops the wait
        client_routes = await cql.run_async("SELECT * FROM system.client_routes")
        logger.info(f"Got client routes, expected_size={expected_size}, res={client_routes}")
        if len(client_routes) == expected_size:
            return client_routes
        return None
    await wait_for(lambda: expected_client_routes_size(cql, expected_routes_size), time.time() + 10)
|
||||
|
||||
def generate_connection_id(i):
    """Return a deterministic connection id for index i."""
    # Make the string longer than 30 characters to make sure that in C++ the string has a heap allocation
    padding = "abc" * 10
    return f"connection_id_{i}_{padding}"
|
||||
|
||||
def generate_host_id(i):
    """Return a deterministic UUID string derived from index i (offset by 100)."""
    host_uuid = uuid.UUID(int=i + 100)
    return str(host_uuid)
|
||||
|
||||
def generate_client_routes_entry(i):
    """Build one client-routes REST payload entry for index i."""
    entry = {
        "connection_id": generate_connection_id(i),
        "host_id": generate_host_id(i),
        "address": "addr1.test",
    }
    entry["port"] = 8001
    entry["tls_port"] = 8002
    entry["alternator_port"] = 8003
    entry["alternator_https_port"] = 8004
    return entry
|
||||
|
||||
@pytest.mark.asyncio
async def test_client_routes(request, manager: ManagerClient):
    """
    Grow a cluster node by node while posting client-routes entries, then
    remove a node and verify the client_routes table still reflects POST and
    DELETE operations issued through a surviving node.
    """
    num_servers = 3
    cql = None
    # Run three nodes one by one
    for i in range(num_servers):
        # SMP=2 to verify that requests work properly even when a shard other than 0 receives them
        servers = await manager.servers_add(1, cmdline=['--smp=2'])
        cql, hosts = await manager.get_ready_cql(await manager.running_servers())
        await manager.api.client.post("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(i)])
        await wait_for_expected_client_routes_size(cql, i+1)


    # Remove one node
    running_servers = await manager.running_servers()
    server_to_stop = running_servers[0]
    running_server = running_servers[1]
    await manager.server_stop(server_to_stop.server_id)
    await manager.remove_node(running_server.server_id, server_to_stop.server_id)
    # The three previously posted entries must survive node removal
    await wait_for_expected_client_routes_size(cql, num_servers)

    # Verify everything works
    await manager.api.client.post("/v2/client-routes", host=running_server.ip_addr, json=[generate_client_routes_entry(num_servers + 1)])
    await wait_for_expected_client_routes_size(cql, num_servers + 1)
    await manager.api.client.delete("/v2/client-routes", host=running_server.ip_addr, json=[generate_client_routes_entry(0)])
    await wait_for_expected_client_routes_size(cql, num_servers)
|
||||
|
||||
@pytest.mark.asyncio
async def test_client_routes_node_restart(request, manager: ManagerClient):
    """
    This test verifies that a node receives updates if client routes were updated
    when the node was down.
    """
    servers = await manager.servers_add(3)
    cql, hosts = await manager.get_ready_cql(servers)
    server_to_restart = servers[2]

    # Update the routes while one node is stopped.
    await manager.server_stop(server_to_restart.server_id)
    await manager.api.client.post("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(1)])
    await wait_for_expected_client_routes_size(cql, 1)

    # After restart, query the restarted node exclusively: it must have
    # caught up with the update it missed while down.
    await manager.server_start(server_to_restart.server_id)
    cql = await manager.get_cql_exclusive(server_to_restart)
    await wait_for_expected_client_routes_size(cql, 1)
|
||||
|
||||
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_client_routes_upgrade(request, manager: ManagerClient):
    """
    This test verifies updating the system to a version with the CLIENT_ROUTES feature in the following steps:
    1. Create 2 nodes with the CLIENT_ROUTES feature disabled.
    2. Verify `/v2/client-routes` rejects requests.
    3. Enable the `CLIENT_ROUTES` feature after restart.
    4. Verify `/v2/client-routes` works.
    """
    num_servers = 2
    config = [
        {"name": "suppress_features", "value": "CLIENT_ROUTES"}
    ]
    servers = await manager.servers_add(num_servers, config={'error_injections_at_startup': config})
    cql, hosts = await manager.get_ready_cql(servers)
    # Empty `system.client_routes` is there even if the feature is disabled.
    # Fix: this coroutine was previously not awaited, so the check silently never ran.
    await wait_for_expected_client_routes_size(cql, 0)

    # While the feature is suppressed, every /v2/client-routes verb must be rejected.
    with pytest.raises(HTTPError):
        await manager.api.client.post("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(0)])
    with pytest.raises(HTTPError):
        await manager.api.client.delete("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(0)])
    with pytest.raises(HTTPError):
        await manager.api.client.get("/v2/client-routes", host=servers[0].ip_addr)

    # Drop the suppression and restart every node so the feature can be
    # enabled cluster-wide.
    for server in servers:
        await manager.server_update_config(server.server_id, "error_injections_at_startup", [])
        await manager.server_restart(server.server_id)

    async def client_routes_ready():
        # True once all three verbs succeed; None while the cluster has not
        # finished enabling the feature (wait_for keeps polling on None).
        try:
            await manager.api.client.post("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(0)])
            await manager.api.client.delete("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(0)])
            await manager.api.client.get("/v2/client-routes", host=servers[0].ip_addr)
            return True
        except HTTPError as exc:
            # Allow cluster to be not ready
            if "requires all nodes to support the CLIENT_ROUTES cluster feature" not in exc.message:
                raise exc
            return None

    # Fix: wait_for is a coroutine function and was previously not awaited,
    # so the test never actually waited for (or verified) readiness.
    await wait_for(client_routes_ready, time.time() + 10)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_client_routes_lost_quorum(request, manager: ManagerClient):
    """
    This test verifies that `/v2/client-routes` fails with a timeout if the Raft quorum cannot be reached.
    """
    num_servers = 3
    timeout = 10
    # Shorten the group0 Raft operation timeout so the failure is fast.
    config = {'group0_raft_op_timeout_in_ms': timeout * 1000}
    servers = await manager.servers_add(num_servers, config=config)
    cql, hosts = await manager.get_ready_cql(servers)

    # Sanity check: updates succeed while the quorum is intact.
    await wait_for_expected_client_routes_size(cql, 0)
    await manager.api.client.post("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(0)], timeout=timeout + 60)
    await wait_for_expected_client_routes_size(cql, 1)

    # Stop two of the three nodes — the Raft quorum is now lost.
    for server in servers[1:]:
        await manager.server_stop(server.server_id)

    async def fail_req(f):
        # Issue a mutating request and expect it to time out on the read barrier.
        with pytest.raises(HTTPError) as exc:
            await f("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(0)], timeout=timeout + 60)
        assert "raft operation [read_barrier] timed out, there is no raft quorum" in exc.value.message

    await asyncio.gather(fail_req(manager.api.client.post), fail_req(manager.api.client.delete))
    # The failed requests must not have modified the table.
    await wait_for_expected_client_routes_size(cql, 1)
|
||||
|
||||
def setup_events_test(cql, received_events, monkeypatch):
    """Register a watcher that appends CLIENT_ROUTES_CHANGE events to
    `received_events`.

    If the installed python driver does not know the event type yet, the
    protocol decoder is monkeypatched in so the events can still be parsed.
    Consecutive duplicate events are dropped.
    """
    # scylla-driver >= 3.29.6 supports CLIENT_ROUTES_CHANGE events.
    # For older python driver, monkeypatching is necessary
    if CLIENT_ROUTES_CHANGE_EVENT_NAME not in cassandra.protocol.known_event_types:
        def _recv_client_routes_change(f, arg):
            # Decoder for the event payload: a change-type string followed by
            # a short-prefixed list of connection ids and one of host ids.
            logger.info(f"monkeypatch_driver recv_client_routes_change, f={f} arg={arg}")
            change_type = cassandra.protocol.read_string(f)
            connection_ids = [cassandra.protocol.read_string(f) for _ in range(cassandra.protocol.read_short(f))]
            host_ids = [cassandra.protocol.read_string(f) for _ in range(cassandra.protocol.read_short(f))]
            return {
                "change_type": change_type,
                "connection_ids": connection_ids,
                "host_ids": host_ids
            }
        # Teach the driver about the event type and install its decoder.
        # EventMessage looks up the decoder by event name ("recv_" + lowercased
        # event type) — NOTE(review): presumably, based on driver convention; verify against the driver version in use.
        monkeypatch.setattr(cassandra.protocol, "known_event_types", cassandra.protocol.known_event_types.union([CLIENT_ROUTES_CHANGE_EVENT_NAME]), raising=True)
        monkeypatch.setattr(EventMessage, "recv_client_routes_change", _recv_client_routes_change, raising=False)

    def on_event(event):
        logger.info(f"Received an event: {event}")
        # The server may deliver the same event more than once; keep only the
        # first of consecutive duplicates.
        if len(received_events) > 0 and received_events[-1] == event:
            logger.info(f"The received event is a duplicate: {event}")
        else:
            received_events.append(event)

    cql.cluster.control_connection._connection.register_watchers({CLIENT_ROUTES_CHANGE_EVENT_NAME: on_event})
|
||||
|
||||
async def wait_for_expected_event_num(expected_num, received_events):
    """Block until `received_events` holds exactly `expected_num` entries,
    raising via ``wait_for`` if that does not happen within 10 seconds.
    """
    async def check():
        # wait_for keeps polling while the callable returns None.
        logger.info(f"Checking if number of events is equal expected_num={expected_num}, events={received_events}")
        return expected_num if len(received_events) == expected_num else None

    await wait_for(check, time.time() + 10)
|
||||
|
||||
@pytest.mark.asyncio
async def test_events(request, manager: ManagerClient, monkeypatch):
    """
    This test verifies client routes change events in the following steps:
    1. Add one new entry to client_routes.
    2. Verify that the driver received one new event.
    3. Add two new entries to client_routes using one POST request.
    4. Verify that the driver received one new event with two updates.
    5. Delete an entry, and verify that the driver received the event.
    """

    servers = await manager.servers_add(2, cmdline=['--smp=2'])
    cql, hosts = await manager.get_ready_cql(servers)

    received_events = []
    setup_events_test(cql, received_events, monkeypatch)

    # Steps 1-2: one POST -> exactly one event describing the new entry.
    await manager.api.client.post("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(0)])

    await wait_for_expected_event_num(1, received_events)
    assert received_events[0]["change_type"] == "UPDATE_NODES"
    assert received_events[0]["connection_ids"] == [generate_connection_id(0)]
    assert received_events[0]["host_ids"] == [generate_host_id(0)]

    # Steps 3-4: a single POST with two entries must be batched into one event.
    await manager.api.client.post("/v2/client-routes", host=servers[0].ip_addr, json=[
        generate_client_routes_entry(1),
        generate_client_routes_entry(2),
    ])
    await wait_for_expected_event_num(2, received_events)
    assert received_events[1]["change_type"] == "UPDATE_NODES"
    assert received_events[1]["connection_ids"] == [generate_connection_id(1), generate_connection_id(2)]
    assert received_events[1]["host_ids"] == [generate_host_id(1), generate_host_id(2)]

    # Step 5: DELETE also produces an UPDATE_NODES event for the removed entry.
    await manager.api.client.delete("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(0)])
    await wait_for_expected_event_num(3, received_events)
    assert received_events[2]["change_type"] == "UPDATE_NODES"
    assert received_events[2]["connection_ids"] == [generate_connection_id(0)]
    assert received_events[2]["host_ids"] == [generate_host_id(0)]
|
||||
|
||||
@pytest.mark.asyncio
@skip_mode("release", "error injections are not supported in release mode")
async def test_client_routes_snapshot_transfer(request, manager: ManagerClient, monkeypatch):
    """
    This test verifies that client routes change events are sent when client_routes
    data is propagated via snapshot transfer:
    1. Create a 3-node cluster.
    2. Enable `block_group0_transfer_snapshot` error injection on one node, and stop it.
    3. Change client routes with a POST request on other nodes, and trigger a snapshot.
    4. Start the stopped node, and send a message to stop waiting on `block_group0_transfer_snapshot`.
    5. Verify that an event was sent.
    """
    servers = await manager.servers_add(3, cmdline=['--smp=2'])
    cql, hosts = await manager.get_ready_cql(servers)
    server_to_restart = servers[2]
    error_to_inject = "block_group0_transfer_snapshot"

    # Arrange for the node to block during snapshot transfer on next startup,
    # then stop it so it misses the following update.
    await manager.server_update_config(server_to_restart.server_id, "error_injections_at_startup", [error_to_inject])
    await manager.server_stop(server_to_restart.server_id)

    # Change the routes while the node is down and force a Raft snapshot so
    # the change can only reach the node via snapshot transfer.
    await manager.api.client.post("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(1)])
    await wait_for_expected_client_routes_size(cql, 1)
    await trigger_snapshot(manager, servers[0])

    # Start the node; it blocks on the injected error before applying the
    # snapshot, so its client_routes table is still empty at this point.
    await manager.server_start(server_to_restart.server_id)
    log = await manager.server_open_log(server_to_restart.server_id)
    await log.wait_for("block_group0_transfer_snapshot: waiting for message")
    cql = await manager.get_cql_exclusive(server_to_restart)
    await wait_for_expected_client_routes_size(cql, 0)

    received_events = []
    setup_events_test(cql, received_events, monkeypatch)

    # Unblock the snapshot transfer and verify that applying it both updates
    # the table and emits the client-routes-change event.
    await manager.api.message_injection(server_to_restart.ip_addr, error_to_inject)
    await wait_for_expected_client_routes_size(cql, 1)
    await wait_for_expected_event_num(1, received_events)
    assert received_events[0]["change_type"] == "UPDATE_NODES"
    assert received_events[0]["connection_ids"] == [generate_connection_id(1)]
    assert received_events[0]["host_ids"] == [generate_host_id(1)]
    # Confirm the data really arrived via the snapshot path.
    await log.wait_for("transfer snapshot: raft snapshot includes client_routes mutation")
|
||||
|
||||
@pytest.mark.asyncio
async def test_huge_event(request, manager: ManagerClient, monkeypatch):
    """
    This test verifies that an event can be sent to the driver even when it contains many host_ids and connection_ids.
    """
    servers = await manager.servers_add(2, cmdline=['--smp=2'])
    cql, hosts = await manager.get_ready_cql(servers)

    received_events = []
    setup_events_test(cql, received_events, monkeypatch)

    # One POST with 1000 entries -> a single large event.
    await manager.api.client.post("/v2/client-routes", host=servers[0].ip_addr, json=[generate_client_routes_entry(i) for i in range(1000)])

    await wait_for_expected_event_num(1, received_events)
    # Ordering inside the event is not asserted; compare as sets.
    assert set(received_events[0]["connection_ids"]) == set([generate_connection_id(i) for i in range(1000)])
    assert set(received_events[0]["host_ids"]) == set([generate_host_id(i) for i in range(1000)])
|
||||
@@ -146,13 +146,13 @@ async def test_joining_old_node_fails(manager: ManagerClient) -> None:
|
||||
|
||||
# Try to add a node that doesn't support the feature - should fail
|
||||
new_server_info = await manager.server_add(start=False, property_file=servers[0].property_file())
|
||||
await manager.server_start(new_server_info.server_id, expected_error="Feature check failed")
|
||||
await manager.server_start(new_server_info.server_id, expected_error="Feature check failed|received notification of being banned from the cluster from")
|
||||
|
||||
# Try to replace with a node that doesn't support the feature - should fail
|
||||
await manager.server_stop_gracefully(servers[0].server_id)
|
||||
replace_cfg = ReplaceConfig(replaced_id=servers[0].server_id, reuse_ip_addr=False, use_host_id=False)
|
||||
new_server_info = await manager.server_add(start=False, replace_cfg=replace_cfg, property_file=servers[0].property_file())
|
||||
await manager.server_start(new_server_info.server_id, expected_error="Feature check failed")
|
||||
await manager.server_start(new_server_info.server_id, expected_error="Feature check failed|received notification of being banned from the cluster from")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user