Compare commits

...

392 Commits

Author SHA1 Message Date
Jenkins Promoter
2cf363a3df Update pgo profiles - aarch64 2025-09-15 04:39:07 +03:00
Jenkins Promoter
666a1782b4 Update pgo profiles - x86_64 2025-09-15 04:07:08 +03:00
Patryk Jędrzejczak
b74f507570 Merge '[Backport 2025.2] test: cluster: deflake consistency checks after decommission' from Scylladb[bot]
In the Raft-based topology, a decommissioning node is removed from group
0 after the decommission request is considered finished (and the token
ring is updated). Therefore, `check_token_ring_and_group0_consistency`
called just after decommission might fail when the decommissioned node
is still in group 0 (as a non-voter). We deflake all tests that call
`check_token_ring_and_group0_consistency` after decommission in this PR.

Fixes #25809

This PR improves CI stability and changes only tests, so it should be
backported to all supported branches.

- (cherry picked from commit e41fc841cd)

- (cherry picked from commit bb9fb7848a)

Parent PR: #25927

Closes scylladb/scylladb#25962

* https://github.com/scylladb/scylladb:
  test: cluster: deflake consistency checks after decommission
  test: cluster: util: handle group 0 changes after token ring changes in wait_for_token_ring_and_group0_consistency
2025-09-11 13:04:21 +02:00
Patryk Jędrzejczak
3dfbd813ad test: cluster: deflake consistency checks after decommission
In the Raft-based topology, a decommissioning node is removed from group
0 after the decommission request is considered finished (and the token
ring is updated). Therefore, `check_token_ring_and_group0_consistency`
called just after decommission might fail when the decommissioned node
is still in group 0 (as a non-voter). We deflake all tests that call
`check_token_ring_and_group0_consistency` after decommission in this
commit.

Fixes #25809

(cherry picked from commit bb9fb7848a)
2025-09-10 17:48:35 +00:00
Patryk Jędrzejczak
e44d154470 test: cluster: util: handle group 0 changes after token ring changes in wait_for_token_ring_and_group0_consistency
In the Raft-based topology, a decommissioning node is removed from group
0 after the decommission request is considered finished (and the token
ring is updated). `wait_for_token_ring_and_group0_consistency` doesn't
handle such a case; it only handles cases where the token ring is
updated later. We fix this in this commit.
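The ordering-tolerant wait described above can be sketched as a plain polling loop. This is a hedged illustration only: the predicate arguments and the attempts-based deadline stand in for the real coroutine's cluster queries and clock; none of these names come from the Scylla source.

```cpp
#include <functional>

// Poll until both the token ring and group 0 have converged, regardless
// of which of the two settles first.
bool wait_for_consistency(const std::function<bool()>& token_ring_settled,
                          const std::function<bool()>& group0_settled,
                          int attempts) {
    for (int i = 0; i < attempts; ++i) {
        // Re-check both conditions on every iteration: checking group 0
        // only once, after the token ring settles, would miss the case
        // where the decommissioned node leaves group 0 *after* the token
        // ring is updated.
        if (token_ring_settled() && group0_settled()) {
            return true;
        }
        // (the real implementation sleeps between polls)
    }
    return false;
}
```

The key point is that neither condition is assumed to settle first; both are re-evaluated together until the deadline.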

We rely on the new implementation of
`wait_for_token_ring_and_group0_consistency` in the following commit to
fix flakiness of some tests.

We also update the obsolete docstring in this commit.

(cherry picked from commit e41fc841cd)
2025-09-10 17:48:35 +00:00
Piotr Dulikowski
2a82604998 Merge '[Backport 2025.2] service/qos: Modularize service level controller to avoid invalid access to auth::service' from Scylladb[bot]
Move management over effective service levels from `service_level_controller`
to a new dedicated type -- `auth_integration`.

Before these changes, it was possible for the service level controller to try
to access `auth::service` after it was deinitialized. For instance, it could
happen when reloading the cache. That HAS happened as described in the following
issue: scylladb/scylladb#24792.

Although the problem might have been mitigated or even resolved in
scylladb/scylladb@10214e13bd, it's not clear
how the service will be used in the future. It's better to prevent similar
bugs than to try to fix them later on.

The logic responsible for preventing access to an uninitialized `auth::service`
was also either non-existent, complex, or insufficient.

To prevent accessing `auth::service` by the service level controller, we extract
the relevant portion of the code to a separate entity -- `auth_integration`.
It's an internal helper type whose sole purpose is to manage effective service
levels.

Thanks to that, we were able to nest the lifetime of `auth_integration` within
the lifetime of `auth::service`. It's now impossible to attempt to dereference
it while it's uninitialized.

If a bug related to an invalid access is spotted again, though, it might also
be easier to debug it now.

There should be no visible change to the users of the interface of the service
level controller. We strived to make the patch minimal, and the only affected
part of the logic should be related to how `auth::service` is accessed.

The relevant portion of the initialization and deinitialization flow:

(a) Before the changes:

1. Initialize `service_level_controller`. Pass a reference to an uninitialized
   `auth::service` to it.
2. Initialize other services.
3. Initialize and start `auth::service`.
4. (work)
5. Stop and deinitialize `auth::service`.
6. Deinitialize other services.
7. Deinitialize `service_level_controller`.

(b) After the changes:

1. Initialize `service_level_controller`. Pass a reference to an uninitialized
   `auth::service` to it. (*)
2. Initialize other services.
3. Initialize and start `auth::service`.
4. Initialize `auth_integration`. Register it in `service_level_controller`.
5. (work)
6. Unregister `auth_integration` in `service_level_controller` and deinitialize
   it.
7. Stop and deinitialize `auth::service`.
8. Deinitialize other services.
9. Deinitialize `service_level_controller`.

(*):
    The reference to `auth::service` in `service_level_controller` is still
    necessary. We need to access the service when dropping a distributed
    service level.

    Although it would be best to cut that link between the service level
    controller and `auth::service` too, effectively separating the entities,
    it would require more work, so we leave it as-is for now.

    It shouldn't prove problematic as far as accessing an uninitialized service
    goes. Trying to drop a service level at the point when we're de-initializing
    auth should be impossible.

    For more context, see the function `drop_distributed_service_level` in
    `service_level_controller`.
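The register/unregister flow above can be sketched as follows. This is a minimal illustration of the lifetime-nesting idea, not Scylla's actual code: the type and method names follow the commit message, but the bodies are invented for the sketch.

```cpp
// Hypothetical sketch: the controller holds only a non-owning pointer to
// the auth integration, which is registered after auth::service starts
// and unregistered before it stops.
struct auth_integration {
    int reloads = 0;
    void reload_effective_service_levels() { ++reloads; }
};

class service_level_controller {
    auth_integration* _auth = nullptr;  // null outside auth's lifetime
public:
    void register_auth_integration(auth_integration& a) { _auth = &a; }
    void unregister_auth_integration() { _auth = nullptr; }
    // Reloading is now conditional: if auth is already gone, do nothing
    // instead of dereferencing a dead service.
    bool maybe_reload_cache() {
        if (!_auth) {
            return false;
        }
        _auth->reload_effective_service_levels();
        return true;
    }
};
```

Because `_auth` is set only between steps 4 and 6 of flow (b), any cache reload outside that window becomes a no-op rather than an invalid access.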

A trivial test has been included in the PR. Although its value is questionable
as we only try to reload the service level cache at a specific moment, it's
probably the best we can deliver to provide a reproducer of the issue this patch
is resolving.

Fixes scylladb/scylladb#24792

Backport: The impact of the bug was minimal as it only affected the shutdown.
However, since CI is failing because of it, let's backport the change to all
supported versions.

- (cherry picked from commit 7d0086b093)

- (cherry picked from commit 34afb6cdd9)

- (cherry picked from commit e929279d74)

- (cherry picked from commit dd5a35dc67)

- (cherry picked from commit fc1c41536c)

Parent PR: #25478

Closes scylladb/scylladb#25752

* github.com:scylladb/scylladb:
  service/qos: Move effective SL cache to auth_integration
  service/qos: Add auth::service to auth_integration
  service/qos: Reload effective SL cache conditionally
  service/qos: Add gate to auth_integration
  service/qos: Introduce auth_integration
2025-09-10 09:47:53 +02:00
Dawid Mędrek
8847f3996e test/perf: Adjust tablet_load_balancing.cc to RF-rack-validity
We modify the logic to make sure that all of the keyspaces that the test
creates are RF-rack-valid. For that, we distribute the nodes across two
DCs and as many racks as the provided replication factor.

That may have an effect on the load balancing logic, but since this is
a performance test and since tablet load balancing is still taking place,
it should be acceptable.

This commit also finishes work in adjusting perf tests to pass with
the `rf_rack_valid_keyspaces` configuration option enabled. The remaining
tests either don't attempt to create keyspaces or they already create
RF-rack-valid keyspaces.

We don't need to explicitly enable the configuration option. It's already
enabled by default by `cql_test_config`. The reason we haven't run into
any issues because of this is that performance tests are not part of our CI.

Fixes scylladb/scylladb#25127

Closes scylladb/scylladb#25728

(cherry picked from commit 789a4a1ce7)

Closes scylladb/scylladb#25921
2025-09-10 10:17:08 +03:00
Asias He
2ead17830f streaming: Enclose potential throws in try block and ensure sink close before logging
- Move the initialization of log_done inside the try block to catch any
  exceptions it may throw.

- Relocate the failure warning log after sink.close() cleanup
  to guarantee sink.close() is always called before logging errors.

Refs #25497

Closes scylladb/scylladb#25591

(cherry picked from commit b12404ba52)

Closes scylladb/scylladb#25902
2025-09-10 10:16:50 +03:00
Asias He
a5c5e062af streaming: Fix use after move in the tablet_stream_files_handler
The files object is moved before it is logged when the stream finishes. We
already log the files when the stream starts, so skip the log at the end of
streaming.
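The bug class fixed here is easy to reproduce in isolation: logging a container after `std::move`-ing it reads from a moved-from (typically empty) object. The commit's fix is simply to drop the end-of-stream log, since the files were already logged at stream start; the hypothetical helper below shows the alternative of capturing what is needed before the move.

```cpp
#include <string>
#include <utility>
#include <vector>

// Stand-in for a sink that takes ownership of the file list.
std::string consume_and_describe(std::vector<std::string> files) {
    return "streamed " + std::to_string(files.size()) + " files";
}

std::string finish_stream(std::vector<std::string> files) {
    // Capture the size before the move; after std::move(files) the local
    // vector is in a valid but unspecified (usually empty) state, so
    // logging files.size() afterwards would be a use-after-move.
    auto n = files.size();
    auto msg = consume_and_describe(std::move(files));
    return msg + " (expected " + std::to_string(n) + ")";
}
```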

Fixes #25830

Closes scylladb/scylladb#25835

(cherry picked from commit 451e1ec659)

Closes scylladb/scylladb#25890
2025-09-10 10:16:30 +03:00
Pavel Emelyanov
df604dabb7 s3: Export memory usage gauge (metrics)
The memory usage is tracked with the help of a semaphore, so just export
its "consumed" units.

One tricky place here is the need to skip metrics registration for the
scylla-sstable tool. The thing is that the tool starts the storage
manager and sstables manager on start, and then some of the tool's operations
may want to start both managers again (via the cql environment), causing
a double metrics registration exception.
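The gauge described above is just the semaphore's consumed units. A toy version of the idea (this simple counter stands in for `seastar::semaphore`; `consumed()` is the value the metric would export):

```cpp
#include <cstddef>

// Memory use is tracked by a units-counting semaphore, so the exported
// gauge is simply total - free ("consumed") units.
class mem_semaphore {
    std::size_t _total;
    std::size_t _free;
public:
    explicit mem_semaphore(std::size_t total) : _total(total), _free(total) {}
    bool try_consume(std::size_t units) {
        if (units > _free) {
            return false;   // would exceed the memory budget
        }
        _free -= units;
        return true;
    }
    void release(std::size_t units) { _free += units; }
    std::size_t consumed() const { return _total - _free; }  // the gauge
};
```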

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>

Closes scylladb/scylladb#25769

(cherry picked from commit b26816f80d)

Closes scylladb/scylladb#25864
2025-09-10 10:15:56 +03:00
Patryk Jędrzejczak
e1845ef5f8 Merge '[Backport 2025.2] gossiper: fix issues in processing gossip status during the startup and when messages are delayed to avoid empty host ids' from Scylladb[bot]
Populate the local state during gossiper initialization in start_gossiping, preventing an empty state from being added to _endpoint_state_map and returned in get_endpoint_states responses, which was causing an 'empty host id' issue on other nodes during node restarts.

Check for a race condition in do_apply_state_locally: a race condition can occur there if a task is suspended at a preemption point while the node entry is not locked.
During this time, the host may be removed from _endpoint_state_map. When the task resumes, this can lead to inserting an entry with an empty host ID into the map, causing various errors, including a node crash.

This change adds a check after locking the map entry: if a gossip ACK update does not contain a host ID, we verify that an entry with that host ID still exists in the gossiper’s _endpoint_state_map.

Fixes https://github.com/scylladb/scylladb/issues/25831
Fixes https://github.com/scylladb/scylladb/issues/25803
Fixes https://github.com/scylladb/scylladb/issues/25702
Fixes https://github.com/scylladb/scylladb/issues/25621

Ref https://github.com/scylladb/scylla-enterprise/issues/5613

Backport: The issue affects all current releases (2025.x), therefore this PR needs to be backported to all of 2025.1-2025.3.

- (cherry picked from commit 28e0f42a83)

- (cherry picked from commit f08df7c9d7)

- (cherry picked from commit 775642ea23)

- (cherry picked from commit b34d543f30)

Parent PR: #25849

Closes scylladb/scylladb#25897

* https://github.com/scylladb/scylladb:
  gossiper: fix empty initial local node state
  gossiper: add test for a race condition in start_gossiping
  gossiper: check for a race condition in `do_apply_state_locally`
  test/gossiper: add reproducible test for race condition during node decommission
2025-09-09 12:23:56 +02:00
Yaron Kaikov
98e32ca86b build_docker.sh: enable debug symbols installation
Add the latest scylla.repo location to our docker container; this
allows installing the scylla-debuginfo package in case it's needed.

Fixes: https://github.com/scylladb/scylladb/issues/24271

Closes scylladb/scylladb#25646

(cherry picked from commit d57741edc2)

Closes scylladb/scylladb#25892
2025-09-09 11:41:55 +03:00
Sergey Zolotukhin
a7a7de9a69 gossiper: fix empty initial local node state
This change removes the addition of an empty state to `_endpoint_state_map`.
Instead, a new state is created locally and then published via replicate,
avoiding the issue of an empty state existing in `_endpoint_state_map`
before the preemption point. Since this resolves the issue tested in
`test_gossiper_empty_self_id_on_shadow_round`, the `xfail` mark has been removed.

Fixes: scylladb/scylladb#25831
(cherry picked from commit b34d543f30)
2025-09-08 21:54:43 +00:00
Sergey Zolotukhin
f44b578075 gossiper: add test for a race condition in start_gossiping
This change adds a test for a race condition in `start_gossiping` that
can lead to an empty self state sent in `gossip_get_endpoint_states_response`.

Test for scylladb/scylladb#25831

(cherry picked from commit 775642ea23)
2025-09-08 21:54:43 +00:00
Sergey Zolotukhin
e157e8577e gossiper: check for a race condition in do_apply_state_locally
In do_apply_state_locally, a race condition can occur if a task is
suspended at a preemption point while the node entry is not locked.
During this time, the host may be removed from _endpoint_state_map.
When the task resumes, this can lead to inserting an entry with an
empty host ID into the map, causing various errors, including a node
crash.

This change:
1. Adds a check after locking the map entry: if a gossip ACK update
   does not contain a host ID, we verify that an entry with that host ID
   still exists in the gossiper’s _endpoint_state_map.
2. Removes xfail from the test_gossiper_race test since the issue is now
   fixed.
3. Adds exception handling in `do_shadow_round` to skip responses from
   nodes that sent an empty host ID.

This re-applies the commit 13392a40d4 that
was reverted in 46aa59fe49, after fixing
the issues that caused the CI to fail.

Fixes: scylladb/scylladb#25702
Fixes: scylladb/scylladb#25621

Ref: scylladb/scylla-enterprise#5613
(cherry picked from commit f08df7c9d7)
2025-09-08 21:54:43 +00:00
Emil Maskovsky
e8b903979e test/gossiper: add reproducible test for race condition during node decommission
This change introduces a targeted test that simulates the gossiper race
condition observed during node decommissioning. The test delays gossip
state application and host ID lookup to reliably reproduce the scenario
where `gossiper::get_host_id()` is called on a removed endpoint,
potentially triggering an abort in `apply_new_states`.

There is a specific error injection added to widen the race window, in
order to increase the likelihood of hitting the race condition. The
error injection is designed to delay the application of gossip state
updates, for the specific node that is being decommissioned. This should
then result in the server abort in the gossiper.

This re-applies the commit 5dac4b38fb that
was reverted in dc44fca67c, but modified
to relax the check from "on_internal_error" to just a warning log. The
stricter check can be re-introduced later once we are sure that all
remaining problems are resolved and it will not break the CI.

Refs: scylladb/scylladb#25621
Fixes: scylladb/scylladb#25721
(cherry picked from commit 28e0f42a83)
2025-09-08 21:54:43 +00:00
Dawid Mędrek
3f3ce66981 service/qos: Move effective SL cache to auth_integration
Since `auth_integration` manages effective service levels, let's move
the relevant cache from `service_level_controller` to it.

(cherry picked from commit fc1c41536c)
2025-09-08 19:00:14 +02:00
Dawid Mędrek
687956f9f4 service/qos: Add auth::service to auth_integration
The new service, `auth_integration`, has taken over the responsibility
for managing effective service levels from `service_level_controller`.
However, before these changes, it still accessed `auth::service` via
the service level controller. Let's change that.

Note that we also remove a check that `auth::service` has been
initialized. It's not necessary anymore because the lifetime of
`auth_integration` is strictly nested within the lifetime of `auth::service`.

Ideally, `service_level_controller` should lose its reference to
`auth::service` completely. All of the management of effective service
levels has already been moved to `auth_integration`. However, the
reference is still needed when dropping a distributed service level
because we need to update the corresponding attribute for relevant
roles.

That should not lead to invalid accesses, though. Dropping a service level
should not be possible when `auth::service` is not initialized.

(cherry picked from commit dd5a35dc67)
2025-09-08 19:00:14 +02:00
Dawid Mędrek
566db1df0a service/qos: Reload effective SL cache conditionally
Since `service_level_controller` outlives `auth_integration`, it may
happen that we try to access it when it has already been deinitialized.
To prevent that, we only try to reload or clear the effective service
level cache when the object is still alive.

These changes solve an existing problem with an invalid memory access.
For more context, see issue scylladb/scylladb#24792.

We provide a reproducer test that consistently fails before these
changes but passes after them.

Fixes scylladb/scylladb#24792

(cherry picked from commit e929279d74)
2025-09-08 19:00:14 +02:00
Dawid Mędrek
6528bd47bf service/qos: Add gate to auth_integration
We add a named gate to `auth_integration` that will aid us in synchronizing
ongoing tasks with stopping the service.

(cherry picked from commit 34afb6cdd9)
2025-09-08 19:00:14 +02:00
Dawid Mędrek
65086ea76d service/qos: Introduce auth_integration
We introduce a new type, `auth_integration`, that will be used internally
by `service_level_controller`. Its purpose is to take over the responsibility
for managing effective service levels.

The main problem with the current implementation of the service level
controller is its dependency on `auth::service`, whose lifetime is strictly
nested within the lifetime of the service level controller. That may lead,
and already has led, to invalid memory accesses; for an example, see issue
scylladb/scylladb#24792.

Our strategy is to split service level controller into smaller parts and
ensure that we access `auth::service` only when it's valid to do so.
This commit is the first step towards that.

We don't change anything in the logic yet, just add the new type. Further
adjustments will be made in following commits.

(cherry picked from commit 7d0086b093)
2025-09-08 19:00:14 +02:00
Calle Wilund
71ef2caef9 system_keyspace: Prune dropped tables from truncation on start/drop
Fixes #25683

Once a table drop is complete, there should be no reason to retain
truncation records for it, as any replay should skip mutations
anyway (no CF), and if we somehow resurrect a dropped table,
this replay-resurrected data is the least of our problems anyway.

Adds a prune phase to the startup drop_truncation_rp_records run,
which skips updating and instead deletes records for non-existent
tables (which should patch any existing servers with lingering data
as well).

Also does an explicit delete of records on actual table DROP, to
ensure we don't grow this table more than needed even in long
uptime nodes.

Small unit test included.

Closes scylladb/scylladb#25699

(cherry picked from commit bc20861afb)

Closes scylladb/scylladb#25813
2025-09-08 16:26:04 +03:00
Avi Kivity
5de570c9ae Merge 'auth: move passwords::check call to alien thread' from Andrzej Jackowski
Analysis of customer stalls revealed that the function `detail::hash_with_salt` (invoked by `passwords::check`) often blocks the reactor. Internally, this function uses the external `crypt_r` function to compute password hashes, which is CPU-intensive.

This PR addresses the issue in two ways:
1) `sha-512` is now the only password hashing scheme for new passwords (it was already the common-case).
2) `passwords::check` is moved to a dedicated alien thread.

Regarding point 1: before this change, the following hashing schemes were supported by `identify_best_supported_scheme()`: bcrypt_y, bcrypt_a, SHA-512, SHA-256, and MD5. The reason for this was that the `crypt_r` function used for password hashing comes from an external library (currently `libxcrypt`), and the supported hashing algorithms vary depending on the library in use. However:
- The bcrypt schemes never worked properly because their prefixes lack the required round count (e.g. `$2y$` instead of `$2y$05$`). Moreover, bcrypt is slower than SHA-512, so it is not a good idea to fix or use it.
- SHA-256 and SHA-512 both belong to the SHA-2 family. Libraries that support one almost always support the other, so it’s very unlikely to find SHA-256 without SHA-512.
- MD5 is no longer considered secure for password hashing.

Regarding point 2: the `passwords::check` call now runs on a shared alien thread created at database startup. An `std::mutex` synchronizes that thread with the shards. In theory this could introduce frequent lock contention, but in practice each shard handles only a few hundred new connections per second, even during storms. The existing `_conns_cpu_concurrency_semaphore` in `generic_server` already limits the number of concurrent connection handlers.
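The offloading pattern can be sketched with the standard library. This is a loose illustration of the idea, not Seastar's alien-thread machinery: `slow_hash` is a stand-in for `detail::hash_with_salt`/`crypt_r`, and `std::async` stands in for the single shared worker thread created at startup.

```cpp
#include <functional>
#include <future>
#include <mutex>
#include <string>

// Stand-in for the CPU-intensive crypt_r-style hash (NOT a real KDF).
std::string slow_hash(const std::string& password, const std::string& salt) {
    return salt + ":" + std::to_string(std::hash<std::string>{}(salt + password));
}

class password_checker {
    std::mutex _mutex;  // serializes the shards' requests to the worker
public:
    // Returns a future so the calling (reactor) thread is never blocked
    // computing the hash itself.
    std::future<bool> check(std::string password, std::string stored) {
        return std::async(std::launch::async,
                          [this, p = std::move(password), s = std::move(stored)] {
            std::lock_guard<std::mutex> guard(_mutex);
            auto salt = s.substr(0, s.find(':'));
            return slow_hash(p, salt) == s;
        });
    }
};
```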

Fixes https://github.com/scylladb/scylladb/issues/24524

Backport not needed, as it is a new feature.

Closes scylladb/scylladb#24924

* github.com:scylladb/scylladb:
  main: utils: add thread names to alien workers
  auth: move passwords::check call to alien thread
  test: wait for 3 clients with given username in test_service_level_api
  auth: refactor password checking in password_authenticator
  auth: make SHA-512 the only password hashing scheme for new passwords
  auth: whitespace change in identify_best_supported_scheme()
  auth: require scheme as parameter for `generate_salt`
  auth: check password hashing scheme support on authenticator start

(cherry picked from commit c762425ea7)
2025-09-07 14:30:26 +03:00
Pavel Emelyanov
112827b170 Revert "test/gossiper: add reproducible test for race condition during node decommission"
This reverts commit 46f8404100 because the
parent PR had been reverted as per #25803
2025-09-05 10:07:10 +03:00
Pavel Emelyanov
ed3e671564 Merge '[Backport 2025.2] drop table: fix crash on drop table with concurrent cleanup' from Scylladb[bot]
Consider the following scenario:

- A tablet is migrated away from a shard
- The tablet cleanup stage closes the storage group's async_gate
- A drop table runs truncate which attempts to disable compaction on the tablet with its gate closed. This fails, because table::parallel_foreach_compaction_group() ultimately calls storage_group_manager::parallel_foreach_storage_group() which will not disable compaction if it can't hold the storage group's gate
- Truncate calls table::discard_sstables() which checks if the compaction has been disabled, and because it hasn't, it then runs on_internal_error() with "compaction not disabled on table ks.cf during TRUNCATE" which causes a crash

Fixes: #25706

This needs to be backported to all supported versions with tablets

- (cherry picked from commit a0934cf80d)

- (cherry picked from commit 1b8a44af75)

Parent PR: #25708

Closes scylladb/scylladb#25784

* github.com:scylladb/scylladb:
  test: reproducer and test for drop with concurrent cleanup
  truncate: check for closed storage group's gate in discard_sstables
2025-09-04 08:44:29 +03:00
Emil Maskovsky
46f8404100 test/gossiper: add reproducible test for race condition during node decommission
This change introduces a targeted test that simulates the gossiper race
condition observed during node decommissioning. The test delays gossip
state application and host ID lookup to reliably reproduce the scenario
where `gossiper::get_host_id()` is called on a removed endpoint,
potentially triggering an abort in `apply_new_states`.

There is a specific error injection added to widen the race window, in
order to increase the likelihood of hitting the race condition. The
error injection is designed to delay the application of gossip state
updates, for the specific node that is being decommissioned. This should
then result in the server abort in the gossiper.

Refs: scylladb/scylladb#25621
Fixes: scylladb/scylladb#25721

Backport: The test is primarily for an issue found in 2025.1, so it
needs to be backported to all the 2025.x branches.

Closes scylladb/scylladb#25685

(cherry picked from commit 5dac4b38fb)

Closes scylladb/scylladb#25780
2025-09-02 20:54:09 +02:00
Piotr Dulikowski
d672c7c45e Merge '[Backport 2025.2] system_keyspace: add peers cache to get_ip_from_peers_table' from Scylladb[bot]
The gossiper can call `storage_service::on_change` frequently (see  scylladb/scylla-enterprise#5613), which may cause high CPU load and even trigger OOMs or related issues.

This PR adds a temporary cache for `system.peers` to resolve host_id -> ip without hitting storage on every call. The cache is short-lived to handle the unlikely case where `system.peers` is updated directly via CQL.

This is a temporary fix; a more thorough solution is tracked in https://github.com/scylladb/scylladb/issues/25620.

Fixes scylladb/scylladb#25660

backport: this patch needs to be backported to all supported versions (2025.1/2/3).

- (cherry picked from commit 91c633371e)

- (cherry picked from commit de5dc4c362)

- (cherry picked from commit 4b907c7711)

Parent PR: #25658

Closes scylladb/scylladb#25764

* github.com:scylladb/scylladb:
  storage_service: move get_host_id_to_ip_map to system_keyspace
  system_keyspace: use peers cache in get_ip_from_peers_table
  storage_service: move get_ip_from_peers_table to system_keyspace
2025-09-02 08:34:27 +02:00
Ferenc Szili
1bf298c722 test: reproducer and test for drop with concurrent cleanup
This change adds a reproducer and test for issue #25706

(cherry picked from commit 1b8a44af75)
2025-09-02 02:18:21 +00:00
Ferenc Szili
0b4b85c820 truncate: check for closed storage group's gate in discard_sstables
Consider the following scenario:

- A tablet is migrated away from a shard
- The tablet cleanup stage closes the storage group's async_gate
- A drop table runs truncate which attempts to disable compaction on the
  tablet with its gate closed. This fails, because
  table::parallel_foreach_compaction_group() ultimately calls
  storage_group_manager::parallel_foreach_storage_group() which will not
  disable compaction if it can't hold the storage group's gate
- Truncate calls table::discard_sstables() which checks if the compaction
  has been disabled, and because it hasn't, it then runs
  on_internal_error() with "compaction not disabled on table ks.cf during
  TRUNCATE" which causes a crash

This patch makes discard_sstables check whether the storage group's gate is
closed when checking for disabled compaction.

(cherry picked from commit a0934cf80d)
2025-09-02 02:18:21 +00:00
Nadav Har'El
c04b086929 alternator: avoid oversized allocation in Query/Scan
This patch fixes one cause of oversized allocations - and therefore
potentially stalls and increased tail latencies - in Alternator.

Alternator's Scan or Query operation return a page of results. When the
number of items is not limited by a "Limit" parameter, the default is
to return a 1 MB page. If items are short, a large number of them can
fit in that 1MB. The test test_query.py::test_query_large_page_small_rows
has 30,000 items returned in a single page.

In the response JSON, all these items are returned in a single array
"Items". Before this patch, we build the full response as a RapidJSON
object before sending it. The problem is that unfortunately, RapidJSON
stores arrays as contiguous allocations. This results in large
contiguous allocations in workloads that scan many small items, and
large contiguous allocations can also cause stalls and high tail
latencies. For example, before this patch, running

    test/alternator/run --runveryslow \
        test_query.py::test_query_large_page_small_rows

reports in the log:

    oversized allocation: 573440 bytes.

After this patch, this warning no longer appears.
The patch solves the problem by collecting the scanned items not in a
RapidJSON array, but rather in a chunked_vector<rjson::value>, i.e.,
a chunked (non-contiguous) array of items (each a JSON value).
After collecting this array separately from the response object, we
need to print its content without actually inserting it into the object -
we add a new function print_with_extra_array() to do that.

The new separate-chunked-vector technique is used when a large number
(currently, >256) of items were scanned. When there is a smaller number
of items in a page (this is typical when each item is longer), we just
insert those items in the object and print it as before.
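The allocation-shape idea can be illustrated without RapidJSON at all: keep the items in a chunked container and splice them into the serialized output rather than building one contiguous array. In this sketch `std::deque` stands in for `chunked_vector`, elements are pre-serialized item strings, and the function name echoes the commit's `print_with_extra_array` while its signature is invented for the example.

```cpp
#include <deque>
#include <sstream>
#include <string>

// Serialize a response whose "Items" array is streamed from a chunked
// container, so no single contiguous allocation ever holds all items.
std::string print_with_extra_array(const std::string& prefix,
                                   const std::deque<std::string>& items,
                                   const std::string& suffix) {
    std::ostringstream out;
    out << prefix << "\"Items\":[";
    bool first = true;
    for (const auto& item : items) {  // one chunked element at a time
        if (!first) {
            out << ',';
        }
        first = false;
        out << item;
    }
    out << ']' << suffix;
    return out.str();
}
```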

Beyond the original slow test that demonstrated the oversized allocation
(which is now gone), this patch also includes a new test which
exercises the new code with a scan of 700 (>256) items in a page -
but this new test is fast enough to be permanently in our test suite
and not a manual "veryslow" test as the other test.

Fixes #23535

(cherry picked from commit 2385fba4b6)

Closes scylladb/scylladb#25654
2025-09-01 16:40:02 +03:00
Petr Gusev
8b83a3d380 storage_service: move get_host_id_to_ip_map to system_keyspace
Reimplemented the function to use the peers cache. It could be replaced
with get_ip_from_peers_table, but that would create a coroutine frame for
each call.

(cherry picked from commit 4b907c7711)
2025-09-01 11:32:44 +02:00
Petr Gusev
8f5befd726 system_keyspace: use peers cache in get_ip_from_peers_table
The storage_service::on_change method can be called quite often
by the gossiper, see scylladb/scylla-enterprise#5613. In this commit
we introduce a temporal cache for system.peers so that we don't have
to go to the storage each time we need to resolve host_id -> ip.
We keep the cache only for a small amount of time to handle the
(unlikely) scenario when the user wants to update system.peers table
from CQL.

Fixes scylladb/scylladb#25660

(cherry picked from commit de5dc4c362)
2025-09-01 11:32:13 +02:00
Petr Gusev
ece69b212d storage_service: move get_ip_from_peers_table to system_keyspace
We plan to add a cache to get_ip_from_peers_table in upcoming commits.
It's more convenient to do this from system_keyspace, since the only two
methods that mutate system.peers (remove_endpoint and update_peers_info)
are already there.

(cherry picked from commit 91c633371e)
2025-09-01 11:32:04 +02:00
Nadav Har'El
46bd9f2f27 utils, alternator: fix detection of invalid base-64
This patch fixes an error-path bug in the base-64 decoding code in
utils/base64.cc, which among other things is used in Alternator to decode
blobs in JSON requests.

The base-64 decoding code has a lookup table, which was wrongly sized 255
bytes, but needed to be 256 bytes. This meant that if the byte 255 (0xFF)
was included in an invalid base-64 string, instead of detecting that this
is an invalid byte (since the only valid bytes in a base-64 string are
A-Z,a-z,0-9,+,/ and =), the code would either think it's valid with a
nonsense 6-bit part, or even crash on an out-of-bounds read.
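The off-by-one is easy to see in code: the decode table must have 256 entries so that every possible byte value, including 0xFF, maps to an explicit "invalid" marker. The sketch below is a simplified validity check built from the standard base-64 alphabet, not Scylla's actual decoder (which also handles '=' padding positionally).

```cpp
#include <array>
#include <cstdint>
#include <string_view>

// Build a 256-entry lookup table: -1 marks an invalid byte, otherwise the
// value is the 6-bit group for that character. A 255-entry table would
// leave byte 0xFF reading out of bounds.
constexpr std::array<std::int8_t, 256> make_base64_table() {
    std::array<std::int8_t, 256> t{};
    for (auto& v : t) {
        v = -1;
    }
    constexpr std::string_view alphabet =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    for (int i = 0; i < 64; ++i) {
        t[static_cast<unsigned char>(alphabet[i])] = static_cast<std::int8_t>(i);
    }
    return t;
}

inline bool is_valid_base64_byte(unsigned char c) {
    static constexpr auto table = make_base64_table();
    return c == '=' || table[c] != -1;  // index is always within [0, 255]
}
```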

Besides the trivial fix, this patch also includes a reproducing test,
which tries to write a blob as a supposedly base-64 encoded string with
a 0xFF byte in it. The test fails before this patch (the write succeeds,
unexpectedly), and passes after this patch (the write fails as
expected). The test also passes on DynamoDB.

Fixes #25701

Signed-off-by: Nadav Har'El <nyh@scylladb.com>

Closes scylladb/scylladb#25705

(cherry picked from commit ff91027eac)

Closes scylladb/scylladb#25765
2025-09-01 09:07:00 +03:00
Calle Wilund
378ee9fb59 system_keyspace: Limit parallelism in drop_truncation_records
Fixes #25682
Refs scylla-enterprise#5580

If the truncation table has many entries, we might create a
huge parallel execution, quite possibly consuming loads of resources
to do something quite trivial.
Limit concurrency to a smallish number.
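Bounded fan-out of this kind can be sketched with plain threads run in fixed-size batches. This is only an illustration of the concurrency bound (Scylla uses Seastar primitives, not `std::thread`); the atomic max-tracker exists just so the bound can be observed.

```cpp
#include <algorithm>
#include <atomic>
#include <thread>
#include <vector>

// Run one job per entry, but never more than `limit` jobs in flight.
// Returns the maximum concurrency actually observed.
template <typename Job>
int run_with_limited_parallelism(int n_entries, int limit, Job job) {
    std::atomic<int> in_flight{0};
    std::atomic<int> max_in_flight{0};
    for (int start = 0; start < n_entries; start += limit) {
        int end = std::min(n_entries, start + limit);
        std::vector<std::thread> batch;
        for (int i = start; i < end; ++i) {
            batch.emplace_back([&, i] {
                int cur = ++in_flight;
                int seen = max_in_flight.load();
                while (cur > seen &&
                       !max_in_flight.compare_exchange_weak(seen, cur)) {
                }
                job(i);
                --in_flight;
            });
        }
        for (auto& t : batch) {
            t.join();  // drain this batch before starting the next
        }
    }
    return max_in_flight.load();
}
```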

Closes scylladb/scylladb#25678

(cherry picked from commit 2eccd17e70)

Closes scylladb/scylladb#25749
2025-09-01 09:06:18 +03:00
Emil Maskovsky
750549d0ca storage: pass host_id as parameter to maybe_reconnect_to_preferred_ip()
Previously, `maybe_reconnect_to_preferred_ip()` retrieved the host ID
using `gossiper::get_host_id()`. Since the host ID is already available
in the calling function, we now pass it directly as a parameter.

This change simplifies the code and eliminates a potential race condition
where `gossiper::get_host_id()` could fail, as described in scylladb/scylladb#25621.

Refs: scylladb/scylladb#25621
Fixes: scylladb/scylladb#25715

Backport: Recommended for 2025.x release branches to avoid potential issues
from unnecessary calls to `gossiper::get_host_id()` in subscribers.

(cherry picked from commit cfc87746b6)

Closes scylladb/scylladb#25717
2025-09-01 09:06:07 +03:00
Jenkins Promoter
a334608836 Update pgo profiles - aarch64 2025-09-01 04:53:00 +03:00
Jenkins Promoter
d69eb514da Update pgo profiles - x86_64 2025-09-01 04:30:46 +03:00
Calle Wilund
a2bc1a7c6b commitlog: Ensure segment deletion is re-entrant
Fixes #25709

If we have large allocations, spanning more than one segment, where
the internal segment references from the lead segment to secondary
ones are the only thing keeping a segment alive, the implicit drop in
discard_unused_segments and orphan_all can cause a recursive call
to discard_unused_segments, which in turn can lead to vector
corruption/crash, or even a double free of a segment (iterator confusion).

Need to separate the modification of the vector (_segments) from
actual releasing of objects. Using temporaries is the easiest
solution.

To further reduce recursion, we can also do an early clear of
segment dependencies in callbacks from segment release (cf release).

Closes scylladb/scylladb#25719

(cherry picked from commit cc9eb321a1)

Closes scylladb/scylladb#25755
2025-08-30 18:51:35 +03:00
Pavel Emelyanov
1ee00069e7 Merge '[Backport 2025.2] repair: distribute tablet_repair_task_metas between shards' from Aleksandra Martyniuk
Currently, in repair_service::repair_tablets a shard that initiates
the repair keeps repair_tablet_metas of all tablets that have a replica
on this node (on any shard). This may lead to oversized allocations.

Modify tablet_repair_task_impl to repair only the tablets whose replicas
are kept on this shard. Modify repair_service::repair_tablets to gather
repair_tablet_metas only on the local shard. repair_tablets is invoked on
all shards.

Add a new legacy_tablet_repair_task_impl that covers tablet repair started with
async_repair. A user can use sequence number of this task to manage the repair
using storage_service API.

In a test that reproduced this, we have seen 11136 tablets and a 5636096-byte
allocation failure. If we had a node with 250 shards, 100 tablets each, we could
reach 12MB kept on one shard for the whole repair time.

Fixes: https://github.com/scylladb/scylladb/issues/23632

Needs backport to all live branches as they are all vulnerable to such crashes.

Closes scylladb/scylladb#25352

* github.com:scylladb/scylladb:
  repair: distribute tablet_repair_task_meta among shards
  repair: do not keep erm in tablet_repair_task_meta
2025-08-27 10:28:03 +03:00
Avi Kivity
1ae593da2e Merge 'token_range_vector: fragment' from Avi Kivity
token_range_vector is a sequence of intervals of tokens. It is used
to describe vnodes or token ranges owned by shards.

Since tokens are bloated (16 bytes instead of 8), and intervals are bloated
(40 bytes of overhead instead of 8), and since we have plenty of token ranges,
such vectors can exceed our allocation unit of 128 kB and cause allocation stalls.

This series fixes that by first generalizing some helpers and then changing
token_range_vector to use chunked_vector.

Although this touches IDL, there is no compatibility problem since the encodings
of vector and chunked_vector are identical.

There is no performance concern since token_range_vector is never used on
any hot path (hot paths always contain a partition key).

Fixes #3335.
Fixes #24115.

Fixes #24156

Closes scylladb/scylladb#25659

* github.com:scylladb/scylladb:
  dht: fragment token_range_vector
  partition_range_compat: generalize wrap/unwrap helpers
  utils: chunked_vector: add swap() method
  utils: chunked_vector: add range insert() overloads
2025-08-26 22:43:08 +03:00
Aleksandra Martyniuk
3a37d88060 replica: lower severity of failure log
Flush failure with seastar::named_gate_closed_exception is expected
if a respective compaction group was already stopped.

Lower the severity of a log in dirty_memory_manager::flush_one
for this exception.

Fixes: https://github.com/scylladb/scylladb/issues/25037.

Closes scylladb/scylladb#25355

(cherry picked from commit a10e241228)

Closes scylladb/scylladb#25649
2025-08-26 10:32:53 +03:00
Taras Veretilnyk
c272dc7746 keys: from_nodetool_style_string don't split single partition keys
Users with single-column partition keys that contain colon characters
were unable to use certain REST APIs and 'nodetool' commands, because the
API split the key by colon regardless of the partition key schema.

Affected commands:
- 'nodetool getendpoints'
- 'nodetool getsstables'
Affected endpoints:
- '/column_family/sstables/by_key'
- '/storage_service/natural_endpoints'

Refs: #16596 - This does not fully fix the issue, as users with compound
keys will face the issue if any column of the partition key contains
a colon character.
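
The behavior change can be sketched like this (a hypothetical simplification; the real from_nodetool_style_string consults the partition key schema): a single-column key is taken verbatim, colons and all, while compound keys are still split on ':' in nodetool style, which is why the compound-key case noted above remains affected.

```cpp
#include <string>
#include <vector>

// Parse a nodetool-style partition key string. With one key column, the
// input is kept as a single value; with several, it is split on ':'.
inline std::vector<std::string> parse_partition_key(const std::string& input,
                                                    size_t key_columns) {
    if (key_columns == 1) {
        return {input};  // keep "a:b:c" as one key value
    }
    std::vector<std::string> parts;
    size_t start = 0, pos;
    while ((pos = input.find(':', start)) != std::string::npos) {
        parts.push_back(input.substr(start, pos - start));
        start = pos + 1;
    }
    parts.push_back(input.substr(start));
    return parts;
}
```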

Closes scylladb/scylladb#24829

Closes scylladb/scylladb#25564
2025-08-26 10:31:54 +03:00
Avi Kivity
d2b608d41a dht: fragment token_range_vector
token_range_vector is a linear vector containing intervals
of tokens. It can grow quite large in certain places
and so cause stalls.

Convert it to utils::chunked_vector, which prevents allocation
stalls.

It is not used in any hot path, as it usually describes
vnodes or similar things.

Fixes #3335.

(cherry picked from commit 844a49ed6e)
2025-08-25 12:59:20 +03:00
Avi Kivity
8e5c8008a0 partition_range_compat: generalize wrap/unwrap helpers
These helpers convert vectors of wrapped intervals to
vectors of unwrapped intervals and vice versa.

Generalize them to work on any sequence type. This is in
preparation of moving from vectors to chunked_vectors.

(cherry picked from commit 83c2a2e169)
2025-08-25 12:47:01 +03:00
Avi Kivity
6171da6fbc utils: chunked_vector: add swap() method
Following std::vector, we implement swap(). It's a simple matter
of swapping all the contents.

A unit test is added.

(cherry picked from commit 13a75ff835)
2025-08-25 12:44:13 +03:00
Avi Kivity
faaec66be7 utils: chunked_vector: add range insert() overloads
Inserts an iterator range at some position.

Again we insert the range at the end and use std::rotate() to
move the newly inserted elements into place, forgoing possible
optimizations.

Unit tests are added.
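
The append-then-rotate idiom described above, sketched on a plain std::vector for illustration (the actual change is in utils::chunked_vector; the range must not alias the vector itself):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Insert [first, last) at position pos: append at the end, then rotate
// the new tail into place. Simple and correct, at the cost of some extra
// element moves compared to a hand-written shifting loop.
template <typename T, typename It>
void insert_via_rotate(std::vector<T>& v, size_t pos, It first, It last) {
    size_t old_size = v.size();
    v.insert(v.end(), first, last);  // 1. append the new elements
    std::rotate(v.begin() + pos,     // 2. move them into position
                v.begin() + old_size,
                v.end());
}
```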

(cherry picked from commit 24e0d17def)
2025-08-25 12:44:13 +03:00
Aleksandra Martyniuk
43a35f299a repair: distribute tablet_repair_task_meta among shards
Currently, in repair_service::repair_tablets a shard that initiates
the repair keeps tablet_repair_task_meta of all tablets that have a replica
on this node (on any shard). This may lead to oversized allocations.

Add remote_metas class which takes care of distributing tablet_repair_task_meta
among different shards. An additional class remote_metas_builder was
added in order to ensure safety and separate writes and reads to meta
vectors.

Fixes: #23632
(cherry picked from commit 132e6495a3)
2025-08-25 10:57:32 +02:00
Aleksandra Martyniuk
ca91421ed9 repair: do not keep erm in tablet_repair_task_meta
Do not keep erm in tablet_repair_task_meta to avoid non-owner shared
pointer access when the metas are distributed among shards.

Pass a utils::chunked_vector of erms to tablet_repair_task_impl to
preserve safety.

(cherry picked from commit 603a2dbb10)
2025-08-25 10:45:59 +02:00
kendrick-ren
9a4d92e1b8 Update launch-on-gcp.rst
Add the missing '=' sign to the --zone option; without it, the command fails.

Closes scylladb/scylladb#25471

(cherry picked from commit d6e62aeb6a)

Closes scylladb/scylladb#25644
2025-08-25 11:05:54 +03:00
Benny Halevy
2f50cce913 api: storage_service: fix token_range documentation
Note that the token_range type is used only by describe_ring.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

Closes scylladb/scylladb#25609

(cherry picked from commit 45c496c276)

Closes scylladb/scylladb#25639
2025-08-25 11:05:27 +03:00
Pavel Emelyanov
9fa2dd7788 Merge '[Backport 2025.2] cql3: Warn when creating RF-rack-invalid keyspace' from Scylladb[bot]
Although RF-rack-valid keyspaces are not universally enforced
yet (they're governed by the configuration option
`rf_rack_valid_keyspaces`), we'd like to encourage the user to
abide by the restriction.

To that end, we're introducing a warning when creating or
altering a keyspace. If the configuration option is disabled,
but the user is trying to create an RF-rack-invalid keyspace,
they'll receive a warning.

If the option is turned off, we will also log all of the
RF-rack-invalid keyspaces at start-up.

We provide validation tests.

Fixes scylladb/scylladb#23330

Backport: we'd like to encourage the user to abide by the restriction
even when they don't enforce it to make it easier in the future to
adjust the schema when there's no way to disable it anymore. Because
of that, we'd like to backport it to all relevant versions, starting with 2025.1.

- (cherry picked from commit 60ea22d887)

- (cherry picked from commit af8a3dd17b)

- (cherry picked from commit 837d267cbf)

Parent PR: #24785

Closes scylladb/scylladb#25634

* github.com:scylladb/scylladb:
  main: Log RF-rack-invalid keyspaces at startup
  cql3/statements: Fix indentation
  cql3: Warn when creating RF-rack-invalid keyspace
2025-08-25 11:04:55 +03:00
Dawid Mędrek
99b65be52e main: Log RF-rack-invalid keyspaces at startup
When the configuration option `rf_rack_valid_keyspaces` is enabled and there
is an RF-rack-invalid keyspace, starting a node fails. However, when the
configuration option is disabled, but there still is a keyspace that violates
the condition, we'd like Scylla to print a warning informing the user about
the fact. That's what happens in this commit.

We provide a validation test.

(cherry picked from commit 837d267cbf)
2025-08-22 14:31:13 +00:00
Dawid Mędrek
8ca0a9b56a cql3/statements: Fix indentation
(cherry picked from commit af8a3dd17b)
2025-08-22 14:31:13 +00:00
Dawid Mędrek
4df1d35375 cql3: Warn when creating RF-rack-invalid keyspace
Although RF-rack-valid keyspaces are not universally enforced
yet (they're governed by the configuration option
`rf_rack_valid_keyspaces`), we'd like to encourage the user to
abide by the restriction.

To that end, we're introducing a warning when creating or
altering a keyspace. If the configuration option is disabled,
but the user is trying to create an RF-rack-invalid keyspace,
they'll receive a warning.

We provide a validation test.

(cherry picked from commit 60ea22d887)
2025-08-22 14:31:13 +00:00
Michał Chojnowski
a17bc98728 sstables/types.hh: fix fmt::formatter<sstables::deletion_time>
Obvious typo.

Fixes scylladb/scylladb#25556

Closes scylladb/scylladb#25557

(cherry picked from commit c1b513048c)

Closes scylladb/scylladb#25587
2025-08-22 10:21:32 +03:00
Jenkins Promoter
a653819865 Update ScyllaDB version to: 2025.2.3 2025-08-19 22:21:55 +03:00
Pavel Emelyanov
8631054115 Merge '[Backport 2025.2] db/hints: Improve logs' from Scylladb[bot]
Before these changes, the logs in hinted handoff often didn't provide
crucial information like the identifier of the node that hints were
being sent to. Also, some of the logs were misleading and referred to
other places in the code than the one where an exception or some other
situation really occurred.

We modify those logs, extending them by more valuable information
and fixing existing issues. What's more, all of the logs in
`hint_endpoint_manager` and `hint_sender` follow a consistent format
now:

```
<class_name>[<destination host ID>]:<function_name>: <message>
```

This way, we should always have AT LEAST the basic information.

Fixes scylladb/scylladb#25466

Backport:
There is no risk in backporting these changes. They only have
impact on the logs. On the other hand, they might prove helpful
when debugging an issue in hinted handoff.

- (cherry picked from commit 2327d4dfa3)

- (cherry picked from commit d7bc9edc6c)

- (cherry picked from commit 6f1fb7cfb5)

Parent PR: #25470

Closes scylladb/scylladb#25537

* github.com:scylladb/scylladb:
  db/hints: Add new logs
  db/hints: Adjust log levels
  db/hints: Improve logs
2025-08-19 17:11:41 +03:00
Pavel Emelyanov
dee53a0107 Merge '[Backport 2025.2] generic server: 2 step shutdown' from Scylladb[bot]
This PR implements solution proposed in scylladb/scylladb#24481

Instead of terminating connections immediately, the shutdown now proceeds in two stages: first closing the receive (input) side to stop new requests, then waiting for all active requests to complete before fully closing the connections.

The updated shutdown process is as follows:

1. Initial Shutdown Phase
   * Close the accept gate to block new incoming connections.
   * Abort all accept() calls.
   * For all active connections:
      * Close only the input side of the connection to prevent new requests.
      * Keep the output side open to allow responses to be sent.

2. Drain Phase
   * Wait for all in-progress requests to either complete or fail.

3. Final Shutdown Phase
   * Fully close all connections.

Fixes scylladb/scylladb#24481

- (cherry picked from commit 122e940872)

- (cherry picked from commit 3848d10a8d)

- (cherry picked from commit 3610cf0bfd)

- (cherry picked from commit 27b3d5b415)

- (cherry picked from commit 061089389c)

- (cherry picked from commit 7334bf36a4)

- (cherry picked from commit ea311be12b)

- (cherry picked from commit 4f63e1df58)

Parent PR: #24499

Closes scylladb/scylladb#25518

* github.com:scylladb/scylladb:
  test: Set `request_timeout_on_shutdown_in_seconds` to `request_timeout_in_ms`,  decrease request timeout.
  generic_server: Two-step connection shutdown.
  transport: cosmetic change, remove extra blanks.
  generic_server: replace empty destructor with `= default`
  generic_server: refactor connection::shutdown to use `shutdown_input` and `shutdown_output`
  generic_server: add `shutdown_input` and `shutdown_output` functions to `connection` class.
  test: Add test for query execution during CQL server shutdown
2025-08-19 17:11:22 +03:00
Wojciech Mitros
6f7f639f54 test: run mv tests depending on metrics on a standalone instance
The test_base_partition_deletion_with_metrics test case (and the batch
variant) uses the metric of view updates done during its runtime to check
if we didn't perform too many of them. The test runs in the cqlpy suite,
which runs all test cases sequentially on one Scylla instance. Because
of this, if another test case starts a process which generates view
updates and doesn't wait for it to finish before it exits, we may
observe too many view updates in test_base_partition_deletion_with_metrics
and fail the test.
In all test cases we make sure that all tables that were created
during the test are dropped at the end. However, that doesn't
stop the view building process immediately, so the issue can happen
even if we drop the view. I confirmed it by adding a test just before
test_base_partition_deletion_with_metrics which builds a big
materialized view and drops it at the end - the metrics check still failed.

The issue could be caused by any of the existing test cases where we create
a view and don't wait for it to be built. Note that even if we start adding
rows after creating the view, some of them may still be included in the view
building, as the view building process is started asynchronously. In such
a scenario, the view building also doesn't cause any issues with the data in
these tests - writes performed after view creation generate view updates
synchronously when they're local (and we're running a single Scylla server),
so the corresponding view updates generated during view building are redundant.

Because we have many test cases which could be causing this issue, instead
of waiting for the view building to finish in every single one of them, we
move the susceptible test cases to be run on separate Scylla instances, in
the "cluster" suite. There, no other test cases will influence the results.

Fixes https://github.com/scylladb/scylladb/issues/20379

Closes scylladb/scylladb#25209

(cherry picked from commit 2ece08ba43)

Closes scylladb/scylladb#25503
2025-08-19 17:10:58 +03:00
Pavel Emelyanov
39a85df231 Merge '[Backport 2025.2] test: test_mv_backlog: fix to consider internal writes' from Scylladb[bot]
The PR fixes a test flakiness issue in test_mv_backlog related to reading metrics.

The first commit fixes a more general issue in the ScyllaMetrics helper class where it doesn't return the value of all matching lines when a specific shard is requested, but it breaks after the first match.

The second commit fixes a test issue where it expects exactly one write to be throttled, not taking into account other internal writes that may be executed during this time.

Fixes https://github.com/scylladb/scylladb/issues/23139

backport to improve CI stability - test only change

- (cherry picked from commit 5c28cffdb4)

- (cherry picked from commit 276a09ac6e)

Parent PR: #25279

Closes scylladb/scylladb#25474

* github.com:scylladb/scylladb:
  test: test_mv_backlog: fix to consider internal writes
  test/pylib/rest_client: fix ScyllaMetrics filtering
2025-08-19 17:10:39 +03:00
Dawid Mędrek
0f4965b8ae db/commitlog: Extend error messages for corrupted data
We're providing additional information in error messages when throwing
an exception related to data corruption: when a segment is truncated
and when its content is invalid. That might prove helpful when debugging.

Closes scylladb/scylladb#25190

(cherry picked from commit 408b45fa7e)

Closes scylladb/scylladb#25460
2025-08-19 17:10:17 +03:00
Ferenc Szili
a0a346496e test: remove test_tombstone_gc_disabled_on_pending_replica
The test test_tombstone_gc_disabled_on_pending_replica was added when
we fixed (#20788) the potential problem with data resurrection during
file based streaming. The issue was occurring only in Enterprise, but
we added the fix in OSS to limit code divergence. This test was added
together with the fix in OSS with the idea to guard this change in OSS.
The real reproducer and test for this fix was added later, after the
fix was ported into Enterprise.
It is in: test/cluster/test_resurrection.py

Since Enterprise has been merged into OSS, there is no more need to
keep the test test_tombstone_gc_disabled_on_pending_replica. Also,
it is flaky with very low probability of failure, making it difficult
to investigate the cause of failure.

Fixes: #22182

Refs: scylladb/scylladb#25448

Closes scylladb/scylladb#25134

(cherry picked from commit 7ce96345bf)

Closes scylladb/scylladb#25572
2025-08-19 16:02:42 +03:00
Patryk Jędrzejczak
3113968380 test: test_maintenance_socket: use cluster_con for driver sessions
The test creates all driver sessions by itself. As a consequence, all
sessions use the default request timeout of 10s. This can be too low for
the debug mode, as observed in scylladb/scylla-enterprise#5601.

In this commit, we change the test to use `cluster_con`, so that the
sessions have the request timeout set to 200s from now on.

Fixes scylladb/scylla-enterprise#5601

This commit changes only the test and is a CI stability improvement,
so it should be backported all the way to 2024.2. 2024.1 doesn't have
this test.

Closes scylladb/scylladb#25510

(cherry picked from commit 03cc34e3a0)

Closes scylladb/scylladb#25546
2025-08-18 16:43:31 +02:00
Dawid Mędrek
bf7776bc3b db/hints: Add new logs
We're adding new logs in just a few places that may however prove
important when debugging issues in hinted handoff in the future.

(cherry picked from commit 6f1fb7cfb5)
2025-08-18 16:00:45 +02:00
Dawid Mędrek
d447f8eac0 db/hints: Adjust log levels
Some of the logs could be clogging Scylla's logs, so we demote their
level to a lower one.

On the other hand, some of the logs would most likely not do that,
and they could be useful when debugging -- we promote them to debug
level.

(cherry picked from commit d7bc9edc6c)
2025-08-18 16:00:45 +02:00
Dawid Mędrek
1d09be7641 db/hints: Improve logs
Before these changes, the logs in hinted handoff often didn't provide
crucial information like the identifier of the node that hints were
being sent to. Also, some of the logs were misleading and referred to
other places in the code than the one where an exception or some other
situation really occurred.

We modify those logs, extending them by more valuable information
and fixing existing issues. What's more, all of the logs in
`hint_endpoint_manager` and `hint_sender` follow a consistent format
now:

```
<class_name>[<destination host ID>]:<function_name>: <message>
```

This way, we should always have AT LEAST the basic information.

(cherry picked from commit 2327d4dfa3)
2025-08-18 16:00:42 +02:00
Sergey Zolotukhin
687439615f test: Set request_timeout_on_shutdown_in_seconds to request_timeout_in_ms,
decrease request timeout.

In debug mode, queries may sometimes take longer than the default 30 seconds.
To address this, the timeout value `request_timeout_on_shutdown_in_seconds`
during tests is aligned with other request timeouts.
Change request timeout for tests from 180s to 90s since we must keep the request
timeout during shutdown significantly lower than the graceful shutdown timeout (2m),
or else a request timeout would cause a graceful shutdown timeout and fail a test.

(cherry picked from commit 4f63e1df58)
2025-08-15 18:20:45 +02:00
Sergey Zolotukhin
44853b3bec generic_server: Two-step connection shutdown.
When shutting down in `generic_server`, connections are now closed in two steps.
First, only the RX (receive) side is shut down. Then, after all ongoing requests
are completed or a timeout occurs, the connections are fully closed.

Fixes scylladb/scylladb#24481

(cherry picked from commit ea311be12b)
2025-08-15 18:20:42 +02:00
Sergey Zolotukhin
31c44e1b33 transport: cosmetic change, remove extra blanks.
(cherry picked from commit 7334bf36a4)
2025-08-15 17:33:06 +02:00
Sergey Zolotukhin
4761da3aff generic_server: replace empty destructor with = default
This change improves code readability by explicitly marking the destructor as defaulted.

(cherry picked from commit 27b3d5b415)
2025-08-15 17:25:21 +02:00
Sergey Zolotukhin
7ca00444e3 generic_server: refactor connection::shutdown to use shutdown_input and shutdown_output
This change improves logging and modifies the behavior to attempt closing
the output side of a connection even if an error occurs while closing the input side.

(cherry picked from commit 3610cf0bfd)
2025-08-15 17:25:17 +02:00
Abhinav Jha
a0161ef67a raft: replication test: change rpc_propose_conf_change test to SEASTAR_THREAD_TEST_CASE
RAFT_TEST_CASE macro creates 2 test cases, one with random 20% packet
loss named name_drops. The framework makes hard coded assumptions about
leader which doesn't hold well in case of packet losses.

This short term fix disables the packet drop variant of the specified test.
It should be safe to re-enable it once the whole framework is re-worked to
remove these hard coded assumptions.

This PR fixes a bug. Hence we need to backport it.

Fixes: scylladb/scylladb#23816

Closes scylladb/scylladb#25489

(cherry picked from commit a0ee5e4b85)

Closes scylladb/scylladb#25527
2025-08-15 13:27:52 +03:00
Wojciech Przytuła
a95ad052df Fix link to ScyllaDB manual
The link would point to outdated OS docs. I fixed it to point to up-to-date Enterprise docs.

Closes scylladb/scylladb#25328

(cherry picked from commit 7600ccfb20)

Closes scylladb/scylladb#25484
2025-08-15 13:27:08 +03:00
Jenkins Promoter
5b5fbff2e6 Update pgo profiles - aarch64 2025-08-15 04:40:12 +03:00
Jenkins Promoter
981eeb275d Update pgo profiles - x86_64 2025-08-15 04:06:14 +03:00
Sergey Zolotukhin
43addab2e5 generic_server: add shutdown_input and shutdown_output functions to
`connection` class.

The functions are just wrappers for _fd.shutdown_input() and _fd.shutdown_output(), with added error reporting.
Needed by later changes.

(cherry picked from commit 3848d10a8d)
2025-08-14 13:22:04 +00:00
Sergey Zolotukhin
aa8913f317 test: Add test for query execution during CQL server shutdown
This test simulates a scenario where a query is being executed while
the query coordinator begins shutting down the CQL server and client
connections. The shutdown process should wait until the query execution
is either completed or timed out.

Test for scylladb/scylladb#24481

(cherry picked from commit 122e940872)
2025-08-14 13:22:04 +00:00
Anna Stuchlik
1abb9106cf doc: add the patch release upgrade procedure for version 2025.2
Fixes https://github.com/scylladb/scylladb/issues/25322

Closes scylladb/scylladb#25343
2025-08-14 10:43:16 +02:00
Michael Litvak
a114e95798 test: test_mv_backlog: fix to consider internal writes
The test executes a single write, fetching metrics before and after the
write, and expects the total throttled writes count to be increased
exactly by one.

However, other internal writes (compaction for example) may be executed
during this time and be throttled, causing the metrics to be increased
by more than expected.

To address this, we filter the metrics by the scheduling group label of
the user write, to filter out the compaction writes that run in the
compaction scheduling group.

Fixes scylladb/scylladb#23139

(cherry picked from commit 276a09ac6e)
2025-08-12 20:53:16 +03:00
Michael Litvak
5161342910 test/pylib/rest_client: fix ScyllaMetrics filtering
In the ScyllaMetrics `get` function, when requesting the value for a
specific shard, it is expected to return the sum of all values of
metrics for that shard that match the labels.

However, it would return the value of the first matching line it finds
instead of summing all matching lines.

For example, if we have two lines for one shard like:
some_metric{scheduling_group_name="compaction",shard="0"} 1
some_metric{scheduling_group_name="sl:default",shard="0"} 2

The result of this call would be 1 instead of 3:
get('some_metric', shard="0")

We fix this to sum all matching lines.
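
The corrected lookup can be sketched like this (a hypothetical C++ rendering for illustration; the real helper is the Python ScyllaMetrics class): a matching line must carry every label in the filter, extra labels are ignored, and the values of all matching lines are summed instead of returning the first hit.

```cpp
#include <map>
#include <string>
#include <vector>

// Sum the values of all Prometheus-style text lines for `name` whose
// label set contains every (key, value) pair in `filter`.
inline double metric_sum(const std::vector<std::string>& lines,
                         const std::string& name,
                         const std::map<std::string, std::string>& filter) {
    double total = 0;
    for (const auto& line : lines) {
        if (line.rfind(name + "{", 0) != 0) {
            continue;  // different metric name
        }
        bool matches = true;
        for (const auto& [k, v] : filter) {
            if (line.find(k + "=\"" + v + "\"") == std::string::npos) {
                matches = false;  // a required label is missing or differs
                break;
            }
        }
        if (!matches) {
            continue;
        }
        // The sample value follows the closing brace.
        total += std::stod(line.substr(line.find('}') + 1));
    }
    return total;
}
```

On the two example lines from the commit message, summing yields 3 for shard "0" rather than the first-match value of 1.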

The filtering of lines by labels is fixed to allow specifying only some
of the labels. Previously, for a line to match the filter, either the
filter had to be empty, or every label in the metric line had to be
specified in the filter parameter and match its value, which is
unexpected and breaks when more labels are added.

We also simplify the function signature and the implementation - instead
of having the shard as a separate parameter, it can be specified as a
label, like any other label.

(cherry picked from commit 5c28cffdb4)
2025-08-12 20:53:16 +03:00
Patryk Jędrzejczak
0a3b93a4e4 docs: Raft recovery procedure: recommend verifying participation in Raft recovery
This instruction adds additional safety. The faster we notice that
a node didn't restart properly, the better.

The old gossip-based recovery procedure had a similar recommendation
to verify that each restarting node entered `RECOVERY` mode.

Fixes #25375

This is a documentation improvement. We should backport it to all
branches with the new recovery procedure, so 2025.2 and 2025.3.

Closes scylladb/scylladb#25376

(cherry picked from commit 7b77c6cc4a)

Closes scylladb/scylladb#25439
2025-08-11 15:52:41 +02:00
Botond Dénes
96ed160bd9 Merge '[Backport 2025.2] GCP Key Provider: Fix authentication issues' from Scylladb[bot]
* Fix discovery of application default credentials by using fully expanded pathnames (no tildes).
* Fix grant type in token request with user credentials.

Fixes #25345.

- (cherry picked from commit 77cc6a7bad)

- (cherry picked from commit b1d5a67018)

Parent PR: #25351

Closes scylladb/scylladb#25406

* github.com:scylladb/scylladb:
  encryption: gcp: Fix the grant type for user credentials
  encryption: gcp: Expand tilde in pathnames for credentials file
2025-08-11 07:00:03 +03:00
Szymon Malewski
3791a6a4c5 test/alternator: enable more relevant logs in CI.
For the alternator test suite, this patch sets all 'alternator-*' loggers and the 'paxos' logger to trace level. This should significantly ease debugging of failed tests, while it has no effect on test time and increases log size only by 7%.
This affects running alternator tests only with `test.py`, not with `test/alternator/run`.

Closes #24645

Closes scylladb/scylladb#25327

(cherry picked from commit eb11485969)

Closes scylladb/scylladb#25382
2025-08-11 06:59:34 +03:00
Taras Veretilnyk
3d31b2118f docs: fix typo in command name enbleautocompaction -> enableautocompaction
Renamed the file and updated all references from 'enbleautocompaction' to the correct 'enableautocompaction'.

Fixes scylladb/scylladb#25172

Closes scylladb/scylladb#25175

(cherry picked from commit 6b6622e07a)

Closes scylladb/scylladb#25217
2025-08-11 06:58:08 +03:00
Benny Halevy
da0f9608d8 scylla-sstable: print_query_results_json: continue loop if row is disengaged
Otherwise it is accessed right when exiting the if block.
Add a unit test reproducing the issue and validating the fix.

Fixes #25325

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

Closes scylladb/scylladb#25326

(cherry picked from commit 5e5e63af10)

Closes scylladb/scylladb#25378
2025-08-10 18:54:29 +03:00
Botond Dénes
d845de84aa Merge '[Backport 2025.2] truncate: change check for write during truncate into a log warning' from Scylladb[bot]
TRUNCATE TABLE performs a memtable flush and then discards the sstables of the table being truncated. It collects the highest replay position for both of these. When the highest replay position of the discarded sstables is higher than the highest replay position of the flushed memtable, that means that we have had writes during truncate which have been flushed to disk independently of the truncate process. We check for this and trigger an on_internal_error() which throws an exception, informing the user that writing data concurrently with TRUNCATE TABLE is not advised.

The problem with this is that truncate is also called from DROP KEYSPACE and DROP TABLE. These are raft operations and exceptions thrown by them are caught by the (...) exception handler in the raft applier fiber, which then exits leaving the node without the ability to execute subsequent raft commands.

This commit changes the on_internal_error() into a warning log entry. It also outputs the keyspace/table names and the offending replay positions which caused the check to fail.

This PR also adds a test which validates that TRUNCATE works correctly with concurrent writes. More specifically, it checks that:
- all data written before TRUNCATE starts is deleted
- none of the data after TRUNCATE completes is deleted

Fixes: #25173
Fixes: #25013

Backport is needed in versions which check for truncate with concurrent writes using `on_internal_error()`: 2025.3 2025.2 2025.1

- (cherry picked from commit 268ec72dc9)

- (cherry picked from commit 33488ba943)

Parent PR: #25174

Closes scylladb/scylladb#25349

* github.com:scylladb/scylladb:
  truncate: add test for truncate with concurrent writes
  truncate: change check for write during truncate into a log warning
2025-08-08 11:44:41 +03:00
Nikos Dragazis
0e4b1196ee encryption: gcp: Fix the grant type for user credentials
Exchanging a refresh token for an access token requires the
"refresh_token" grant type [1].

[1] https://datatracker.ietf.org/doc/html/rfc6749#section-6

Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
(cherry picked from commit b1d5a67018)
2025-08-07 21:45:48 +00:00
Nikos Dragazis
ebac51202f encryption: gcp: Expand tilde in pathnames for credentials file
The GCP host searches for application default credentials in known
locations within the user's home directory using
`seastar::file_exists()`. However, this function does not perform tilde
expansion in pathnames.

Replace tildes with the home directory from the HOME environment
variable.
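
The expansion described above can be modeled like this (a Python sketch of the logic; the actual fix is in ScyllaDB's C++ encryption code):

```python
# Sketch of tilde expansion using the HOME environment variable, as
# described in the commit. Only "~" and "~/..." prefixes are expanded.
import os

def expand_tilde(path: str) -> str:
    home = os.environ.get("HOME")
    if home and (path == "~" or path.startswith("~/")):
        return home + path[1:]
    return path
```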

Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
(cherry picked from commit 77cc6a7bad)
2025-08-07 21:45:48 +00:00
Taras Veretilnyk
edb10b8f4d docs: Sort commands list in nodetool.rst
Fixes scylladb/scylladb#25330

Closes scylladb/scylladb#25331

(cherry picked from commit bcb90c42e4)

Closes scylladb/scylladb#25371
2025-08-07 13:13:59 +03:00
Botond Dénes
cccf726b54 Merge '[Backport 2025.2] test: introduce upgrade tests to test.py, add a SSTable dict compression upgrade test' from Michał Chojnowski
This PR adds an upgrade test for SSTable compression with shared dictionaries, and adds some bits to pylib and test.py to support that.

In the series, we:

1. Mount $XDG_CACHE_HOME into dbuild.
2. Add a pylib function which downloads and installs a released ScyllaDB package into a subdirectory of $XDG_CACHE_HOME/scylladb/test.py, and returns the path to bin/scylla.
3. Add new methods and params to the cluster manager, which let the test start nodes with historical Scylla executables, and switch executables during the test.
4. Add a test which uses the above to run an upgrade test between the released package and the current build.
5. Add --run-internet-dependent-tests to test.py which lets the user of test.py skip this test (and potentially other internet-dependent tests in the future).

(The patch modifying wait_for_cql_and_get_hosts is part of the new test; the new test needs it to check how particular nodes in a mixed-version cluster react to some CQL queries.)

This is a follow-up to https://github.com/scylladb/scylladb/pull/23025, split into a separate PR because the potential addition of upgrade tests to test.py deserved a separate thread.

Needs backport to 2025.2, because that's where the tested feature is introduced.

Fixes https://github.com/scylladb/scylladb/issues/24110

- (cherry picked from commit 63218bb094)
- (cherry picked from commit cc7432888e)
- (cherry picked from commit 34098fbd1f)
- (cherry picked from commit 2ef0db0a6b)
- (cherry picked from commit 1ff7e09edc)
- (cherry picked from commit 5da19ff6a6)
- (cherry picked from commit d3cb873532)
- (cherry picked from commit dd878505ca)

Parent PR: https://github.com/scylladb/scylladb/pull/23538

Closes scylladb/scylladb#25158

* github.com:scylladb/scylladb:
  test: add test_sstable_compression_dictionaries_upgrade.py
  test.py: add --run-internet-dependent-tests
  pylib/manager_client: add server_switch_executable
  test/pylib: in add_server, give a way to specify the executable and version-specific config
  pylib: pass scylla_env environment variables to the topology suite
  test/pylib: add get_scylla_2025_1_executable()
  pylib/scylla_cluster: give a way to pass executable-specific options to nodes
  dbuild: mount "$XDG_CACHE_HOME/scylladb"
2025-08-07 06:26:25 +03:00
Nikos Dragazis
5ae00a3dab test: kmip: Fix segfault from premature destruction of port_promise
`kmip_test_helper()` is a utility function to spawn a dedicated PyKMIP
server for a particular Boost test case. The function runs the server as
an external process and uses a thread to parse the port from the
server's logs. The thread communicates the port to the main thread via
a promise.

The current implementation has a bug where the thread may set a value
to the promise after its destruction, causing a segfault. This happens
when the server does not start within 20 seconds, in which case the port
future throws and the stack unwinding machinery destroys the port
promise before the thread that writes to it.

Fix the bug by declaring the promise before the cleanup action.

The bug has been encountered in CI runs on slow machines, where the
PyKMIP server takes too long to create its internal tables (due to slow
fdatasync calls from SQLite). This patch does not improve CI stability -
it only ensures that the error condition is properly reflected in the
test output.

This patch is not a backport. The same bug has been fixed in master as
part of a larger rewrite of the `kmip_test_helper()` (see 722e2bce96).

Refs #24747, #24842.
Fixes #24574.

Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>

Closes scylladb/scylladb#25029
2025-08-06 11:58:56 +03:00
Raphael S. Carvalho
be94db3ace replica: Fix take_storage_snapshot() running concurrently to merge completion
Some background:
When merge happens, a background fiber wakes up to merge compaction
groups of sibling tablets into main one. It cannot happen when
rebuilding the storage group list, since token metadata update is
not preemptable. So a storage group, post merge, has the main
compaction group and two other groups to be merged into the main.
When the merge happens, those two groups are empty and will be
freed.

Consider this scenario:
1) merge happens, from 2 to 1 tablet
2) produces a single storage group, containing main and two
other compaction groups to be merged into main.
3) take_storage_snapshot(), triggered by migration post merge,
gets a list of pointers to all compaction groups.
4) take_storage_snapshot() iterates first on the main group, yields.
5) background fiber wakes up, moves the data into main
and frees the two groups
6) take_storage_snapshot() advances to the other groups, which
were freed in step 5.
7) segmentation fault

In addition to memory corruption, there's also a potential for
data to escape the iteration in take_storage_snapshot(), since
data can be moved across compaction groups in background, all
belonging to the same storage group. That could result in
data loss.

Readers should all operate on storage group level since it can
provide a view on all the data owned by a tablet replica.
The movement of an sstable from group A to B is atomic, but
iterating first on A, then later on B, might miss data that
was moved from B to A before the iteration reached B.
By switching to storage group in the interface that retrieves
groups by token range, we guarantee that all data of a given
replica can be found regardless of which compaction group they
sit on.

Fixes #23162.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>

Closes scylladb/scylladb#24058

(cherry picked from commit 28056344ba)

Closes scylladb/scylladb#25338
2025-08-06 09:41:56 +03:00
Botond Dénes
22942c0a85 Merge '[Backport 2025.2] Raft-based recovery procedure: simplify rolling restart with recovery_leader' from Scylladb[bot]
The following steps are performed in sequence as part of the
Raft-based recovery procedure:
- set `recovery_leader` to the host ID of the recovery leader in
  `scylla.yaml` on all live nodes,
- send the `SIGHUP` signal to all Scylla processes to reload the config,
- perform a rolling restart (with the recovery leader being restarted
  first).

These steps are not intuitive and more complicated than they could be.

In this PR, we simplify these steps. From now on, we will be able to
simply set `recovery_leader` on each node just before restarting it.

Apart from making necessary changes in the code, we also update all
tests of the Raft-based recovery procedure and the user-facing
documentation.

Fixes scylladb/scylladb#25015

The Raft-based procedure was added in 2025.2. This PR makes the
procedure simpler and less error-prone, so it should be backported
to 2025.2 and 2025.3.

- (cherry picked from commit ec69028907)

- (cherry picked from commit 445a15ff45)

- (cherry picked from commit 23f59483b6)

- (cherry picked from commit ba5b5c7d2f)

- (cherry picked from commit 9e45e1159b)

- (cherry picked from commit f408d1fa4f)

Parent PR: #25032

Closes scylladb/scylladb#25334

* github.com:scylladb/scylladb:
  docs: document the option to set recovery_leader later
  test: delay setting recovery_leader in the recovery procedure tests
  gossip: add recovery_leader to gossip_digest_syn
  db: system_keyspace: peers_table_read_fixup: remove rows with null host_id
  db/config, gms/gossiper: change recovery_leader to UUID
  db/config, utils: allow using UUID as a config option
2025-08-06 09:41:17 +03:00
Michał Jadwiszczak
b58543dab7 storage_service, group0_state_machine: move SL cache update from topology_state_load() to load_snapshot()
Currently the service levels cache is unnecessarily updated in every
call of `topology_state_load()`.
But it is enough to reload it only when a snapshot is loaded.
(The cache is also already updated when there is a change to one of
`service_levels_v2`, `role_members`, `role_attributes` tables.)

Fixes scylladb/scylladb#25114
Fixes scylladb/scylladb#23065

Closes scylladb/scylladb#25116

(cherry picked from commit 10214e13bd)

Closes scylladb/scylladb#25304
2025-08-06 09:39:55 +03:00
Aleksandra Martyniuk
a468138716 api: storage_service: do not log the exception that is passed to user
The exceptions that are thrown by the tasks started via the API are
propagated to users. Hence, there is no need to log them.

Remove the logs about exceptions in user-started tasks.

Fixes: https://github.com/scylladb/scylladb/issues/16732.

Closes scylladb/scylladb#25153

(cherry picked from commit e607ef10cd)

Closes scylladb/scylladb#25296
2025-08-06 09:36:07 +03:00
Dawid Mędrek
8e96968fb7 test: Enable RF-rack-valid keyspaces in all Python suites
We're enabling the configuration option `rf_rack_valid_keyspaces`
in all Python test suites. All relevant tests have been adjusted
to work with it enabled.

That encompasses the following suites:

* alternator,
* broadcast_tables,
* cluster (already enabled in scylladb/scylladb@ee96f8dcfc),
* cql,
* cqlpy (already enabled in scylladb/scylladb@be0877ce69),
* nodetool,
* rest_api.

Two remaining suites that use tests written in Python, redis and scylla_gdb,
are not affected, at least not directly.

The redis suite requires creating an instance of Scylla manually, and the tests
don't do anything that could violate the restriction.

The scylla_gdb suite focuses on testing the capabilities of scylla-gdb.py, but
even then it reuses the `run` file from the cqlpy suite.

Fixes scylladb/scylladb#25126

Closes scylladb/scylladb#24617

(cherry picked from commit b41151ff1a)

Closes scylladb/scylladb#25230
2025-08-06 09:35:34 +03:00
Ferenc Szili
932223414b truncate: add test for truncate with concurrent writes
test_validate_truncate_with_concurrent_writes checks if truncate deletes
all the data written before the truncate starts, and does not delete any
data written after truncate completes.

(cherry picked from commit 33488ba943)
2025-08-06 00:51:43 +00:00
Ferenc Szili
b50272d663 truncate: change check for write during truncate into a log warning
TRUNCATE TABLE performs a memtable flush and then discards the sstables
of the table being truncated. It collects the highest replay position
for both of these. When the highest replay position of the discarded
sstables is higher than the highest replay position of the flushed
memtable, that means that we have had writes during truncate which have
been flushed to disk independently of the truncate process. We check for
this and trigger an on_internal_error() which throws an exception,
informing the user that writing data concurrently with TRUNCATE TABLE is
not advised.

The problem with this is that truncate is also called from DROP KEYSPACE
and DROP TABLE. These are raft operations and exceptions thrown by them
are caught by the (...) exception handler in the raft applier fiber,
which then exits leaving the node without the ability to execute
subsequent raft commands.

This commit changes the on_internal_error() into a warning log entry. It
also outputs the keyspace/table names, the truncated_at timepoint, and the
offending replay positions which caused the check to fail.

Fixes: #25173
Fixes: #25013
(cherry picked from commit 268ec72dc9)
2025-08-06 00:51:43 +00:00
Patryk Jędrzejczak
762b5e3ae8 docs: document the option to set recovery_leader later
In one of the previous commits, we made it possible to set
`recovery_leader` on each node just before restarting it. Here, we
update the corresponding documentation.

(cherry picked from commit f408d1fa4f)
2025-08-05 10:59:07 +00:00
Patryk Jędrzejczak
beb28a6155 test: delay setting recovery_leader in the recovery procedure tests
In the previous commit, we made it possible to set `recovery_leader`
on each node just before restarting it. Here, we change all the
tests of the Raft-based recovery procedure to use and test this option.

(cherry picked from commit 9e45e1159b)
2025-08-05 10:59:06 +00:00
Patryk Jędrzejczak
b79902baf8 gossip: add recovery_leader to gossip_digest_syn
In the new Raft-based recovery procedure, live nodes join the new
group 0 one by one during a rolling restart. There is a time window when
some of them are in the old group 0, while others are in the new group
0. This causes a group 0 mismatch in `gossiper::handle_syn_msg`. The
current solution for this problem is to ignore group 0 mismatches if
`recovery_leader` is set on the local node and to ask the administrator
to perform the rolling restart in the following way:
- set `recovery_leader` in `scylla.yaml` on all live nodes,
- send the `SIGHUP` signal to all Scylla processes to reload the config,
- proceed with the rolling restart.

This commit makes `gossiper::handle_syn_msg` ignore group 0 mismatches
when exactly one of the two gossiping nodes has `recovery_leader` set.
We achieve this by adding `recovery_leader` to `gossip_digest_syn`.
This change makes setting `recovery_leader` earlier on all nodes and
reloading the config unnecessary. From now on, the administrator can
simply restart each node with `recovery_leader` set.
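
The mismatch-handling rule introduced here ("exactly one of the two gossiping nodes has `recovery_leader` set") reduces to an XOR, which can be sketched as (illustrative Python model; the real check lives in `gossiper::handle_syn_msg`):

```python
# Sketch: a group 0 ID mismatch in a gossip SYN is ignored only when
# exactly one of the two gossiping nodes has recovery_leader set,
# i.e. one side is mid-recovery and the other is not.
def ignore_group0_mismatch(local_recovery_leader, remote_recovery_leader) -> bool:
    local_set = local_recovery_leader is not None
    remote_set = remote_recovery_leader is not None
    return local_set != remote_set  # exactly one side is in recovery
```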

However, note that nodes that join group 0 must have `recovery_leader`
set until all nodes join the new group 0. For example, assume that we
are in the middle of the rolling restart and one of the nodes in the new
group 0 crashes. It must be restarted with `recovery_leader` set, or
else it would reject `gossip_digest_syn` messages from nodes in the old
group 0. To avoid problems in such cases, we will continue to recommend
setting `recovery_leader` in `scylla.yaml` instead of passing it as
a command line argument.

(cherry picked from commit ba5b5c7d2f)
2025-08-05 10:59:06 +00:00
Patryk Jędrzejczak
1738f244d2 db: system_keyspace: peers_table_read_fixup: remove rows with null host_id
Currently, `peers_table_read_fixup` removes rows with no `host_id`, but
not with null `host_id`. Null host IDs are known to appear in system
tables, for example in `system.cluster_status` after a failed bootstrap.
We should make sure we handle them properly if they ever appear in
`system.peers`.
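
The fixup can be sketched as a row filter (a Python model with an illustrative row layout; the null UUID is the all-zero UUID):

```python
# Sketch of the peers-table fixup: drop rows whose host_id is missing
# OR equal to the null (all-zero) UUID. Row representation is
# illustrative, not the system table schema.
from uuid import UUID

NULL_UUID = UUID(int=0)

def fixup_peers(rows: list) -> list:
    return [r for r in rows
            if r.get("host_id") is not None and r["host_id"] != NULL_UUID]
```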

This commit guarantees that null UUID cannot belong to
`loaded_endpoints` in `storage_service::join_cluster`, which in
particular ensures that we throw a runtime error when a user sets
`recovery_leader` to null UUID during the recovery procedure. This is
handled by the code verifying that `recovery_leader` belongs to
`loaded_endpoints`.

(cherry picked from commit 23f59483b6)
2025-08-05 10:59:06 +00:00
Patryk Jędrzejczak
98e3b5e9b5 db/config, gms/gossiper: change recovery_leader to UUID
We change the type of the `recovery_leader` config parameter and
`gossip_config::recovery_leader` from sstring to UUID. `recovery_leader`
is supposed to store host ID, so UUID is a natural choice.

After changing the type to UUID, if the user provides an incorrect UUID,
parsing `recovery_leader` will fail early, but the start-up will
continue. Outside the recovery procedure, `recovery_leader` will then be
ignored. In the recovery procedure, the start-up will fail on:

```
throw std::runtime_error(
        "Cannot start - Raft-based topology has been enabled but persistent group 0 ID is not present. "
        "If you are trying to run the Raft-based recovery procedure, you must set recovery_leader.");
```

(cherry picked from commit 445a15ff45)
2025-08-05 10:59:06 +00:00
Patryk Jędrzejczak
1434afa588 db/config, utils: allow using UUID as a config option
We change the `recovery_leader` option to UUID in the following commit.

(cherry picked from commit ec69028907)
2025-08-05 10:59:06 +00:00
Nikos Dragazis
f53588813b test: Use in-memory SQLite for PyKMIP server
The PyKMIP server uses an SQLite database to store artifacts such as
encryption keys. By default, SQLite performs a full journal and data
flush to disk on every CREATE TABLE operation. Each operation triggers
three fdatasync(2) calls. If we multiply this by 16, that is the number
of tables created by the server, we get a significant number of file
syncs, which can last for several seconds on slow machines.

This behavior has led to CI stability issues from KMIP unit tests where
the server failed to complete its schema creation within the 20-second
timeout (observed on spider9 and spider11).

Fix this by configuring the server to use an in-memory SQLite.
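
The effect of the fix can be illustrated with plain `sqlite3`: with `":memory:"` as the database path, none of the per-CREATE-TABLE journal/data flushes hit the disk. The helper below is an illustration, not PyKMIP's actual configuration code:

```python
# Minimal illustration: create the same number of tables PyKMIP does
# (16) against an in-memory SQLite database, avoiding all fdatasync
# calls a file-backed database would make.
import sqlite3

def create_tables(db_path: str, n: int = 16) -> int:
    conn = sqlite3.connect(db_path)
    for i in range(n):
        conn.execute(f"CREATE TABLE t{i} (id INTEGER PRIMARY KEY)")
    conn.commit()
    count = conn.execute(
        "SELECT COUNT(*) FROM sqlite_master WHERE type='table'").fetchone()[0]
    conn.close()
    return count
```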

Fixes #24842.

Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>

Closes scylladb/scylladb#24995

(cherry picked from commit 2656fca504)

Closes scylladb/scylladb#25299
2025-08-02 17:13:13 +03:00
Tomasz Grabiec
1dfb9d23ea topology_coordinator: Trigger load stats refresh after replace
Otherwise, tablet rebuild will be delayed for up to 60s, as the tablet
scheduler needs load stats for the new (replacing) node to make
decisions.

Fixes #25163

Closes scylladb/scylladb#25181

(cherry picked from commit 55116ee660)

Closes scylladb/scylladb#25214
2025-08-02 01:26:59 +02:00
Piotr Dulikowski
c4b4db62e3 Merge '[Backport 2025.2] qos: don't populate effective service level cache until auth is migrated to raft' from Scylladb[bot]
Right now, service levels are migrated in one group0 command and auth is migrated in the next one. This has a bad effect on the group0 state reload logic - modifying service levels in group0 causes the effective service levels cache to be recalculated, and to do so we need to fetch information about all roles. If the reload happens after SL upgrade and before auth upgrade, the query for roles will be directed to the legacy auth tables in system_auth - and the query, being a potentially remote query, has a timeout. If the query times out, it will throw an exception which will break the group0 apply fiber and the node will need to be restarted to bring it back to work.

In order to solve this issue, make sure that the service level module does not start populating and using the service level cache until both service levels and auth are migrated to raft. This is achieved by adding the check both to the cache population logic and the effective service level getter - they now look at the service level accessor's new method, `can_use_effective_service_level_cache`, which takes a look at the auth version.

Fixes: scylladb/scylladb#24963

Should be backported to all versions which support upgrade to topology over raft - the issue described here may put the cluster into a state which is difficult to get out of (group0 apply fiber can break on multiple nodes, which necessitates their restart).

- (cherry picked from commit 2bb800c004)

- (cherry picked from commit 3a082d314c)

Parent PR: #25188

Closes scylladb/scylladb#25284

* github.com:scylladb/scylladb:
  test: sl: verify that legacy auth is not queried in sl to raft upgrade
  qos: don't populate effective service level cache until auth is migrated to raft
2025-08-01 17:15:25 +02:00
Jenkins Promoter
4ff03275b5 Update ScyllaDB version to: 2025.2.2 2025-08-01 16:35:40 +03:00
Jenkins Promoter
4e6c0aaab5 Update pgo profiles - aarch64 2025-08-01 04:41:23 +03:00
Jenkins Promoter
dd95ea454a Update pgo profiles - x86_64 2025-08-01 04:29:06 +03:00
Piotr Dulikowski
d9075e6160 test: sl: verify that legacy auth is not queried in sl to raft upgrade
Adjust `test_service_levels_upgrade`: right before upgrade to topology
on raft, enable an error injection which triggers when the standard role
manager is about to query the legacy auth tables in the
system_auth keyspace. The preceding commit which fixes
scylladb/scylladb#24963 makes sure that the legacy tables are not
queried during upgrade to topology on raft, so the error injection does
not trigger and does not cause a problem; without that commit, the test
fails.

(cherry picked from commit 3a082d314c)
2025-07-31 15:13:24 +00:00
Piotr Dulikowski
618459125d qos: don't populate effective service level cache until auth is migrated to raft
Right now, service levels are migrated in one group0 command and auth
is migrated in the next one. This has a bad effect on the group0 state
reload logic - modifying service levels in group0 causes the effective
service levels cache to be recalculated, and to do so we need to fetch
information about all roles. If the reload happens after SL upgrade and
before auth upgrade, the query for roles will be directed to the legacy
auth tables in system_auth - and the query, being a potentially remote
query, has a timeout. If the query times out, it will throw
an exception which will break the group0 apply fiber and the node will
need to be restarted to bring it back to work.

In order to solve this issue, make sure that the service level module
does not start populating and using the service level cache until both
service levels and auth are migrated to raft. This is achieved by adding
the check both to the cache population logic and the effective service
level getter - they now look at the service level accessor's new method,
`can_use_effective_service_level_cache`, which takes a look at the auth
version.

Fixes: scylladb/scylladb#24963
(cherry picked from commit 2bb800c004)
2025-07-31 15:13:23 +00:00
Jakub Smolar
4a1de6725a gdb: handle zero-size reads in managed_bytes
Fixes: https://github.com/scylladb/scylladb/issues/25048

Closes scylladb/scylladb#25050

(cherry picked from commit 6e0a063ce3)

Closes scylladb/scylladb#25141
2025-07-31 13:06:45 +03:00
Pavel Emelyanov
45101e072e Merge '[Backport 2025.2] transport: remove throwing protocol_exception on connection start' from Dario Mirovic
Note: The simplest approach to resolving `process_request_one` merge issues, since it has been refactored, was to include the three commits from before, and then the commits that are actually being backported.

`protocol_exception` is thrown in several places. This has become a performance issue, especially when starting/restarting a server. To alleviate this issue, throwing the exception has to be replaced with returning it as a result or an exceptional future.

This PR replaces throws in the `transport/server` module. This is achieved by using result_with_exception, and in some places, where suitable, just by creating and returning an exceptional future.

There are four commits in this PR. The first commit introduces tests in `test/cqlpy`. The second commit refactors transport server `handle_error` to not rethrow exceptions. The third commit refactors reusable buffer writer callbacks. The fourth commit replaces throwing `protocol_exception` to returning it.

Based on the comments on an issue linked in https://github.com/scylladb/scylladb/issues/24567, the main culprit from the side of protocol exceptions is the invalid protocol version one, so I tested that exception for performance.

In order to see if there is a measurable difference, a modified version of the `test_protocol_version_mismatch` Python test is used, with 100'000 runs across 10 processes (not threads, to avoid the Python GIL). One test run consisted of 1 warm-up run and 5 measured runs. The first test run was executed on the current code, with throwing protocol exceptions. The second test run was executed on the new code, with returning protocol exceptions. The performance report is in https://github.com/scylladb/scylladb/pull/24738#issuecomment-3051611069. It shows ~10% gains in real, user, and sys time for this test.

Testing

Build: `release`

Test file: `test/cqlpy/test_protocol_exceptions.py`
Test name: `test_protocol_version_mismatch` (modified for mass connection requests)

Test arguments:
```
max_attempts=100'000
num_parallel=10
```

Throwing `protocol_exception` results:
```
real=1:26.97  user=10:00.27  sys=2:34.55  cpu=867%
real=1:26.95  user=9:57.10  sys=2:32.50  cpu=862%
real=1:26.93  user=9:56.54  sys=2:35.59  cpu=865%
real=1:26.96  user=9:54.95  sys=2:32.33  cpu=859%
real=1:26.96  user=9:53.39  sys=2:33.58  cpu=859%

real=1:26.95 user=9:56.85 sys=2:34.11 cpu=862%   # average
```

Returning `protocol_exception` as `result_with_exception` or an exceptional future:
```
real=1:18.46  user=9:12.21  sys=2:19.08  cpu=881%
real=1:18.44  user=9:04.03  sys=2:17.91  cpu=869%
real=1:18.47  user=9:12.94  sys=2:19.68  cpu=882%
real=1:18.49  user=9:13.60  sys=2:19.88  cpu=883%
real=1:18.48  user=9:11.76  sys=2:17.32  cpu=878%

real=1:18.47 user=9:10.91 sys=2:18.77 cpu=879%   # average
```

This PR replaced `transport/server` throws of `protocol_exception` with returns. There are a few other places where protocol exceptions are thrown, and there are many places where `invalid_request_exception` is thrown. That is out of scope of this single PR, so the PR only references, and does not resolve, issue #24567.

Refs: #24567

This PR improves performance in cases when protocol exceptions happen, for example during connection storms. It will require backporting.

* (cherry picked from commit 7aaeed012e)

* (cherry picked from commit 30d424e0d3)

* (cherry picked from commit 9f4344a435)

* (cherry picked from commit 5390f92afc)

* (cherry picked from commit 4a6f71df68)

Parent PR: #24738

Closes scylladb/scylladb#25239

* github.com:scylladb/scylladb:
  test/cqlpy: add cpp exception metric test conditions
  transport/server: replace protocol_exception throws with returns
  utils/reusable_buffer: accept non-throwing writer callbacks via result_with_exception
  transport/server: avoid exception-throw overhead in handle_error
  test/cqlpy: add protocol_exception tests
  transport: remove redundant references in process_request_one
  transport: fix the indentation in process_request_one
  transport: add futures in CQL server exception handling
2025-07-31 12:18:50 +03:00
Anna Stuchlik
96a01082bb doc: add tablets support information to the Drivers table
This commit:

- Extends the Drivers support table with information on which driver supports tablets
  and since which version.
- Adds the driver support policy to the Drivers page.
- Reorganizes the Drivers page to accommodate the updates.

In addition:
- The CPP-over-Rust driver is added to the table.
- The information about Serverless (which we don't support) is removed
  and replaced with tablets to correctly describe the contents of the table.

Fixes https://github.com/scylladb/scylladb/issues/19471

Refs https://github.com/scylladb/scylladb-docs-homepage/issues/69

Closes scylladb/scylladb#24635

(cherry picked from commit 18b4d4a77c)

Closes scylladb/scylladb#25250
2025-07-31 12:18:33 +03:00
Aleksandra Martyniuk
782fb029d6 streaming: close sink when exception is thrown
If an exception is thrown in result_handling_cont in streaming,
then the sink does not get closed. This leads to a node crash.

Close sink in exception handler.

Fixes: https://github.com/scylladb/scylladb/issues/25165.

Closes scylladb/scylladb#25238

(cherry picked from commit 99ff08ae78)

Closes scylladb/scylladb#25267
2025-07-31 12:18:17 +03:00
Dario Mirovic
9708d9c4d7 test/cqlpy: add cpp exception metric test conditions
Tested code paths should not throw exceptions. The `scylla_reactor_cpp_exceptions`
metric is used to verify this. Since it is a global metric, each test runs
multiple times to address potential flakiness:
- `run_count = 100`
- `cpp_exception_threshold = 10`

If a change in the code introduced an exception, the expectation is that the
number of registered exceptions will be > `cpp_exception_threshold` over
`run_count` runs, in which case the test fails.
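
The thresholding logic can be sketched like this (illustrative Python; the real test reads the metric from Scylla's metrics endpoint):

```python
# Sketch: the global scylla_reactor_cpp_exceptions counter may tick for
# unrelated reasons, so the test tolerates up to CPP_EXCEPTION_THRESHOLD
# increments summed over RUN_COUNT runs.
RUN_COUNT = 100
CPP_EXCEPTION_THRESHOLD = 10

def exceptions_within_threshold(counter_deltas: list) -> bool:
    assert len(counter_deltas) == RUN_COUNT
    return sum(counter_deltas) <= CPP_EXCEPTION_THRESHOLD
```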

Fixes: #25272
(cherry picked from commit 4a6f71df68)
2025-07-30 21:54:47 +02:00
Dario Mirovic
dc819ebda1 transport/server: replace protocol_exception throws with returns
Replace throwing protocol_exception with returning it as a result
or an exceptional future in the transport server module. This
improves performance, for example during connection storms and
server restarts, where protocol exceptions are more frequent.

In functions already returning a future, protocol exceptions are
propagated using an exceptional future. In functions not already
returning a future, result_with_exception is used.

A notable change is checking v.failed() before calling v.get() in the
process_request function, to avoid throwing in case of an
exceptional future.

Refs: #24567
Fixes: #25272
(cherry picked from commit 5390f92afc)
2025-07-30 21:54:45 +02:00
Dario Mirovic
a8d38882ff utils/reusable_buffer: accept non-throwing writer callbacks via result_with_exception
Make make_bytes_ostream and make_fragmented_temporary_buffer accept
writer callbacks that return utils::result_with_exception instead of
forcing them to throw on error. This lets callers propagate failures
by returning an error result rather than throwing an exception.

Introduce buffer_writer_for, bytes_ostream_writer, and fragmented_buffer_writer
concepts to simplify and document the template requirements on writer callbacks.

This patch does not modify the actual callbacks passed, except for the syntax
changes needed for successful compilation, without changing the logic.

Refs: #24567
Fixes: #25272
(cherry picked from commit 9f4344a435)
2025-07-30 21:54:41 +02:00
Dario Mirovic
efc1269665 transport/server: avoid exception-throw overhead in handle_error
Previously, connection::handle_error always called f.get() inside a try/catch,
forcing every failed future to throw and immediately catch an exception just to
classify it. This change eliminates that extra throw/catch cycle by first checking
f.failed(), getting the stored std::exception_ptr via f.get_exception(), and
then dispatching on its type via utils::try_catch<T>(eptr).

The error-response logic is not changed - cassandra_exception, std::exception,
and unknown exceptions are caught and processed, and any exceptions thrown by
write_response while handling those exceptions continue to escape handle_error.

Refs: #24567
Fixes: #25272
(cherry picked from commit 30d424e0d3)
2025-07-30 21:54:31 +02:00
Dario Mirovic
57a32e50d3 test/cqlpy: add protocol_exception tests
Add a helper to fetch scylla_transport_cql_errors_total{type="protocol_error"} counter
from Scylla's metrics endpoint. These metrics are used to track protocol error
count before and after each test.
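
A helper like the one described can be sketched as a line scan over a Prometheus-format metrics dump (the metric name is from the commit message; the parsing and label layout here are simplified and illustrative):

```python
# Sketch: extract the protocol_error counter from Prometheus-format
# metrics text. Real Scylla output carries extra labels (e.g. shard),
# which this simplified model ignores.
def protocol_error_count(metrics_text: str) -> float:
    for line in metrics_text.splitlines():
        if line.startswith(
                'scylla_transport_cql_errors_total{type="protocol_error"}'):
            return float(line.rsplit(" ", 1)[1])
    return 0.0
```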

Add cql_with_protocol context manager utility for session creation with parameterized
protocol_version value. This is used for testing connection establishment with
different protocol versions, and proper disposal of successfully established sessions.

The tests cover two failure scenarios:
- Protocol version mismatch in test_protocol_version_mismatch which tests both supported
and unsupported protocol version
- Malformed frames via raw socket in _protocol_error_impl, used by several test functions,
and also test_no_protocol_exceptions test to assert that the error counters never decrease
during test execution, catching unintended metric resets

Refs: #24567
Fixes: #25272
(cherry picked from commit 7aaeed012e)
2025-07-30 21:54:27 +02:00
Andrzej Jackowski
5dcda14e2e transport: remove redundant references in process_request_one
The references were added and used in previous commits to
limit the number of line changes for the reviewer's convenience.

This commit removes the redundant references to make the code
more clear and concise.

(cherry picked from commit 9b1f062827)
2025-07-28 17:42:27 +02:00
Andrzej Jackowski
c56b8a3c2d transport: fix the indentation in process_request_one
Fix the indentation after the previous commit that intentionally had
a wrong indent to limit the number of changed lines

(cherry picked from commit 9c0f369cf8)
2025-07-28 17:42:11 +02:00
Andrzej Jackowski
356d73cdb0 transport: add futures in CQL server exception handling
Prepare for the next commit, which will introduce a
seastar::sleep in the handling of selected exceptions.

This commit:
 - Rewrites cql_server::connection::process_request_one to use
   seastar::futurize_invoke and try_catch<> instead of
   utils::result_try.
 - The indentation is intentionally incorrect to reduce the
   number of changed lines. Subsequent commits fix it.

(cherry picked from commit 8a7454cf3e)
2025-07-28 17:41:31 +02:00
Aleksandra Martyniuk
c97da64e45 tasks: do not use binary progress for task manager tasks
Currently, progress of a parent task depends on expected_total_workload,
expected_children_number, and children progresses. Basically, if total
workload is known or all children have already been created, progresses
of children are summed up. Otherwise binary progress is returned.

As a result, two tasks of the same type may report progress in different
units. If they are children of the same task, the parent's aggregated
progress becomes meaningless.

Drop expected_children_number, as we can't assume that children are able
to report their progress.

Modify the get_progress method: progress is calculated from the children's
progress. If expected_total_workload isn't specified, the total progress of
a task may grow. If expected_total_workload isn't specified and no children
have been created, empty progress (0/0) is returned.
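The aggregation rule above can be sketched as follows. Function and parameter names are illustrative only, not the actual task manager API.

```python
def aggregate_progress(children, expected_total_workload=None):
    """Sum the children's (completed, total) pairs.

    If expected_total_workload is given it fixes the total; otherwise
    the total is the sum of the children's totals and may grow as new
    children appear. With no workload hint and no children, the result
    is the empty progress (0, 0) rather than a binary 0-or-1 value.
    """
    completed = sum(done for done, _ in children)
    total = sum(t for _, t in children)
    if expected_total_workload is not None:
        total = expected_total_workload
    return (completed, total)
```

The point of the change is that the result is always in the same unit (summed child workloads), so a parent gathering progress from children of different types no longer mixes binary and workload-based values.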

Fixes: https://github.com/scylladb/scylladb/issues/24650.

Closes scylladb/scylladb#25113

(cherry picked from commit a7ee2bbbd8)

Closes scylladb/scylladb#25199
2025-07-28 09:25:39 +03:00
Ran Regev
054c658988 scylla.yaml: add recommended value for stream_io_throughput_mb_per_sec
Fixes: #24758

Updated scylla.yaml and the help text shown by scylla --help.

Closes scylladb/scylladb#24793

(cherry picked from commit db4f301f0c)

Closes scylladb/scylladb#25196
2025-07-28 09:25:29 +03:00
Pavel Emelyanov
95b906bea9 Merge '[Backport 2025.2] storage_service: cancel all write requests after stopping transports' from Scylladb[bot]
When a node shuts down, in storage service, after storage_proxy RPCs are stopped, some write handlers within storage_proxy may still be waiting for background writes to complete. These handlers hold appropriate ERMs to block schema changes before the write finishes. After the RPCs are stopped, these writes cannot receive the replies anymore.

If, at the same time, there are RPC commands executing `barrier_and_drain`, they may get stuck waiting for these ERM holders to finish, potentially blocking node shutdown until the writes time out.

This change introduces cancellation of all outstanding write handlers from storage_service after the storage proxy RPCs were stopped.

Fixes scylladb/scylladb#23665

Backport: since this fixes an issue that frequently causes issues in CI, backport to 2025.1, 2025.2, and 2025.3.

- (cherry picked from commit bc934827bc)

- (cherry picked from commit e0dc73f52a)

Parent PR: #24714

Closes scylladb/scylladb#25169

* github.com:scylladb/scylladb:
  storage_service: Cancel all write requests on storage_proxy shutdown
  test: Add test for unfinished writes during shutdown and topology change
2025-07-28 09:25:15 +03:00
Pavel Emelyanov
8622a07bdd Merge '[Backport 2025.2] streaming: Avoid deadlock by running view checks in a separate scheduling group' from Scylladb[bot]
This issue happens with removenode, when RBNO is disabled, so range
streamer is used.

The deadlock happens in a scenario like this:
1. Start 3 nodes: {A, B, C}, RF=2
2. Node A is lost
3. removenode A
4. Both B and C gain ownership of ranges.
5. Streaming sessions are started with crossed directions: B->C, C->B

Readers created by sender side exhaust streaming semaphore on B and C.
Receiver side attempts to obtain a permit indirectly by calling
check_needs_view_update_path(), which reads local tables. That read is
blocked and times out, causing streaming to fail. The streaming writer
is already using a tracking-only permit.

Even if we didn't deadlock, and the streaming semaphore was simply exhausted
by other receiving sessions (via tracking-only permits), the query may still
time out due to starvation.

To avoid that, run the query under a different scheduling group, which
translates to the system semaphore instead of the maintenance
semaphore, to break the dependency. The gossip group was chosen
because it shouldn't be contended and this change should not interfere
with it much.

Fixes #24807
Fixes #24925

- (cherry picked from commit ee2fa58bd6)

- (cherry picked from commit dff2b01237)

Parent PR: #24929

Closes scylladb/scylladb#25055

* github.com:scylladb/scylladb:
  streaming: Avoid deadlock by running view checks in a separate scheduling group
  service: migration_manager: Run group0 barrier in gossip scheduling group
2025-07-28 09:24:53 +03:00
Tomasz Grabiec
3991e4de28 streaming: Avoid deadlock by running view checks in a separate scheduling group
This issue happens with removenode, when RBNO is disabled, so range
streamer is used.

The deadlock happens in a scenario like this:
1. Start 3 nodes: {A, B, C}, RF=2
2. Node A is lost
3. removenode A
4. Both B and C gain ownership of ranges.
5. Streaming sessions are started with crossed directions: B->C, C->B

Readers created by sender side exhaust streaming semaphore on B and C.
Receiver side attempts to obtain a permit indirectly by calling
check_needs_view_update_path(), which reads local tables. That read is
blocked and times out, causing streaming to fail. The streaming writer
is already using a tracking-only permit.

To avoid that, run the query under a different scheduling group, which
translates to the system semaphore instead of the maintenance
semaphore, to break the dependency. The gossip group was chosen
because it shouldn't be contended and this change should not interfere
with it much.

Fixes: #24807
(cherry picked from commit dff2b01237)
2025-07-27 22:52:56 +02:00
Sergey Zolotukhin
f15df0bcce storage_service: Cancel all write requests on storage_proxy shutdown
During a graceful node shutdown, RPC listeners are stopped in `storage_service::drain_on_shutdown`
as one of the first steps. However, even after RPCs are shut down, some write handlers in
`storage_proxy` may still be waiting for background writes to complete. These handlers retain the ERM.
Since the RPC subsystem is no longer active, replies cannot be received, and if any RPC commands are
concurrently executing `barrier_and_drain`, they may get stuck waiting for those writes. This can block
the messaging server shutdown and delay the entire shutdown process until the write timeout occurs.

This change introduces the cancellation of all outstanding write handlers in `storage_proxy`
during shutdown to prevent unnecessary delays.

Fixes scylladb/scylladb#23665

(cherry picked from commit e0dc73f52a)
2025-07-24 13:02:56 +00:00
Sergey Zolotukhin
487012e972 test: Add test for unfinished writes during shutdown and topology change
This test reproduces an issue where a topology change and an ongoing write query
during query coordinator shutdown can cause the node to get stuck.

When a node receives a write request, it creates a write handler that holds
a copy of the current table's ERM (Effective Replication Map). The ERM ensures
that no topology or schema changes occur while the request is being processed.

After the query coordinator receives the required number of replica write ACKs
to satisfy the consistency level (CL), it sends a reply to the client. However,
the write response handler remains alive until all replicas respond — the remaining
writes are handled in the background.

During shutdown, when all network connections are closed, these responses can no longer
be received. As a result, the write response handler is only destroyed once the write
timeout is reached.

This becomes problematic because the ERM held by the handler blocks topology or schema
change commands from executing. Since shutdown waits for these commands to complete,
this can lead to unnecessary delays in node shutdown and restarts, and occasional
test case failures.

Test for: scylladb/scylladb#23665

(cherry picked from commit bc934827bc)
2025-07-24 13:02:56 +00:00
Michał Chojnowski
7b5a4cadd7 test: add test_sstable_compression_dictionaries_upgrade.py
(cherry picked from commit dd878505ca)
2025-07-23 19:28:35 +02:00
Michał Chojnowski
1446b4e0ef test.py: add --run-internet-dependent-tests
Later, we will add upgrade tests, which need to download the previous
release of Scylla from the internet.

Internet access is a major dependency, so we want to make those tests
opt-in for now.

(cherry picked from commit d3cb873532)
2025-07-23 19:28:35 +02:00
Tomasz Grabiec
fa1b97f0c5 Merge '[Backport 2025.2] Improve background disposal of tablet_metadata' from Scylladb[bot]
As seen in #23284, when the tablet_metadata contains many tables, even empty ones,
we're seeing a long queue of seastar tasks coming from the individual destruction of
`tablet_map_ptr = foreign_ptr<lw_shared_ptr<const tablet_map>>`.

This change improves `tablet_metadata::clear_gently` to destroy the `tablet_map_ptr` objects
on their owner shard by sorting them into vectors, one per owner shard.

Also, a background call to clear_gently was added to `~token_metadata`, as it
is destroyed arbitrarily when automatic token_metadata_ptr variables go out of
scope, so that the contained tablet_metadata is cleared gently.

Finally, a unit test was added to reproduce the `Too long queue accumulated for gossip` symptom
and verify that it is gone with this change.

Fixes #24814
Refs #23284

This change is not marked as fixing the issue since we still need to verify that there is no impact on query performance, reactor stalls, or large allocations, with a large number of tablet-based tables.

* Since the issue exists in 2025.1, requesting backport to 2025.1 and upwards

- (cherry picked from commit 3acca0aa63)

- (cherry picked from commit 493a2303da)

- (cherry picked from commit e0a19b981a)

- (cherry picked from commit 2b2cfaba6e)

- (cherry picked from commit 2c0bafb934)

- (cherry picked from commit 4a3d14a031)

- (cherry picked from commit 6e4803a750)

Parent PR: #24618

Closes scylladb/scylladb#24863

* github.com:scylladb/scylladb:
  token_metadata_impl: clear_gently: release version tracker early
  test: cluster: test_tablets_merge: add test_tablet_split_merge_with_many_tables
  token_metadata: clear_and_destroy_impl when destroyed
  token_metadata: keep a reference to shared_token_metadata
  token_metadata: move make_token_metadata_ptr into shared_token_metadata class
  replica: database: get and expose a mutable locator::shared_token_metadata
  locator: tablets: tablet_metadata: clear_gently: optimize foreign ptr destruction
2025-07-23 17:00:44 +02:00
Piotr Dulikowski
81d1790655 Merge '[Backport 2025.2] cdc: Forbid altering columns of CDC log tables directly' from Scylladb[bot]
The set of columns of a CDC log table should be managed automatically
by Scylla, and the user should not have the ability to manipulate them
directly. That could lead to disastrous consequences such as a
segmentation fault.

In this commit, we're restricting those operations. We also provide two
validation tests.

One of the existing tests had to be adjusted as it modified the type
of a column in a CDC log table. Since the test simply verifies that
the user has sufficient permissions to perform `ALTER TABLE` on the log
table, the test is still valid.

Fixes scylladb/scylladb#24643

Backport: we should backport the change to all affected
branches to prevent the consequences that may affect the user.

- (cherry picked from commit 20d0050f4e)

- (cherry picked from commit 59800b1d66)

Parent PR: #25008

Closes scylladb/scylladb#25107

* github.com:scylladb/scylladb:
  cdc: Forbid altering columns of inactive CDC log table
  cdc: Forbid altering columns of CDC log tables directly
2025-07-22 12:35:20 +02:00
Piotr Dulikowski
ea50c02a02 Merge '[Backport 2025.2] cdc: throw error if column doesn't exist' from Scylladb[bot]
In the CDC log transformer, when creating a CDC mutation based on some
base table mutation, for each value of a base column we set the value in
the CDC column with the same name.

When looking up the column in the CDC schema by name, we may get a null
pointer if a column by that name is not found. This shouldn't happen
normally because the base schema and CDC schema should be compatible,
and for each base column there should be a CDC column with the same
name.

However, there are scenarios where the base schema and CDC schema are
incompatible for a short period of time when they are being altered.
When a base column is being added or dropped, we could get a base
mutation with this column set, and then the CDC transformer picks up the
latest CDC schema which doesn't have this column.

If such a thing happens, the code now throws an exception instead of
crashing on a null pointer dereference. Currently we don't have a safer
approach to handle this, but this might change in the future. The
other alternative is dropping that data silently, which we prefer not to
do.

Throwing an error is acceptable because this scenario most likely
indicates this behavior by the user:
* The user adds a new column and starts writing values to it
  before the ALTER is complete, or
* The user drops a column and continues writing values to it
  while it's being dropped.

Both cases might as well fail with an error because the column is not
found in the base table.

Fixes scylladb/scylladb#24952

backport needed - simple fix for a node crash

- (cherry picked from commit b336f282ae)

- (cherry picked from commit 86dfa6324f)

Parent PR: #24986

Closes scylladb/scylladb#25066

* github.com:scylladb/scylladb:
  test: cdc: add test_cdc_with_alter
  cdc: throw error if column doesn't exist
2025-07-21 16:01:52 +02:00
Dawid Mędrek
c9735a6015 cdc: Forbid altering columns of inactive CDC log table
When CDC becomes disabled on the base table, the CDC log table
still exists (cf. scylladb/scylladb@adda43edc7).
If it continues to exist up to the point when CDC is re-enabled
on the base table, no new log table will be created -- instead,
the old log table will be *re-attached*.

Since we want to avoid situations when the definition of the log
table has become misaligned with the definition of the base table
due to actions of the user, we forbid modifying the set of columns
or renaming them in CDC log tables, even when they're inactive.

Validation tests are provided.

(cherry picked from commit 59800b1d66)
2025-07-21 11:43:14 +00:00
Dawid Mędrek
038ba48917 cdc: Forbid altering columns of CDC log tables directly
The set of columns of a CDC log table should be managed automatically
by Scylla, and the user should not have the ability to manipulate them
directly. That could lead to disastrous consequences such as a
segmentation fault.

In this commit, we're restricting those operations. We also provide two
validation tests.

One of the existing tests had to be adjusted as it modified the type
of a column in a CDC log table. Since the test simply verifies that
the user has sufficient permissions to perform `ALTER TABLE` on the log
table, the test is still valid.

Fixes scylladb/scylladb#24643

(cherry picked from commit 20d0050f4e)
2025-07-21 11:43:13 +00:00
Ernest Zaslavsky
6d8350b20d s3_client: parse multipart response XML defensively
Ensure robust handling of XML responses when initiating multipart
uploads. Check for the existence of required nodes before access,
and throw an exception if the XML is empty or malformed.

Refs: https://github.com/scylladb/scylladb/issues/24676

Closes scylladb/scylladb#24990

(cherry picked from commit 342e94261f)

Closes scylladb/scylladb#25054
2025-07-21 12:08:25 +02:00
Benny Halevy
edce417036 token_metadata_impl: clear_gently: release version tracker early
No need to wait for all members to be cleared gently.
We can release the version earlier, since the held
version may be awaited in barriers.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 6e4803a750)
2025-07-21 09:49:05 +03:00
Benny Halevy
179e2b3bf1 test: cluster: test_tablets_merge: add test_tablet_split_merge_with_many_tables
Reproduces #23284

Currently skipped in release mode since it requires
the `short_tablet_stats_refresh_interval` interval.
Ref #24641

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 4a3d14a031)
2025-07-21 09:49:02 +03:00
Benny Halevy
29c33cb065 token_metadata: clear_and_destroy_impl when destroyed
We have a lot of places in the code where
a token_metadata_ptr is kept in an automatic
variable and destroyed when it leaves the scope.
Since it's a reference-counted lw_shared_ptr,
the token_metadata object is rarely destroyed in
those cases, but when it is, it doesn't go through
clear_gently, and in particular its tablet_metadata
is not cleared gently, leading to inefficient destruction
of potentially many foreign_ptr:s.

This patch calls clear_and_destroy_impl that gently
clears and destroys the impl object in the background
using the shared_token_metadata.

Fixes #13381

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 2c0bafb934)
2025-07-21 09:36:40 +03:00
Benny Halevy
4da9539831 token_metadata: keep a reference to shared_token_metadata
To be used by a following patch to gently clear and destroy
the token_metadata_impl in the background.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 2b2cfaba6e)
2025-07-21 09:36:40 +03:00
Benny Halevy
390ca79ae4 token_metadata: move make_token_metadata_ptr into shared_token_metadata class
So we can use the local shared_token_metadata instance
for safe background destroy of token_metadata_impl:s.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit e0a19b981a)
2025-07-21 09:36:40 +03:00
Benny Halevy
1113bb2580 replica: database: get and expose a mutable locator::shared_token_metadata
Prepare for the next patch, which will use this shared_token_metadata
to make mutable_token_metadata_ptr:s.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 493a2303da)
2025-07-21 09:36:40 +03:00
Benny Halevy
a59a1b422f locator: tablets: tablet_metadata: clear_gently: optimize foreign ptr destruction
Sort all tablet_map_ptr:s by shard_id
and then destroy them on each shard to prevent
long cross-shard task queues for foreign_ptr destructions.
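The bucketing step can be sketched like this, with plain dicts standing in for shards and foreign_ptr:s (an analogy only; the real code submits each bucket to its owner shard for destruction):

```python
from collections import defaultdict

def bucket_by_owner_shard(ptrs):
    """Group pointers by owner shard so each shard destroys its own
    batch in one cross-shard hop instead of one hop per pointer."""
    buckets = defaultdict(list)
    for p in ptrs:
        buckets[p["owner_shard"]].append(p)
    return dict(buckets)
```

With many tables, this turns O(number of pointers) cross-shard tasks into O(number of shards), which is what removes the long task queues.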

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 3acca0aa63)
2025-07-21 09:36:40 +03:00
Michael Litvak
23dbe8952b test: cdc: add test_cdc_with_alter
Add a test that exercises adding and dropping a column on a table with
CDC enabled while writing to it.

(cherry picked from commit 86dfa6324f)
2025-07-20 09:07:29 +02:00
Michael Litvak
f2af0c5f18 cdc: throw error if column doesn't exist
In the CDC log transformer, when creating a CDC mutation based on some
base table mutation, for each value of a base column we set the value in
the CDC column with the same name.

When looking up the column in the CDC schema by name, we may get a null
pointer if a column by that name is not found. This shouldn't happen
normally because the base schema and CDC schema should be compatible,
and for each base column there should be a CDC column with the same
name.

However, there are scenarios where the base schema and CDC schema are
incompatible for a short period of time when they are being altered.
When a base column is being added or dropped, we could get a base
mutation with this column set, and then the CDC transformer picks up the
latest CDC schema which doesn't have this column.

If such a thing happens, the code now throws an exception instead of
crashing on a null pointer dereference. Currently we don't have a safer
approach to handle this, but this might change in the future. The
other alternative is dropping that data silently, which we prefer not to
do.

Throwing an error is acceptable because this scenario most likely
indicates this behavior by the user:
* The user adds a new column and starts writing values to it
  before the ALTER is complete, or
* The user drops a column and continues writing values to it
  while it's being dropped.

Both cases might as well fail with an error because the column is not
found in the base table.

Fixes scylladb/scylladb#24952

(cherry picked from commit b336f282ae)
2025-07-18 10:36:07 +00:00
Calle Wilund
0d61d63e7e utils::http::dns_connection_factory: Use a shared certificate_credentials
Fixes #24447

This factory type, which is really more a data holder/connection producer
per connection instance, creates, if using https, a new certificate_credentials
on every instance. Which when used by S3 client is per client and
scheduling groups.

Which eventually means that we will do a set_system_trust + "cold" handshake
for every tls connection created this way.

This will cause both IO and cold/expensive certificate checking -> possible
stalls/wasted CPU. Since the credentials object in question is literally a
"just trust system", it could very well be shared across the shard.

This PR adds a thread local static cached credentials object and uses this
instead. Could consider moving this to seastar, but maybe this is too much.

Closes scylladb/scylladb#24448

(cherry picked from commit 80feb8b676)

Closes scylladb/scylladb#24461
2025-07-18 09:34:45 +03:00
Tomasz Grabiec
23e365fc7b service: migration_manager: Run group0 barrier in gossip scheduling group
Fixes two issues.

One is potential priority inversion. The barrier will be executed
using scheduling group of the first fiber which triggers it, the rest
will block waiting on it. For example, CQL statements which need to
sync the schema on replica side can block on the barrier triggered by
streaming. That's undesirable. This is theoretical, not proved in the
field.

The second problem is blocking the error path. This barrier is called
from the streaming error handling path. If the streaming concurrency
semaphore is exhausted, and streaming fails due to timeout on
obtaining the permit in check_needs_view_update_path(), the error path
will block too because it will also attempt to obtain the permit as
part of the group0 barrier. Running it in the gossip scheduling group
prevents this.

Fixes #24925

(cherry picked from commit ee2fa58bd6)
2025-07-17 17:25:10 +00:00
Piotr Dulikowski
97659e19b8 auth: fix crash when migration code runs parallel with raft upgrade
The functions password_authenticator::start and
standard_role_manager::start have a similar structure: they spawn a
fiber which invokes a callback that performs some migration until that
migration succeeds. Both handlers set a shared promise called
_superuser_created_promise (those are actually two promises, one for the
password authenticator and the other for the role manager).

The handlers are similar in both cases. They check if auth is in legacy
mode, and behave differently depending on that. If in legacy mode, the
promise is set (if it was not set before), and some legacy migration
actions follow. In auth-on-raft mode, an attempt is made to create the
superuser, and if it succeeds, the promise is _unconditionally_ set.

While it makes sense at a glance to set the promise unconditionally,
there is a non-obvious corner case during upgrade to topology on raft.
During the upgrade, auth switches from the legacy mode to auth on raft
mode. Thus, if the callback didn't succeed in legacy mode and then tries
to run in auth-on-raft mode and succeeds, it will unconditionally set a
promise that was already set - this is a bug and triggers an assertion
in seastar.

Fix the issue by surrounding the `shared_promise::set_value` call with
an `if` - like it is already done for the legacy case.
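The guarded set can be sketched as below. `SharedPromise` is a toy model of a shared promise that forbids double fulfillment (seastar's real shared_promise triggers an assertion in that case); the function name is illustrative.

```python
class SharedPromise:
    """Toy model of a shared promise that forbids double fulfillment."""
    def __init__(self):
        self.available = False
    def set_value(self):
        assert not self.available, "promise already satisfied"
        self.available = True

def mark_superuser_created(promise):
    # The fix: only fulfill the promise if it hasn't been fulfilled yet,
    # so running the callback again after the legacy -> auth-on-raft
    # mode switch is harmless.
    if not promise.available:
        promise.set_value()
```

Without the `if`, calling the helper once in legacy mode and once more after the raft upgrade would hit the double-set assertion, which is exactly the crash being fixed.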

Fixes: scylladb/scylladb#24975

Closes scylladb/scylladb#24976

(cherry picked from commit a14b7f71fe)

Closes scylladb/scylladb#25018
2025-07-17 17:55:25 +02:00
Botond Dénes
4eb070b816 Merge '[Backport 2025.2] storage_service: Use utils::chunked_vector to avoid big allocation' from Scylladb[bot]
The following was seen:

```
!WARNING | scylla[6057]:  [shard 12:strm] seastar_memory - oversized allocation: 212992 bytes. This is non-fatal, but could lead to latency and/or fragmentation issues. Please report: at
[Backtrace #0]
void seastar::backtrace<seastar::current_backtrace_tasklocal()::$_0>(seastar::current_backtrace_tasklocal()::$_0&&, bool) at ./build/release/seastar/./seastar/include/seastar/util/backtrace.hh:89
 (inlined by) seastar::current_backtrace_tasklocal() at ./build/release/seastar/./build/release/seastar/./seastar/src/util/backtrace.cc:99
seastar::current_tasktrace() at ./build/release/seastar/./build/release/seastar/./seastar/src/util/backtrace.cc:136
seastar::current_backtrace() at ./build/release/seastar/./build/release/seastar/./seastar/src/util/backtrace.cc:169
seastar::memory::cpu_pages::warn_large_allocation(unsigned long) at ./build/release/seastar/./build/release/seastar/./seastar/src/core/memory.cc:848
seastar::memory::allocate_slowpath(unsigned long) at ./build/release/seastar/./build/release/seastar/./seastar/src/core/memory.cc:911
operator new(unsigned long) at ./build/release/seastar/./build/release/seastar/./seastar/src/core/memory.cc:1706
std::allocator<dht::token_range_endpoints>::allocate(unsigned long) at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/allocator.h:196
 (inlined by) std::allocator_traits<std::allocator<dht::token_range_endpoints> >::allocate(std::allocator<dht::token_range_endpoints>&, unsigned long) at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/alloc_traits.h:515
 (inlined by) std::_Vector_base<dht::token_range_endpoints, std::allocator<dht::token_range_endpoints> >::_M_allocate(unsigned long) at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_vector.h:380
 (inlined by) void std::vector<dht::token_range_endpoints, std::allocator<dht::token_range_endpoints> >::_M_realloc_append<dht::token_range_endpoints const&>(dht::token_range_endpoints const&) at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/vector.tcc:596
locator::describe_ring(replica::database const&, gms::gossiper const&, seastar::basic_sstring<char, unsigned int, 15u, true> const&, bool) at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_vector.h:1294
std::__n4861::coroutine_handle<seastar::internal::coroutine_traits_base<std::vector<dht::token_range_endpoints, std::allocator<dht::token_range_endpoints> > >::promise_type>::resume() const at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/coroutine:242
 (inlined by) seastar::internal::coroutine_traits_base<std::vector<dht::token_range_endpoints, std::allocator<dht::token_range_endpoints> > >::promise_type::run_and_dispose() at ././seastar/include/seastar/core/coroutine.hh:80
seastar::reactor::do_run() at ./build/release/seastar/./build/release/seastar/./seastar/src/core/reactor.cc:2635
std::_Function_handler<void (), seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_0>::_M_invoke(std::_Any_data const&) at ./build/release/seastar/./build/release/seastar/./seastar/src/core/reactor.cc:4684
```

Fix by using chunked_vector.

Fixes #24158

- (cherry picked from commit c5a136c3b5)

Parent PR: #24561

Closes scylladb/scylladb#24891

* github.com:scylladb/scylladb:
  storage_service: Use utils::chunked_vector to avoid big allocation
  utils: chunked_vector: implement erase() for single elements and ranges
  utils: chunked_vector: implement insert() for single-element inserts
2025-07-16 15:58:25 +03:00
Asias He
67375ecf14 storage_service: Use utils::chunked_vector to avoid big allocation
The following was seen:

```
!WARNING | scylla[6057]:  [shard 12:strm] seastar_memory - oversized allocation: 212992 bytes. This is non-fatal, but could lead to latency and/or fragmentation issues. Please report: at
[Backtrace #0]
void seastar::backtrace<seastar::current_backtrace_tasklocal()::$_0>(seastar::current_backtrace_tasklocal()::$_0&&, bool) at ./build/release/seastar/./seastar/include/seastar/util/backtrace.hh:89
 (inlined by) seastar::current_backtrace_tasklocal() at ./build/release/seastar/./build/release/seastar/./seastar/src/util/backtrace.cc:99
seastar::current_tasktrace() at ./build/release/seastar/./build/release/seastar/./seastar/src/util/backtrace.cc:136
seastar::current_backtrace() at ./build/release/seastar/./build/release/seastar/./seastar/src/util/backtrace.cc:169
seastar::memory::cpu_pages::warn_large_allocation(unsigned long) at ./build/release/seastar/./build/release/seastar/./seastar/src/core/memory.cc:848
seastar::memory::allocate_slowpath(unsigned long) at ./build/release/seastar/./build/release/seastar/./seastar/src/core/memory.cc:911
operator new(unsigned long) at ./build/release/seastar/./build/release/seastar/./seastar/src/core/memory.cc:1706
std::allocator<dht::token_range_endpoints>::allocate(unsigned long) at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/allocator.h:196
 (inlined by) std::allocator_traits<std::allocator<dht::token_range_endpoints> >::allocate(std::allocator<dht::token_range_endpoints>&, unsigned long) at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/alloc_traits.h:515
 (inlined by) std::_Vector_base<dht::token_range_endpoints, std::allocator<dht::token_range_endpoints> >::_M_allocate(unsigned long) at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_vector.h:380
 (inlined by) void std::vector<dht::token_range_endpoints, std::allocator<dht::token_range_endpoints> >::_M_realloc_append<dht::token_range_endpoints const&>(dht::token_range_endpoints const&) at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/vector.tcc:596
locator::describe_ring(replica::database const&, gms::gossiper const&, seastar::basic_sstring<char, unsigned int, 15u, true> const&, bool) at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/bits/stl_vector.h:1294
std::__n4861::coroutine_handle<seastar::internal::coroutine_traits_base<std::vector<dht::token_range_endpoints, std::allocator<dht::token_range_endpoints> > >::promise_type>::resume() const at /usr/lib/gcc/x86_64-redhat-linux/14/../../../../include/c++/14/coroutine:242
 (inlined by) seastar::internal::coroutine_traits_base<std::vector<dht::token_range_endpoints, std::allocator<dht::token_range_endpoints> > >::promise_type::run_and_dispose() at ././seastar/include/seastar/core/coroutine.hh:80
seastar::reactor::do_run() at ./build/release/seastar/./build/release/seastar/./seastar/src/core/reactor.cc:2635
std::_Function_handler<void (), seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_0>::_M_invoke(std::_Any_data const&) at ./build/release/seastar/./build/release/seastar/./seastar/src/core/reactor.cc:4684
```

Fix by using chunked_vector.

Fixes #24158

Closes scylladb/scylladb#24561

(cherry picked from commit c5a136c3b5)
2025-07-16 07:43:39 +08:00
Avi Kivity
8f65d7e63b utils: chunked_vector: implement erase() for single elements and ranges
Implement using std::rotate() and resize(). The elements to be erased
are rotated to the end, then resized out of existence.

Again we defer optimization for trivially copyable types.

Unit tests are added.

Needed for range_streamer with token_ranges using chunked_vector.
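The rotate-then-resize idea can be illustrated on a plain Python list (an analogy only; the real implementation operates on chunked_vector with C++ iterators):

```python
def erase_range(v, first, last):
    """Erase v[first:last] by rotating the doomed run to the tail and
    then truncating -- the std::rotate() + resize() approach."""
    n = last - first
    # Left-rotate the suffix [first:] by n: the survivors move forward,
    # the erased elements end up at the tail.
    v[first:] = v[last:] + v[first:last]
    # "Resize out of existence": drop the last n elements.
    del v[len(v) - n:]
    return v
```

The appeal of this formulation is that it reuses existing primitives (rotate and resize) instead of reimplementing element shifting across chunk boundaries.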

(cherry picked from commit d6eefce145)
2025-07-16 07:43:29 +08:00
Avi Kivity
c6b0bacfb1 utils: chunked_vector: implement insert() for single-element inserts
partition_range_compat's unwrap() needs insert if we are to
use it for chunked_vector (which we do).

Implement using push_back() and std::rotate().

emplace(iterator, args) is also implemented, though the benefit
is diluted (it will be moved after construction).

The implementation isn't optimal - if T is trivially copyable
then using std::memmove() will be much faster than std::rotate(),
but this complex optimization is left for later.

Unit tests are added.
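The push_back() + rotate trick can likewise be illustrated on a Python list (an analogy, not the chunked_vector code itself):

```python
def insert_at(v, pos, value):
    """Insert value at pos via append + rotate: append to the tail,
    then right-rotate the suffix [pos:] by one so the new element
    lands at pos -- the push_back() + std::rotate() approach."""
    v.append(value)  # push_back
    v[pos:] = v[-1:] + v[pos:-1]
    return v
```

As the commit notes, emplace at an arbitrary position gets only a diluted benefit from this scheme, since the element is constructed at the tail and then moved into place by the rotate.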

(cherry picked from commit 5301f3d0b5)
2025-07-16 07:43:21 +08:00
Patryk Jędrzejczak
7bb43d812e test: test_zero_token_nodes_multidc: properly handle reads with CL=ONE
The test could fail with RF={DC1: 2, DC2: 0} and CL=ONE when:
- both writes succeeded with the same replica responding first,
- one of the following reads succeeded with the other replica
  responding before it applied mutations from any of the writes.

We fix the test by not expecting reads with CL=ONE to return a row.

We also harden the test by inserting different rows for every pair
(CL, coordinator), where one of the two coordinators is a normal
node from DC1, and the other one is a zero-token node from DC2.
This change makes sure that, for example, every write really
inserts a row.

Fixes scylladb/scylladb#22967

The fix addresses CI flakiness and only changes the test, so it
should be backported.

Closes scylladb/scylladb#23518

(cherry picked from commit 21edec1ace)

Closes scylladb/scylladb#24984
2025-07-15 15:50:21 +02:00
Botond Dénes
9482f45d13 test/cluster/test_read_repair: write 100 rows in trace test
This test asserts that a read repair really happened. To ensure this
happens it writes a single partition after enabling the database_apply
error injection point. For some reason, the write is sometimes reordered
with the error injection and the write will get replicated to both nodes
and no read repair will happen, failing the test.
To make the test less sensitive to such rare reordering, add a
clustering column to the table and write 100 rows. The chance of *all*
100 of them being reordered with the error injection should be low
enough that it doesn't happen again (famous last words).

Fixes: #24330

Closes scylladb/scylladb#24403

(cherry picked from commit 495f607e73)

Closes scylladb/scylladb#24973
2025-07-15 13:27:31 +03:00
Aleksandra Martyniuk
5debdce91d replica: hold compaction group gate during flush
Destructor of database_sstable_write_monitor, which is created
in table::try_flush_memtable_to_sstable, tries to get the compaction
state of the processed compaction group. If at this point
the compaction group is already stopped (and the compaction state
is removed), e.g. due to a concurrent tablet merge, an exception is
thrown and the node coredumps.

Add a flush gate to the compaction group to wait for flushes in
compaction_group::stop. Hold the gate in the seal function in
table::make_memtable_list. The seal function is turned into
a coroutine to ensure it won't throw.

Wait until async_gate is closed before flushing, to ensure that
all data is written into sstables. Stop ongoing compactions
beforehand.

Remove unnecessary flush in tablet_storage_group_manager::merge_completion_fiber.
Stop method already flushes the compaction group.

Fixes: #23911.

Closes scylladb/scylladb#24582

(cherry picked from commit 2ec54d4f1a)

Closes scylladb/scylladb#24951
2025-07-15 13:26:39 +03:00
Michael Litvak
15517ba529 tablets: stop storage group on deallocation
When a tablet transitions to a post-cleanup stage on the leaving replica
we deallocate its storage group. Before the storage can be deallocated
and destroyed, we must make sure it's cleaned up and stopped properly.

Normally this happens during the tablet cleanup stage, when
table::cleanup_table is called, so by the time we transition to the next
stage the storage group is already stopped.

However, it's possible that tablet cleanup did not run in some scenario:
1. The topology coordinator runs tablet cleanup on the leaving replica.
2. The leaving replica is restarted.
3. When the leaving replica starts, still in `cleanup` stage, it
   allocates a storage group for the tablet.
4. The topology coordinator moves to the next stage.
5. The leaving replica deallocates the storage group, but it was not
   stopped.

To address this scenario, we always stop the storage group when
deallocating it. Usually it will already be stopped, so the call
completes immediately; otherwise it will be stopped in the background.

Fixes scylladb/scylladb#24857
Fixes scylladb/scylladb#24828

Closes scylladb/scylladb#24896

(cherry picked from commit fa24fd7cc3)

Closes scylladb/scylladb#24908
2025-07-15 13:25:38 +03:00
Aleksandra Martyniuk
ccfc053dd5 repair: Reduce max row buf size when small table optimization is on
If small_table_optimization is on, a repair works on a whole table
simultaneously. It may be distributed across the whole cluster and
all nodes might participate in repair.

On a repair master, the row buffer is copied for each repair peer,
so the memory usage scales with the number of peers.

In large clusters, repair with small_table_optimization leads to OOM.

Divide the max_row_buf_size by the number of repair peers if
small_table_optimization is on.

Use max_row_buf_size to calculate the number of units taken from mem_sem.
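The sizing rule can be restated as a small helper (names hypothetical; a hedged sketch of the rule described above, not the actual repair-service code):

```cpp
#include <cstdint>

// With small_table_optimization the row buffer is copied once per repair
// peer, so the per-peer budget is the configured maximum divided by the
// number of peers; otherwise the full budget applies.
uint64_t effective_max_row_buf_size(uint64_t max_row_buf_size,
                                    unsigned nr_peers,
                                    bool small_table_optimization) {
    if (small_table_optimization && nr_peers > 0) {
        return max_row_buf_size / nr_peers;
    }
    return max_row_buf_size;
}
```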

Fixes: https://github.com/scylladb/scylladb/issues/22244.

Closes scylladb/scylladb#24868

(cherry picked from commit 17272c2f3b)

Closes scylladb/scylladb#24905
2025-07-15 13:24:49 +03:00
Botond Dénes
6749954b2a Merge '[Backport 2025.2] test.py: Fix start 3rd party services' from Scylladb[bot]
Move the starting of 3rd-party services under the `try` clause to avoid a situation where the main process crashes without stopping the services.
Without this, if something goes wrong during startup, the exit/artifact-collection step will not run and the process will stay around forever.

This functionality is in 2025.2 and can potentially affect jobs, so a backport is needed.

Fixes: #24773

- (cherry picked from commit 0ca539e162)

- (cherry picked from commit c6c3e9f492)

Parent PR: #24734

Closes scylladb/scylladb#24774

* github.com:scylladb/scylladb:
  test.py: use unique hostname for Minio
  test.py: Catch possible exceptions during 3rd party services start
2025-07-15 13:23:12 +03:00
Pavel Emelyanov
71e9f5e662 sstables_loader: Fix load-and-stream vs skip-cleanup check
The intention was to fail the REST API call in case --skip-cleanup is
requested for --load-and-stream loading. The corresponding if expression
checks something else :( even though the log message is correct.

Fixes: https://github.com/scylladb/scylladb/issues/24913

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Signed-off-by: Ran Regev <ran.regev@scylladb.com>
(cherry picked from commit bd3bd089e1)

Closes scylladb/scylladb#24947
2025-07-15 13:08:32 +03:00
Yaron Kaikov
f1d4266b7a dist/common/scripts/scylla_sysconfig_setup: fix SyntaxWarning: invalid escape sequence
There are invalid escape sequence warnings in places where raw strings should be used for the regex patterns.

Fixes: https://github.com/scylladb/scylladb/issues/24915

Closes scylladb/scylladb#24916

(cherry picked from commit fdcaa9a7e7)

Closes scylladb/scylladb#24968
2025-07-15 11:06:41 +02:00
Yaron Kaikov
9c0181e813 auto-backport.py: Avoid bot push to existing backport branches
Changed the backport logic so that the bot only pushes the backport branch if it does not already exist in the remote fork.
If the branch exists, the bot skips the push, allowing only users to update (force-push) the branch after the backport PR is open.

Fixes: https://github.com/scylladb/scylladb/issues/24953

Closes scylladb/scylladb#24954

(cherry picked from commit ed7c7784e4)

Closes scylladb/scylladb#24967
2025-07-15 10:27:28 +02:00
Aleksandra Martyniuk
6f8b378e80 nodetool: repair: skip tablet keyspaces
Currently, the nodetool repair command repairs both vnode and tablet keyspaces
if no keyspace is specified. This command should be used to repair
only vnode keyspaces, but that isn't easily achievable - we have to
explicitly run repair on each vnode keyspace.

Now nodetool repair skips tablet keyspaces unless a tablet keyspace
is explicitly passed as an argument.

Fixes: #24040.

Closes scylladb/scylladb#24042
2025-07-15 06:36:08 +03:00
Jenkins Promoter
afadcc648d Update pgo profiles - aarch64 2025-07-15 05:39:11 +03:00
Jenkins Promoter
2397a93410 Update pgo profiles - x86_64 2025-07-15 05:23:22 +03:00
Yaron Kaikov
6e06c57fc7 packaging: add ps command to dependencies
The ScyllaDB container image doesn't have the ps command installed, while this command is used by the perftune.py script shipped within the same image. This breaks node and container tuning in Scylla Operator.

Fixes: #24827

Closes scylladb/scylladb#24830

(cherry picked from commit 66ff6ab6f9)

Closes scylladb/scylladb#24955
2025-07-14 14:26:38 +03:00
Gleb Natapov
ece8a8b3bc api: unregister raft_topology_get_cmd_status on shutdown
In c8ce9d1c60 we introduced
raft_topology_get_cmd_status REST api but the commit forgot to
unregister the handler during shutdown.

Fixes #24910

Closes scylladb/scylladb#24911

(cherry picked from commit 89f2edf308)

Closes scylladb/scylladb#24922
2025-07-14 11:39:42 +02:00
Avi Kivity
5bed6c7a7f storage_proxy: avoid large allocation when storing batch in system.batchlog
Currently, when computing the mutation to be stored in system.batchlog,
we go through data_value. In turn this goes through `bytes` type
(#24810), so it causes a large contiguous allocation if the batch is
large.

Fix by going through the more primitive, but less contiguous,
atomic_cell API.

Fixes #24809.

Closes scylladb/scylladb#24811

(cherry picked from commit 60f407bff4)

Closes scylladb/scylladb#24845
2025-07-13 14:11:01 +03:00
Patryk Jędrzejczak
605106a9c6 Merge '[Backport 2025.2] Make it easier to debug stuck raft topology operation.' from Scylladb[bot]
The series adds more logging and provides new REST api around topology command rpc execution to allow easier debugging of stuck topology operations.

Backport since we want to have it in production as quickly as possible.

Fixes #24860

- (cherry picked from commit c8ce9d1c60)

- (cherry picked from commit 4e6369f35b)

Parent PR: #24799

Closes scylladb/scylladb#24879

* https://github.com/scylladb/scylladb:
  topology coordinator: log a start and an end of topology coordinator command execution at info level
  topology coordinator: add REST endpoint to query the status of ongoing topology cmd rpc
2025-07-09 12:58:14 +02:00
Piotr Dulikowski
e4dde34f52 Merge '[Backport 2025.2] main: don't start maintenance auth service if not enabled' from Scylladb[bot]
In f96d30c2b5
we introduced the maintenance service, which is an additional
instance of auth::service. But this service has a somewhat
confusing 2-level startup mechanism: it's initialized with
sharded<Service>::start and then auth::service::start
(different method with the same name to confuse even more).

When maintenance_socket was disabled (default setting), the code
did only the first part of the startup. This registered a config
observer but didn't create a permission_cache instance.
As a result, a crash on SIGHUP when config is reloaded can occur.

Fixes: https://github.com/scylladb/scylladb/issues/24528
Backport: all non-EOL versions since 6.0 and 2025.1

- (cherry picked from commit 97c60b8153)

- (cherry picked from commit dd01852341)

Parent PR: #24527

Closes scylladb/scylladb#24570

* github.com:scylladb/scylladb:
  test: add test for live updates of permissions cache config
  main: don't start maintenance auth service if not enabled
2025-07-09 09:47:57 +02:00
Piotr Dulikowski
8ebd67e1c3 Merge '[Backport 2025.2] batchlog_manager: abort replay of a failed batch on shutdown or node down' from Scylladb[bot]
When replaying a failed batch and sending the mutation to all replicas, make the write response handler cancellable and abort it on shutdown or if some target is marked down. Also set a reasonable timeout so it gets aborted if it's stuck for some other unexpected reason.

Previously, the write response handler was not cancellable and had no timeout. This could cause a scenario where some write operation by the batchlog manager is stuck indefinitely, and node shutdown gets stuck as well because it waits for the batchlog manager to complete, without aborting the operation.

Backport to relevant versions, since the issue can cause node shutdown to hang.

Fixes scylladb/scylladb#24599

- (cherry picked from commit 8d48b27062)

- (cherry picked from commit fc5ba4a1ea)

- (cherry picked from commit 7150632cf2)

- (cherry picked from commit 74a3fa9671)

- (cherry picked from commit a9b476e057)

- (cherry picked from commit d7af26a437)

Parent PR: #24595

Closes scylladb/scylladb#24880

* github.com:scylladb/scylladb:
  test: test_batchlog_manager: batchlog replay includes cdc
  test: test_batchlog_manager: test batch replay when a node is down
  batchlog_manager: set timeout on writes
  batchlog_manager: abort writes on shutdown
  batchlog_manager: create cancellable write response handler
  storage_proxy: add write type parameter to mutate_internal
2025-07-08 15:39:58 +02:00
Michael Litvak
a26d8f72b6 test: test_batchlog_manager: batchlog replay includes cdc
Add a new test that verifies that when replaying batch mutations from
the batchlog, the mutations include cdc augmentation if needed.

This is done in order to verify that it works currently as expected and
doesn't break in the future.

(cherry picked from commit d7af26a437)
2025-07-08 06:25:03 +00:00
Michael Litvak
9012357a4b test: test_batchlog_manager: test batch replay when a node is down
Add a test of the batchlog manager replay loop applying failed batches
while some replica is down.

The test reproduces an issue where the batchlog manager tries to replay
a failed batch, doesn't get a response from some replica, and becomes
stuck.

It verifies that the batchlog manager can eventually recover from this
situation and continue applying failed batches.

(cherry picked from commit a9b476e057)
2025-07-08 06:25:03 +00:00
Michael Litvak
8fa2520e15 batchlog_manager: set timeout on writes
Set a timeout on writes of replayed batches by the batchlog manager.

We want to avoid having infinite timeout for the writes in case it gets
stuck for some unexpected reason.

The timeout is set to be high enough to allow any reasonable write to
complete.

(cherry picked from commit 74a3fa9671)
2025-07-08 06:25:03 +00:00
Michael Litvak
ba11e8ebdd batchlog_manager: abort writes on shutdown
On shutdown of batchlog manager, abort all writes of replayed batches
by the batchlog manager.

To achieve this we set the appropriate write_type to BATCH, and on
shutdown cancel all write handlers with this type.

(cherry picked from commit 7150632cf2)
2025-07-08 06:25:02 +00:00
Michael Litvak
4e2b587b4d batchlog_manager: create cancellable write response handler
When replaying a batch mutation from the batchlog manager and sending it
to all replicas, create the write response handler as cancellable.

To achieve this we define a new wrapper type for batchlog mutations -
batchlog_replay_mutation, and this allows us to overload
create_write_response_handler for this type. This is similar to how it's
done with hint_wrapper and read_repair_mutation.
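The wrapper-type trick can be sketched as follows (types hypothetical, modeled on the hint_wrapper pattern the commit mentions): wrapping the same payload in a distinct struct lets overload resolution select a different handler without changing the payload type itself.

```cpp
// A distinct wrapper struct around the same payload.
struct mutation {};
struct batchlog_replay_mutation { mutation m; };

enum class handler_kind { plain, cancellable };

handler_kind create_write_response_handler(const mutation&) {
    return handler_kind::plain;
}
handler_kind create_write_response_handler(const batchlog_replay_mutation&) {
    return handler_kind::cancellable;  // can be cancelled on shutdown
}
```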

(cherry picked from commit fc5ba4a1ea)
2025-07-08 06:25:02 +00:00
Michael Litvak
f8b4d1c1cd storage_proxy: add write type parameter to mutate_internal
Currently mutate_internal has a boolean parameter `counter_write` that
indicates whether the write is of counter type or not.

We replace it with a more general parameter that indicates the
write type.

It is compatible with the previous behavior - for a counter write, the
type COUNTER is passed, and otherwise a default value will be used
as before.

(cherry picked from commit 8d48b27062)
2025-07-08 06:25:02 +00:00
Gleb Natapov
71f59e046b topology coordinator: log a start and an end of topology coordinator command execution at info level
Those calls are relatively rare and the output may help analyze issues
in production.

(cherry picked from commit 4e6369f35b)
2025-07-08 06:23:48 +00:00
Gleb Natapov
ad91198417 topology coordinator: add REST endpoint to query the status of ongoing topology cmd rpc
The topology coordinator executes several topology cmd rpc against some nodes
during a topology change. A topology operation will not proceed unless
rpc completes (successfully or not), but sometimes it appears that it
hangs and it is hard to tell on which nodes it did not complete yet.
Introduce new REST endpoint that can help with debugging such cases.
If executed on the topology coordinator it returns currently running
topology rpc (if any) and a list of nodes that did not reply yet.

(cherry picked from commit c8ce9d1c60)
2025-07-08 06:23:48 +00:00
Marcin Maliszkiewicz
8e783cc23a test: add test for live updates of permissions cache config
(cherry picked from commit dd01852341)
2025-07-07 10:07:02 +02:00
Marcin Maliszkiewicz
b1e75eba65 main: don't start maintenance auth service if not enabled
In f96d30c2b5
we introduced the maintenance service, which is an additional
instance of auth::service. But this service has a somewhat
confusing 2-level startup mechanism: it's initialized with
sharded<Service>::start and then auth::service::start
(different method with the same name to confuse even more).

When maintenance_socket was disabled (default setting), the code
did only the first part of the startup. This registered a config
observer but didn't create a permission_cache instance.
As a result, a crash on SIGHUP when config is reloaded can occur.

(cherry picked from commit 97c60b8153)
2025-07-07 10:06:28 +02:00
Patryk Jędrzejczak
370165cca5 docs: handling-node-failures: fix typo
Replacing "from" is incorrect. The typo comes from recently
merged #24583.

Fixes #24732

Requires backport to 2025.2 since #24583 has been backported to 2025.2.

Closes scylladb/scylladb#24733

(cherry picked from commit fa982f5579)

Closes scylladb/scylladb#24831
2025-07-04 19:32:09 +02:00
Gleb Natapov
24a317460d gossiper: do not assume that id->ip mapping is available in failure_detector_loop_for_node
failure_detector_loop_for_node may be started on a shard before id->ip
mapping is available there. Currently the code treats missing mapping
as an internal error, but it uses its result for debug output only, so
let's relax the code to not assume the mapping is available.

Fixes #23407

Closes scylladb/scylladb#24614

(cherry picked from commit a221b2bfde)

Closes scylladb/scylladb#24768
2025-07-04 16:09:50 +02:00
Michał Chojnowski
08b117425e utils/alien_worker: fix a data race in submit()
We move a `seastar::promise` on the external worker thread,
after the matching `seastar::future` was returned to the shard.

That's illegal. If the `promise` move occurs concurrently with some
operation (move, await) on the `future`, it becomes a data race
which could cause various kinds of corruption.

This patch fixes that by keeping the promise at a stable address
on the shard (inside a coroutine frame) and only passing a reference
to it through the worker.

Fixes #24751

Closes scylladb/scylladb#24752

(cherry picked from commit a29724479a)

Closes scylladb/scylladb#24777
2025-07-03 11:02:30 +03:00
Łukasz Paszkowski
cf36de2c9a compaction_manager: cancel submission timer on drain
The `drain` method cancels all running compactions and moves the
compaction manager into the disabled state. To move it back to
the enabled state, the `enable` method shall be called.

This, however, throws an assertion error, as the submission timer is
not cancelled and re-enabling the manager tries to arm an already armed timer.

Thus, cancel the timer when calling the drain method to disable
the compaction manager.

Fixes https://github.com/scylladb/scylladb/issues/24504

All versions are affected. So it's a good candidate for a backport.

Closes scylladb/scylladb#24505

(cherry picked from commit a9a53d9178)

Closes scylladb/scylladb#24590
2025-07-03 10:19:16 +03:00
Tomasz Grabiec
440985387e Merge '[Backport 2025.2] repair: postpone repair until topology is not busy ' from Scylladb[bot]
Currently, repair_service::repair_tablets starts repair if there
is no ongoing tablet operations. The check does not consider global
topology operations, like tablet resize finalization.

Hence, if:
- topology is in the tablet_resize_finalization state;
- repair starts (as there is no tablet transitions) and holds the erm;
- resize finalization finishes;

then the repair sees a topology state different from the actual one -
it does not see that the storage groups were already split.
Repair code does not handle this case, and it results in
on_internal_error.

Start repair only when the topology is not busy. The check isn't atomic,
as it's done on shard 0. Thus, we compare the topology versions
to ensure that the busyness check is still valid.

Fixes: https://github.com/scylladb/scylladb/issues/24195.

Needs backport to all branches since they are affected

- (cherry picked from commit df152d9824)

- (cherry picked from commit 83c9af9670)

Parent PR: #24202

Closes scylladb/scylladb#24781

* github.com:scylladb/scylladb:
  test: add test for repair and resize finalization
  repair: postpone repair until topology is not busy
2025-07-02 13:18:22 +02:00
Andrei Chekun
4bc33c027d test.py: use unique hostname for Minio
To avoid a situation where the port is occupied on localhost, use a
unique hostname for Minio.

(cherry picked from commit c6c3e9f492)
2025-07-02 11:12:52 +02:00
Andrei Chekun
8c0798fe00 test.py: Catch possible exceptions during 3rd party services start
With this change, if something goes wrong while starting the services,
they will still be shut down in the finally clause. Without it, the run
can hang forever.

(cherry picked from commit 0ca539e162)
2025-07-02 11:12:16 +02:00
Botond Dénes
e8c9a412bc docs: cql/types.rst: remove reference to frozen-only UDTs
ScyllaDB supports non-frozen UDTs since 3.2, no need to keep referencing
this limitation in the current docs. Replace the description of the
limitation with general description of frozen semantics for UDTs.

Fixes: #22929

Closes scylladb/scylladb#24763

(cherry picked from commit 37ef9efb4e)

Closes scylladb/scylladb#24782
2025-07-02 12:10:35 +03:00
Ferenc Szili
f24d71ab8c logging: Add row count to large partition warning message
When writing large partitions, that is: partitions with size or row count
above a configurable threshold, ScyllaDB outputs a warning to the log:

WARN ... large_data - Writing large partition test/test:  (1200031 bytes) to me-3glr_0xkd_54jip2i8oqnl7hk8mu-big-Data.db

This warning contains information about the size of the partition, but
not the number of rows written. This can be confusing: when the warning
is triggered by the row count exceeding its threshold while the
partition size is below its own, the warning still reports only the
partition size in bytes, leading the user to believe the warning was
caused by the partition size when in reality it was the row count.
See #20125

This change adds a size_desc argument to cql_table_large_data_handler::try_record(),
which will contain the description of the size of the object written.
This method is used to output warnings for large partitions, row counts,
row sizes and cell sizes. This change does not modify the warning message
for row and cell sizes, only for partition size and row count.

The warning for large partitions and row counts will now look like this:

WARN ... large_data - Writing large partition test/test:  (1200031 bytes/100001 rows) to me-3glr_0xkd_54jip2i8oqnl7hk8mu-big-Data.db

Closes scylladb/scylladb#22010

(cherry picked from commit 96267960f8)

Closes scylladb/scylladb#24685
2025-07-02 11:22:40 +03:00
Botond Dénes
3560d9ad82 Merge '[Backport 2025.2] sstables: purge SCYLLA_ASSERT from the sstable read/parse paths' from Scylladb[bot]
Introduce `sstables::parse_assert()`, to replace `SCYLLA_ASSERT()` on the read/parse path. SSTables can get corrupt for various reasons, some outside of the database's control. A bad SSTable should not bring down the database; the parsing should simply be aborted, with as much information printed as possible for the investigation of the nature of the corruption. The newly introduced `parse_assert()` uses `on_internal_error()` under the hood, which prints a backtrace and optionally allows aborting on the error, to generate a coredump.

Fixes https://github.com/scylladb/scylladb/issues/20845

We just hit another case of `SCYLLA_ASSERT()` triggering due to corrupt sstables bringing down nodes in the field; this should be backported to all releases so we don't hit it in the future.

- (cherry picked from commit 27e26ed93f)

- (cherry picked from commit bce89c0f5e)

Parent PR: #24534

Closes scylladb/scylladb#24686

* github.com:scylladb/scylladb:
  sstables: replace SCYLLA_ASSERT() with parse_assert() on the read path
  sstables/exceptions: introduce parse_assert()
2025-07-02 11:20:48 +03:00
Botond Dénes
9dd8dc5357 Merge '[Backport 2025.2] Fix for cassandra role gets recreated after DROP ROLE' from Scylladb[bot]
This patchset fixes a regression introduced by 7e749cd848, when we started re-creating the default superuser role and password from the config even if a new custom superuser had been created by the user.

Now we'll first check with CL LOCAL_ONE whether there is a need to create the default superuser role or password, confirm
it with CL QUORUM, and only then atomically create the role or password.

If the server is started without a cluster quorum, we'll skip creating the role or password.

Fixes https://github.com/scylladb/scylladb/issues/24469
Backport: all versions since 2024.2

- (cherry picked from commit 68fc4c6d61)

- (cherry picked from commit c96c5bfef5)

- (cherry picked from commit 2e2ba84e94)

- (cherry picked from commit f85d73d405)

- (cherry picked from commit d9ec746c6d)

- (cherry picked from commit a3bb679f49)

- (cherry picked from commit 67a4bfc152)

- (cherry picked from commit 0ffddce636)

- (cherry picked from commit 5e7ac34822)

Parent PR: #24451

Closes scylladb/scylladb#24694

* github.com:scylladb/scylladb:
  test: auth_cluster: add test for password reset procedure
  auth: cache roles table scan during startup
  test: auth_cluster: add test for replacing default superuser
  test: pylib: add ability to specify default authenticator during server_start
  auth: split auth-v2 logic for adding default superuser password
  auth: split auth-v2 logic for adding default superuser role
  auth: ldap: fix waiting for underlying role manager
  auth: wait for default role creation before starting authorizer and authenticator
2025-07-02 10:40:56 +03:00
Botond Dénes
d4d464e21e tools/scylla-nodetool: backup: add --move-files parameter
Allow opting in for backup to move the files instead of copying them.

Fixes: https://github.com/scylladb/scylladb/issues/24372

Closes scylladb/scylladb#24503

(cherry picked from commit e715a150b9)

Closes scylladb/scylladb#24721
2025-07-02 10:40:04 +03:00
Botond Dénes
9816cdb901 Merge '[Backport 2025.2] mutation: check key of inserted rows' from Scylladb[bot]
Make sure the keys are full prefixes, as is expected to be the case for rows. On several occasions we have seen empty row keys make their way into the sstables, despite the fact that they are not allowed by the CQL frontend. This means that such empty keys are likely the result of memory corruption or use-after-{free,copy} errors. The source of the corruption is impossible to pinpoint when the empty key is discovered in the sstable. So this patch adds checks for such keys to the places where mutations are built: when building or deserializing mutations.

Fixes: https://github.com/scylladb/scylladb/issues/24506

Not a typical backport candidate (not a bugfix or regression fix), but we should still backport so we have the additional checks deployed to existing production clusters.

- (cherry picked from commit 8b756ea837)

- (cherry picked from commit ab96c703ff)

Parent PR: #24497

Closes scylladb/scylladb#24742

* github.com:scylladb/scylladb:
  mutation: check key of inserted rows
  compound: optimize is_full() for single-component types
2025-07-02 10:27:13 +03:00
Avi Kivity
3d3976c862 repair: row_level: unstall to_repair_rows_on_wire() destroying its input
to_repair_rows_on_wire() moves the contents of its input std::list
and is careful to yield after each element, but the final destruction
of the input list still deals with all of the list elements without
yielding. This is expensive as not all contents of repair_row are moved
(_dk_with_hash is of type lw_shared_ptr<const decorated_key_with_hash>).

To fix, destroy each row element as we move along. This is safe as we
own the input and don't reference row_list other than for the iteration.
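The destroy-as-you-go technique can be sketched on simple types (a minimal illustration; the real code deals with repair_row and yields between iterations):

```cpp
#include <list>
#include <utility>
#include <vector>

// Pop each element right after moving it out, so the input list is
// already empty when it is finally destroyed, instead of destroying
// every element in one non-yielding pass at the end.
std::vector<int> drain(std::list<int>&& row_list) {
    std::vector<int> out;
    while (!row_list.empty()) {
        out.push_back(std::move(row_list.front()));
        row_list.pop_front();  // the real code can yield between iterations
    }
    return out;
}
```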

Fixes #24725.

Closes scylladb/scylladb#24726

(cherry picked from commit 6aa71205d8)

Closes scylladb/scylladb#24769
2025-07-02 10:06:30 +03:00
Botond Dénes
7786167998 Merge '[Backport 2025.2] encryption_at_rest_test: Fix some spurious errors' from Scylladb[bot]
Fixes #24574

* Ensure we close the embedded load_cache objects on encryption shutdown, otherwise we can, in unit testing, get destruction of these while a timer is still active -> assert
* Add extra exception handling to `network_error_test_helper`, so even if the test framework lets an exception escape, we properly stop the network proxy to avoid a use-after-free.

- (cherry picked from commit ee98f5d361)

- (cherry picked from commit 8d37e5e24b)

Parent PR: #24633

Closes scylladb/scylladb#24770

* github.com:scylladb/scylladb:
  encryption_at_rest_test: Add exception handler to ensure proxy stop
  encryption: Ensure stopping timers in provider cache objects
2025-07-02 10:03:52 +03:00
Aleksandra Martyniuk
cbce0ed911 test: add test for repair and resize finalization
Add test that checks whether repair does not start if there is an
ongoing resize finalization.

(cherry picked from commit 83c9af9670)
2025-07-01 20:26:21 +00:00
Aleksandra Martyniuk
eb96ef8ce7 repair: postpone repair until topology is not busy
Currently, repair_service::repair_tablets starts repair if there
is no ongoing tablet operations. The check does not consider global
topology operations, like tablet resize finalization. This may cause
a data race and unexpected behavior.

Start repair when topology is not busy.

(cherry picked from commit df152d9824)
2025-07-01 20:26:21 +00:00
Patryk Jędrzejczak
e3952fbd35 test: test_raft_recovery_user_data: disable hinted handoff
The test is currently flaky, writes can fail with "Too many in flight
hints: 10485936". See scylladb/scylladb#23565 for more details.

We suspect that scylladb/scylladb#23565 is caused by an infrastructure
issue - slow disks on some machines we run CI jobs on.

Since the test fails often and investigation doesn't seem to be easy,
we first deflake the test in this patch by disabling hinted handoff.

For replacing nodes, we provide `cfg` because there should have been
`cfg` in the first place. The test was correct anyway because:
- `tablets_mode_for_new_keyspaces` is set to `true` by default in
  test/cluster/suite.yaml,
- `endpoint_snitch` is set to `GossipingPropertyFileSnitch` by default
  if the property file is provided in `ScyllaServer.__init__`.

Ref scylladb/scylladb#23565

We should backport this patch to 2025.2 because this test is also flaky
on CI jobs using 2025.2. Older branches don't have this test.

Closes scylladb/scylladb#24364

(cherry picked from commit 8756c233e0)

Fixes #24756

Closes scylladb/scylladb#24757
2025-07-01 19:14:22 +02:00
Calle Wilund
9a60a2adce encryption_at_rest_test: Add exception handler to ensure proxy stop
If boost test is run such that we somehow except even in a test macro
such as BOOST_REQUIRE_THROW, we could end up not stopping the net proxy
used, causing a use after free.

(cherry picked from commit 8d37e5e24b)
2025-07-01 15:12:50 +00:00
Calle Wilund
3380479455 encryption: Ensure stopping timers in provider cache objects
utils::loading_cache has a timer that can, if we're unlucky, be running
while the encryption context/extensions referencing the various host
objects containing them are destroyed during unit testing.

Add a stop phase in encryption context shutdown closing the caches.

(cherry picked from commit ee98f5d361)
2025-07-01 15:12:50 +00:00
Anna Stuchlik
0d7d983133 doc: extend 2025.2 upgrade with a note about consistent topology updates
This commit adds a note that the user should enable consistent topology updates before upgrading
to 2025.2 if they didn't do it (for some reason) when previously upgrading to version 2025.1.

Fixes https://github.com/scylladb/scylladb/issues/24467

Closes scylladb/scylladb#24468

(cherry picked from commit e2b7302183)

Closes scylladb/scylladb#24523
2025-07-01 12:33:31 +03:00
Avi Kivity
dd509b9513 Merge '[Backport 2025.2] memtable: ensure _flushed_memory doesn't grow above total_memory' from Scylladb[bot]
`dirty_memory_manager` tracks two quantities about memtable memory usage:
"real" and "unspooled" memory usage.

"real" is the total memory usage (sum of `occupancy().total_space()`)
by all memtable LSA regions, plus an upper-bound estimate of the size of
memtable data which has already moved to the cache region but isn't
evictable (merged into the cache) yet.

"unspooled" is the difference between total memory usage by all memtable
LSA regions, and the total flushed memory (sum of `_flushed_memory`)
of memtables.

`dirty_memory_manager` controls the shares of compaction and/or blocks
writes when these quantities cross various thresholds.

"Total flushed memory" isn't a well defined notion,
since the actual consumption of memory by the same data can vary over
time due to LSA compactions, and even the data present in memtable can
change over the course of the flush due to removals of outdated MVCC versions.
So `_flushed_memory` is merely an approximation computed by `flush_reader`
based on the data passing through it.

This approximation is supposed to be a conservative lower bound.
In particular, `_flushed_memory` should not be greater than
`occupancy().total_space()`. Otherwise, for example, "unspooled" memory
could become negative (and/or wrap around) and weird things could happen.
There is an assertion in `~flush_memory_accounter` which checks that
`_flushed_memory < occupancy().total_space()` at the end of flush.

But it can fail. Without additional treatment, the memtable reader sometimes emits
data which is already deleted. (In particular, it emites rows covered by
a partition tombstone in a newer MVCC version.)
This data is seen by `flush_reader` and accounted in `_flushed_memory`.
But this data can be garbage-collected by the `mutation_cleaner` later during the
flush and decrease `total_memory` below `_flushed_memory`.

There is a piece of code in `mutation_cleaner` intended to prevent that.
If `total_memory` decreases during a `mutation_cleaner` run,
`_flushed_memory` is lowered by the same amount, just to preserve the
asserted property. (This could also make `_flushed_memory` quite inaccurate,
but that's considered acceptable).

But that only works if `total_memory` is decreased during that run. It doesn't
work if the `total_memory` decrease (enabled by the new allocator holes made
by `mutation_cleaner`'s garbage collection work) happens asynchronously
(due to memory reclaim for whatever reason) after the run.

This patch fixes that by tracking the decreases of `total_memory` closer to the
source. Instead of relying on `mutation_cleaner` to notify the memtable if it
lowers `total_memory`, the memtable itself listens for notifications about
LSA segment deallocations. It keeps `_flushed_memory` equal to the reader's
estimate of flushed memory decreased by the change in `total_memory` since the
beginning of flush (if it was positive), and it keeps the amount of "spooled"
memory reported to the `dirty_memory_manager` at `max(0, _flushed_memory)`.

Fixes scylladb/scylladb#21413

Backport candidate because it fixes a crash that can happen in existing stable branches.

- (cherry picked from commit 7d551f99be)

- (cherry picked from commit 975e7e405a)

Parent PR: #21638

Closes scylladb/scylladb#24604

* github.com:scylladb/scylladb:
  memtable: ensure _flushed_memory doesn't grow above total memory usage
  replica/memtable: move region_listener handlers from dirty_memory_manager to memtable
2025-07-01 12:31:25 +03:00
Avi Kivity
9ccc96bf05 tools: optimized_clang: make it work in the presence of a scylladb profile
optimized_clang.sh trains the compiler using profile-guided optimization
(pgo). However, while doing that, it builds scylladb using its own profile
stored in pgo/profiles and decompressed into build/profile.profdata. Due
to the funky directory structure used for training the compiler, that
path is invalid during the training and the build fails.

The workaround was to build on a cloud machine instead of a workstation -
this worked because the cloud machine didn't have git-lfs installed, and
therefore did not see the stored profile, and the whole mess was averted.

To make this work on a machine that does have access to stored profiles,
disable use of the stored profile even if it exists.

Fixes #22713

Closes scylladb/scylladb#24571

(cherry picked from commit 52f11e140f)

Closes scylladb/scylladb#24621
2025-07-01 12:30:58 +03:00
Michał Chojnowski
50736e9740 test_sstable_compression_dictionaries_basic.py: fix a flaky check
test_dict_memory_limit trains new dictionaries and checks (via metrics)
that the old dictionaries are appropriately cleaned up.
The problem is that the cleanup is asynchronous (because the lifetimes
are handled by foreign_ptr, which sends the destructor call
to the owner shard asynchronously), so the metrics might be
checked a few milliseconds before the old dictionary is cleaned up.

The dict lifetimes are lazy on purpose, the right thing to do is
to just let the test retry the check.

Fixes scylladb/scylladb#24516

Closes scylladb/scylladb#24526

(cherry picked from commit cace55aaaf)

Closes scylladb/scylladb#24653
2025-07-01 12:30:25 +03:00
Avi Kivity
ee733c4d38 Merge '[Backport 2025.2] generic_server: fix connections semaphore config observer' from Scylladb[bot]
In ed3e4f33fd we introduced a new connection throttling feature, controlled by the uninitialized_connections_semaphore_cpu_concurrency config option. But live updating of it was broken; this patch fixes it.

When the temporary value from observer() is destroyed, it disconnects from updateable_value, so observation stops right away. We need to retain the observer.

Backport: to 2025.2 where this feature was added
Fixes: https://github.com/scylladb/scylladb/issues/24557

- (cherry picked from commit c6a25b9140)

- (cherry picked from commit 45392ac29e)

- (cherry picked from commit 68ead01397)

Parent PR: #24484

Closes scylladb/scylladb#24679

* github.com:scylladb/scylladb:
  test: add test for live updates of generic server config
  utils: don't allow do discard updateable_value observer
  generic_server: fix connections semaphore config observer
2025-07-01 12:29:53 +03:00
Szymon Malewski
3bac46a18d utils/exceptions.cc: Added check for exceptions::request_timeout_exception in is_timeout_exception function.
It solves an issue where, in some cases, timeout exceptions in CAS operations are logged incorrectly as a general failure.
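
The shape of the fix can be sketched as follows. This is an illustrative stand-in, not Scylla's actual code: the exception types below model the timeout hierarchy, and the classifier rethrows the captured exception and matches the timeout types. The bug was that `exceptions::request_timeout_exception` (thrown on CAS/Paxos paths) was missing from the matched set, so it fell through to the general-failure branch.

```cpp
#include <cassert>
#include <exception>
#include <stdexcept>

// Illustrative stand-ins for Scylla's timeout exception types.
struct read_timeout_exception : std::runtime_error {
    using std::runtime_error::runtime_error;
};
struct request_timeout_exception : std::runtime_error {
    using std::runtime_error::runtime_error;
};

// Rethrow the captured exception and classify it by type.
bool is_timeout_exception(std::exception_ptr e) {
    try {
        std::rethrow_exception(std::move(e));
    } catch (const read_timeout_exception&) {
        return true;
    } catch (const request_timeout_exception&) { // the added check
        return true;
    } catch (...) {
        return false; // everything else is a general failure
    }
}
```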

Fixes #24591

Closes scylladb/scylladb#24619

(cherry picked from commit f28bab741d)

Closes scylladb/scylladb#24687
2025-07-01 12:29:27 +03:00
Lakshmi Narayanan Sreethar
adab525151 utils/big_decimal: fix scale overflow when parsing values with large exponents
The exponent of a big decimal string is parsed as an int32, adjusted for
the removed fractional part, and stored as an int32. When parsing values
like `1.23E-2147483647`, the unscaled value becomes `123`, and the scale
is adjusted to `2147483647 + 2 = 2147483649`. This exceeds the int32
limit, and since the scale is stored as an int32, it overflows and wraps
around, losing the value.

This patch fixes that by parsing the exponent as an int64 value and
then adjusting it for the fractional part. The adjusted scale is then
checked to see if it is still within int32 limits before storing. An
exception is thrown if it is not.

Note that strings with exponents that exceed the int32 range, like
`0.01E2147483650`, were previously not parseable as a big decimal. They
are now accepted if the final adjusted scale fits within int32 limits.
For the above value, unscaled_value = 1 and scale = -2147483648, so it
is now accepted. This is in line with how Java's `BigDecimal` parses
strings.
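
A minimal sketch of the approach (names and signatures are illustrative, not the real `big_decimal` API): the exponent is parsed into an int64, adjusted by the number of digits moved out of the fractional part, and only then range-checked against int32 before being stored as the scale.

```cpp
#include <cassert>
#include <charconv>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <string_view>

// scale = -(exponent) + fractional digits, computed in 64 bits so it
// cannot wrap, then range-checked before narrowing to int32.
int32_t adjusted_scale(std::string_view exponent, int64_t fractional_digits) {
    int64_t exp = 0;
    auto [ptr, ec] = std::from_chars(exponent.data(), exponent.data() + exponent.size(), exp);
    if (ec != std::errc{} || ptr != exponent.data() + exponent.size()) {
        throw std::invalid_argument("malformed exponent");
    }
    if (exp == std::numeric_limits<int64_t>::min()) {
        throw std::overflow_error("scale does not fit in int32");
    }
    int64_t scale = -exp + fractional_digits;
    if (scale < std::numeric_limits<int32_t>::min() ||
        scale > std::numeric_limits<int32_t>::max()) {
        throw std::overflow_error("scale does not fit in int32");
    }
    return static_cast<int32_t>(scale);
}
```

Under this model, `1.23E-2147483647` (exponent `-2147483647`, two fractional digits) yields scale 2147483649 and throws instead of wrapping, while `0.01E2147483650` yields scale -2147483648, which fits and is accepted.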

Fixes: #24581

Signed-off-by: Lakshmi Narayanan Sreethar <lakshmi.sreethar@scylladb.com>

Closes scylladb/scylladb#24640

(cherry picked from commit 279253ffd0)

Closes scylladb/scylladb#24692
2025-07-01 12:28:55 +03:00
Botond Dénes
5f45cf1683 test/boost/memtable_test: only inject error for test table
Currently the test indiscriminately injects failures into the flushes of
any table, via the IO extension mechanism. The tests want to check that
the node correctly handles the IO error by self-isolating; however, the
indiscriminate IO errors can have unintended consequences when they hit
raft, leading to disorderly shutdown and failure of the tests. Testing
raft's resiliency to IO errors is of course worth doing, but it is not
the goal of this particular test, so to avoid the fallout, the IO errors
are limited to the test tables only.

Fixes: https://github.com/scylladb/scylladb/issues/24637

Closes scylladb/scylladb#24638

(cherry picked from commit ee6d7c6ad9)

Closes scylladb/scylladb#24743
2025-07-01 12:28:05 +03:00
Avi Kivity
5e4941a74b Merge '[Backport 2025.2] sstables/mx/writer: handle non-full prefix row keys' from Scylladb[bot]
Although valid for compact tables, non-full (or empty) clustering key prefixes are not handled for row keys when writing sstables. Only the present components are written, consequently if the key is empty, it is omitted entirely.
When parsing sstables, the parsing code unconditionally parses a full prefix.
This mis-match results in parsing failures, as the parser parses part of the row content as a key resulting in a garbage key and subsequent mis-parsing of the row content and maybe even subsequent partitions.

Introduce a new system table: `system.corrupt_data` and infrastructure similar to `large_data_handler`: `corrupt_data_handler`, which abstracts how corrupt data is handled. The sstable writer now passes rows with such corrupt keys to the corrupt data handler. This way, we avoid corrupting the sstables beyond parsing, and the rows are also kept around in system.corrupt_data for later inspection and possible recovery.

Add a full-stack test which checks that rows with bad keys are correctly handled.

Fixes: https://github.com/scylladb/scylladb/issues/24489

The bug is present in all versions, has to be backported to all supported versions.

- (cherry picked from commit 92b5fe8983)

- (cherry picked from commit 0753643606)

- (cherry picked from commit b0d5462440)

- (cherry picked from commit 093d4f8d69)

- (cherry picked from commit 678deece88)

- (cherry picked from commit 64f8500367)

- (cherry picked from commit b931145a26)

- (cherry picked from commit 3e1c50e9a7)

- (cherry picked from commit 46ff7f9c12)

- (cherry picked from commit ebd9420687)

- (cherry picked from commit aae212a87c)

- (cherry picked from commit 592ca789e2)

- (cherry picked from commit edc2906892)

Parent PR: #24492

Closes scylladb/scylladb#24744

* github.com:scylladb/scylladb:
  test/boost/sstable_datafile_test: add test for corrupt data
  sstables/mx/writer: handler rows with empty keys
  test/lib/cql_assertions: introduce columns_assertions
  sstables: add corrupt_data_handler to sstables::sstables
  tools/scylla-sstable: make large_data_handler a local
  db: introduce corrupt_data_handler
  mutation: introduce frozen_mutation_fragment_v2
  mutation/mutation_partition_view: read_{clustering,static}_row(): return row type
  mutation/mutation_partition_view: extract de-ser of {clustering,static} row
  idl-compiler.py: generate skip() definition for enums serializers
  idl: extract full_position.idl from position_in_partition.idl
  db/system_keyspace: add apply_mutation()
  db/system_keyspace: introduce the corrupt_data table
2025-07-01 12:27:01 +03:00
Gleb Natapov
31ed717afb storage_proxy: retry paxos repair even if repair write succeeded
After paxos state is repaired in begin_and_repair_paxos we need to
re-check the state regardless of whether the write back succeeded. This
is how the code worked originally, but it was unintentionally changed
when co-routinized in 61b2e41a23.

Fixes #24630

Closes scylladb/scylladb#24651

(cherry picked from commit 5f953eb092)

Closes scylladb/scylladb#24703
2025-07-01 10:15:12 +02:00
Abhinav Jha
160c937efe group0: modify start_operation logic to account for synchronize phase race condition
In the present scenario, the bootstrapping node goes through the synchronize phase after
group0 is initialized, then enters the post_raft phase and becomes fully ready for
group0 operations. The topology coordinator is agnostic of this and issues the stream
ranges command as soon as the node successfully completes `join_group0`. For a node
booting into an already upgraded cluster, the time the node spends in the synchronize
phase is negligible, but this race condition causes trouble in a small percentage of
cases, since the stream ranges operation fails and the node fails to bootstrap.

This commit addresses the issue by updating the error-throwing logic to account for this
edge case, letting the node wait (with timeouts) for the synchronize phase to finish
instead of throwing an error.
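
The waiting pattern can be sketched generically (the real change lives in group0's `start_operation`; `wait_until` and its signature are made up for illustration): poll the condition until it holds or a deadline passes, and only fall back to the old error path on timeout.

```cpp
#include <cassert>
#include <chrono>
#include <functional>
#include <thread>

// Poll `done` until it returns true or `timeout` elapses.
// Returns false on timeout, at which point the caller may throw,
// as the old code did immediately.
bool wait_until(std::function<bool()> done,
                std::chrono::milliseconds timeout,
                std::chrono::milliseconds poll = std::chrono::milliseconds(1)) {
    auto deadline = std::chrono::steady_clock::now() + timeout;
    while (!done()) {
        if (std::chrono::steady_clock::now() >= deadline) {
            return false;
        }
        std::this_thread::sleep_for(poll);
    }
    return true;
}
```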

A regression test is also added to confirm this code change. The test adds a
wait in the synchronize phase for the newly joining node and releases it only
after the program counter reaches the synchronize case in the `start_operation`
function. This shows that the updated `start_operation` waits for the node to
finish the synchronize phase instead of throwing an error.

This PR fixes a bug. Hence we need to backport it.

Fixes: scylladb/scylladb#23536

Closes scylladb/scylladb#23829

(cherry picked from commit 5ff693eff6)

Closes scylladb/scylladb#24628
2025-07-01 10:10:55 +02:00
Jenkins Promoter
0bf8fe4778 Update pgo profiles - aarch64 2025-07-01 04:30:55 +03:00
Jenkins Promoter
a08ff869f3 Update pgo profiles - x86_64 2025-07-01 04:07:23 +03:00
Jenkins Promoter
fe22df0af2 Update ScyllaDB version to: 2025.2.1 2025-06-30 23:59:09 +03:00
Marcin Maliszkiewicz
19b1922362 test: auth_cluster: add test for password reset procedure
(cherry picked from commit aef531077b)
2025-06-30 20:50:15 +02:00
Marcin Maliszkiewicz
ad759eb141 auth: cache roles table scan during startup
It may be particularly beneficial during connection
storms on startup. In such cases, it can happen that
none of the user's read requests succeed, preventing
the cache from being populated. This, in turn, makes
it more difficult for subsequent reads to
succeed, reducing resiliency against such storms.

(cherry picked from commit 887c57098e)
2025-06-30 20:50:15 +02:00
Marcin Maliszkiewicz
5346d959ff test: auth_cluster: add test for replacing default superuser
This test exercises the custom superuser creation guide:
https://opensource.docs.scylladb.com/stable/operating-scylla/security/create-superuser.html

(cherry picked from commit d9223b61a2)
2025-06-30 20:50:15 +02:00
Marcin Maliszkiewicz
6568065141 test: pylib: add ability to specify default authenticator during server_start
Sometimes we may not want to use the default cassandra role for the
control connection, especially when we test dropping the default role.

(cherry picked from commit 08bf7237f066cead133bf0cac9bba215f238070a)
2025-06-30 20:50:15 +02:00
Marcin Maliszkiewicz
acb0ddaf3d auth: split auth-v2 logic for adding default superuser password
In raft mode (auth-v2) we need to do atomic write after read as
we give stricter consistency guarantees. Instead of patching
legacy logic this commit adds different path as:
- old code may be less tested now so it's best to not change it
- new code path avoids quorum selects in a typical flow (passwords set)

There may be a case when the user deletes a superuser or password
right before restarting a node; in such a case we may omit
updating the password, but:
- this is a trade-off against quorum reads on startup
- it's far more important not to update the password when it shouldn't be
- if needed, the password will be updated on the next node restart

If there is no quorum on startup we'll skip creating password
because we can't perform any raft operation.

Additionally this fixes a problem when password is created despite
having non default superuser in auth-v2.

(cherry picked from commit f85d73d405)
2025-06-30 20:50:15 +02:00
Marcin Maliszkiewicz
44533a0dbe auth: split auth-v2 logic for adding default superuser role
In raft mode (auth-v2) we need to do atomic write after read as
we give stricter consistency guarantees. Instead of patching
legacy logic this commit adds different path as:
  - old code may be less tested now so it's best to not change it
  - new code path avoids quorum selects in a typical flow (roles set)

This fixes a problem when superuser role is created despite
having non default superuser in auth-v2.

If there is no quorum on startup we'll skip creating role
because we can't perform any raft operation.

(cherry picked from commit 2e2ba84e94)
2025-06-30 20:50:15 +02:00
Marcin Maliszkiewicz
19748d9714 auth: ldap: fix waiting for underlying role manager
ldap_role_manager depends on standard_role_manager,
therefore it needs to wait for superuser initialization.
If this is missing, the password authenticator will start
checking the default password too early and may fail to
create the default password if there is no default
role yet.

Currently password authenticator will create password
together with the role in such case but in following
commits we want to separate those responsibilities correctly.

(cherry picked from commit c96c5bfef5)
2025-06-30 20:50:15 +02:00
Marcin Maliszkiewicz
ef1f4907bd auth: wait for default role creation before starting authorizer and authenticator
There is a hidden dependency: the creation of the default superuser role
is split between the password authenticator and the role manager.
To work correctly, they must start in the right order: role manager first,
then password authenticator.

(cherry picked from commit 68fc4c6d61)
2025-06-30 20:50:15 +02:00
Anna Stuchlik
3d8368cacb doc: remove OSS mention from the SI notes
This commit removes a confusing reference to an Open Source version
from the Local Secondary Indexes page.

Fixes https://github.com/scylladb/scylladb/issues/24668

Closes scylladb/scylladb#24673

(cherry picked from commit 2367330513)

Closes scylladb/scylladb#24723
2025-06-30 18:53:48 +03:00
Botond Dénes
236cab0f66 test/boost/sstable_datafile_test: add test for corrupt data
* create a table with random schema
* generate data: random mutations + one row with bad key
* write data to sstable
* check that only good data is written to sstable
* check that the bad data was saved to system.corrupt_data

(cherry picked from commit edc2906892)
2025-06-30 12:44:29 +00:00
Botond Dénes
cd97f4c4c3 sstables/mx/writer: handler rows with empty keys
Although valid for compact tables, non-full (or empty) clustering key
prefixes are not handled for row keys when writing sstables. Only the
present components are written, consequently if the key is empty, it is
omitted entirely.
When parsing sstables, the parsing code unconditionally parses a full
prefix. This mis-match results in parsing failures, as the parser parses
part of the row content as a key resulting in a garbage key and
subsequent mis-parsing of the row content and maybe even subsequent
partitions.

Use the recently introduced corrupt_data_handler to handle rows with
such corrupt keys. This way, we avoid corrupting the sstables beyond
parsing and the rows are also kept around in system.corrupt_data for
later inspection and possible recovery.

(cherry picked from commit 592ca789e2)
2025-06-30 12:44:29 +00:00
Botond Dénes
7654ccbef5 test/lib/cql_assertions: introduce columns_assertions
To enable targeted and optionally typed assertions against individual
columns in a row.

(cherry picked from commit aae212a87c)
2025-06-30 12:44:29 +00:00
Botond Dénes
9eb9ffe4bc sstables: add corrupt_data_handler to sstables::sstables
Similar to how large_data_handler is handled, propagate through
sstables::sstables_manager and store its owner: replica::database.
Tests and tools are also patched. Mostly mechanical changes, updating
constructors and patching callers.

(cherry picked from commit ebd9420687)
2025-06-30 12:44:29 +00:00
Botond Dénes
b0a233b2c9 tools/scylla-sstable: make large_data_handler a local
No reason for it to be a global, not even convenience.

(cherry picked from commit 46ff7f9c12)
2025-06-30 12:44:29 +00:00
Botond Dénes
53373ea9b7 db: introduce corrupt_data_handler
Similar to large_data_handler, this interface allows sstable writers to
delegate the handling of corrupt data.
Two implementations are provided:
* system_table_corrupt_data_handler - saves corrupt data in
  system.corrupt_data, with a TTL=10days (non-configurable for now)
* nop_corrupt_data_handler - drops corrupt data

(cherry picked from commit 3e1c50e9a7)
2025-06-30 12:44:29 +00:00
Botond Dénes
b952d8a88c mutation: introduce frozen_mutation_fragment_v2
Mirrors frozen_mutation_fragment and shares most of the underlying
serialization code, the only exception is replacing range_tombstone with
range_tombstone_change in the mutation fragment variant.

(cherry picked from commit b931145a26)
2025-06-30 12:44:28 +00:00
Botond Dénes
a561600e7e mutation/mutation_partition_view: read_{clustering,static}_row(): return row type
Instead of mutation_fragment, let caller convert into mutation_fragment.
Allows reuse in future callers which will want to convert to
mutation_fragment_v2.

(cherry picked from commit 64f8500367)
2025-06-30 12:44:28 +00:00
Botond Dénes
45b6cc069f mutation/mutation_partition_view: extract de-ser of {clustering,static} row
From the visitor in frozen_mutation_fragment::unfreeze(). We will want
to re-use it in the future frozen_mutation_fragment_v2::unfreeze().

Code-movement only, the code is not changed.

(cherry picked from commit 678deece88)
2025-06-30 12:44:28 +00:00
Botond Dénes
355a1b4af4 idl-compiler.py: generate skip() definition for enums serializers
Currently they only have the declaration, and so far they got away with
it since no users seem to exist, but this is about to change, so generate
the definition too.

(cherry picked from commit 093d4f8d69)
2025-06-30 12:44:28 +00:00
Botond Dénes
2ead6a43a5 idl: extract full_position.idl from position_in_partition.idl
A future user of position_in_partition.idl doesn't need full_position
and so doesn't want to include full_position.hh to fix compile errors
when including position_in_partition.idl.hh.
Extract it to a separate idl file: it has a single user in a
storage_proxy VERB.

(cherry picked from commit b0d5462440)
2025-06-30 12:44:28 +00:00
Botond Dénes
14595c49ae db/system_keyspace: add apply_mutation()
Allow applying writes in the form of mutations directly to the keyspace.
Allows lower-level mutation API to build writes. Advantageous if writes
can contain large cells that would otherwise possibly cause large
allocation warnings if used via the internal CQL API.

(cherry picked from commit 0753643606)
2025-06-30 12:44:28 +00:00
Botond Dénes
43eb3bcf91 db/system_keyspace: introduce the corrupt_data table
To serve as a place to store corrupt mutation fragments. These fragments
cannot be written to sstables, as they would be spread around by
compaction and/or repair. They might even make parsing the sstable
impossible. So they are stored in this special table instead, kept
around to be inspected later and possibly restored if possible.

(cherry picked from commit 92b5fe8983)
2025-06-30 12:44:28 +00:00
Botond Dénes
f4f0ffd713 mutation: check key of inserted rows
Make sure the keys are full prefixes, as is expected to be the case
for rows. On several occasions we have seen empty row keys make their
way into the sstables, despite the fact that they are not allowed by
the CQL frontend. This means that such empty keys are possibly results
of memory corruption or use-after-{free,copy} errors. The source of the
corruption is impossible to pinpoint when the empty key is discovered in
the sstable. So this patch adds checks for such keys to the places where
mutations are built: when building or unserializing mutations.

The test row_cache_test/test_reading_of_nonfull_keys needs adjustment to
work with the changes: it has to make the schema use compact storage,
otherwise the non-full keys used by this test are rejected by the
new checks.

Fixes: https://github.com/scylladb/scylladb/issues/24506
(cherry picked from commit ab96c703ff)
2025-06-30 12:43:36 +00:00
Botond Dénes
b40edca418 compound: optimize is_full() for single-component types
For such compounds, unserializing the key is not necessary to determine
whether the key is full or not.

(cherry picked from commit 8b756ea837)
2025-06-30 12:43:36 +00:00
Aleksandra Martyniuk
7fd4d77fdd test: rest_api: fix test_repair_task_progress
test_repair_task_progress checks the progress of children of root
repair task. However, nothing ensures that the children are
already created.

Wait until at least one child of a root repair task is created.

Fixes: #24556.

Closes scylladb/scylladb#24560

(cherry picked from commit 0deb9209a0)

Closes scylladb/scylladb#24655
2025-06-28 09:39:06 +03:00
Marcin Maliszkiewicz
a54cc8291c test: add test for live updates of generic server config
Affected config: uninitialized_connections_semaphore_cpu_concurrency

(cherry picked from commit 68ead01397)
2025-06-27 16:01:43 +02:00
Patryk Jędrzejczak
2c89800e76 Merge '[Backport 2025.2] docs: document the new recovery procedure' from Scylladb[bot]
We replace the documentation of the old recovery procedure with the
documentation of the new recovery procedure.

The new recovery procedure requires the Raft-based topology to be
enabled, so to remove the old procedure from the documentation,
we must assume users have the Raft-based topology enabled.
We can do it in 2025.2 because the upgrade guides to 2025.1 state that
enabling the Raft-based topology is a mandatory step of the upgrade.
Another reminder is the upgrade guides to 2025.2.

Since we rely on the Raft-based topology being enabled, we remove the
obsolete parts of the documentation.

We will make the Raft-based topology mandatory in the code in the
future, hopefully in 2025.3. For this reason, we also don't touch the
dev docs in this PR.

Fixes scylladb/scylladb#24530

Requires backport to 2025.2 because 2025.2 contains the new recovery
procedure.

- (cherry picked from commit 4e256182a0)

- (cherry picked from commit 203ea5d8f9)

Parent PR: #24583

Closes scylladb/scylladb#24702

* https://github.com/scylladb/scylladb:
  docs: rely on the Raft-based topology being enabled
  docs: handling-node-failures: document the new recovery procedure
2025-06-27 11:58:36 +02:00
Patryk Jędrzejczak
b1bfa4b115 docs: rely on the Raft-based topology being enabled
In 2025.2, we don't force enabling the Raft-based topology in the code,
but we stated in the upgrade guides that it's a mandatory step of the
upgrade to 2025.1. We also remind users to enable the Raft-based
topology in the upgrade guides to 2025.2. Hence, we can rely in the
documentation on the Raft-based topology being enabled. If it is
still disabled, we can just send the user to the upgrade guides. Hence:
- we remove all documentation related to enabling the Raft-based
  topology, enabling the Raft-based schema (enabled Raft-based topology
  implies enabled Raft-based schema), and the gossip-based topology,
- we can replace the documentation of the old manual recovery procedure
  with the documentation of the new manual recovery procedure (done in
  the previous commit).

(cherry picked from commit 203ea5d8f9)
2025-06-26 22:18:56 +00:00
Patryk Jędrzejczak
f052af6c45 docs: handling-node-failures: document the new recovery procedure
We replace the documentation of the old recovery procedure with the
documentation of the new recovery procedure.

We can get rid of the old procedure from the documentation because
we requested users to enable the Raft-based topology during upgrades to
2025.1 and 2025.2.

We leave the note that enabling the Raft-based topology is required to
use the new recovery procedure just in case, since we didn't force
enabling the Raft-based topology in the code.

(cherry picked from commit 4e256182a0)
2025-06-26 22:18:56 +00:00
Botond Dénes
62e134f423 sstables: replace SCYLLA_ASSERT() with parse_assert() on the read path
So parse errors on corrupt SSTables don't result in crashes, instead
just aborting the read in progress.
There are a lot of SCYLLA_ASSERT() usages remaining in sstables/. This
patch tries to focus on those usages which are in the read path. Some
places not used exclusively on the read path may have been converted
too, where the usage of the method in question is not clear.

(cherry picked from commit bce89c0f5e)
2025-06-26 14:53:08 +00:00
Botond Dénes
e7c59ce475 sstables/exceptions: introduce parse_assert()
To replace SCYLLA_ASSERT on the read/parse path. SSTables can get
corrupt for various reasons, some outside of the database's control. A
bad SSTable should not bring down the database, the parsing should
simply be aborted, with as much information printed as possible for the
investigation of the nature of the corruption.
The newly introduced parse_assert() uses on_internal_error() under the
hood, which prints a backtrace and optionally allows for aborting when
on the error, to generate a coredump.
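
A hedged sketch of the idea only (the real parse_assert() routes through on_internal_error(), logs a backtrace, and can be configured to abort; the names below are illustrative): instead of an assert that kills the node, a failed parse-path check throws, so only the offending read is aborted.

```cpp
#include <cassert>
#include <stdexcept>
#include <string>

struct malformed_sstable_exception : std::runtime_error {
    using std::runtime_error::runtime_error;
};

// Parse-path assertion: throws on failure instead of aborting the process.
inline void parse_assert(bool condition, const std::string& sstable, const char* msg) {
    if (!condition) {
        // on_internal_error() would log the message and a backtrace here
        throw malformed_sstable_exception("failed to parse " + sstable + ": " + msg);
    }
}
```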

(cherry picked from commit 27e26ed93f)
2025-06-26 14:53:08 +00:00
Marcin Maliszkiewicz
011765ced8 utils: don't allow do discard updateable_value observer
If the object returned from observe() is destroyed,
it stops observing, potentially causing subtle bugs.
Typically, the observer object is retained as a class member.

(cherry picked from commit 45392ac29e)
2025-06-26 14:50:25 +00:00
Marcin Maliszkiewicz
641cfc9a09 generic_server: fix connections semaphore config observer
When temporary value returned by observer() is destructed it
disconnects from updateable_value so the code immediately stops
observing.

To fix it we need to retain the observer in the class object.
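
The lifetime issue can be shown with minimal stand-ins for utils::updateable_value and its observer (the real classes live in Scylla's utils/ and differ in detail): when the observer handle dies, notifications stop, so the server must keep it as a member.

```cpp
#include <cassert>
#include <functional>
#include <memory>

template <typename T>
class updateable_value {
public:
    class observer {
        std::shared_ptr<std::function<void(const T&)>> _cb;
        friend class updateable_value;
        explicit observer(std::shared_ptr<std::function<void(const T&)>> cb)
            : _cb(std::move(cb)) {}
    public:
        observer() = default;
    };

    observer observe(std::function<void(const T&)> f) {
        _cb = std::make_shared<std::function<void(const T&)>>(std::move(f));
        return observer(_cb);
    }

    void set(T v) {
        _v = std::move(v);
        // deliver only while some observer handle still shares the callback
        if (_cb && _cb.use_count() > 1) {
            (*_cb)(_v);
        }
    }
private:
    T _v{};
    std::shared_ptr<std::function<void(const T&)>> _cb;
};

// The fix: retain the observer as a member instead of discarding
// the temporary returned by observe().
struct generic_server {
    int concurrency = 0;
    updateable_value<int>::observer _obs; // kept alive with the server

    void bind(updateable_value<int>& cfg) {
        _obs = cfg.observe([this](const int& v) { concurrency = v; });
    }
};
```

With the buggy pattern `(void)cfg.observe(...)`, the temporary is destroyed immediately and later `set()` calls are never delivered.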

(cherry picked from commit c6a25b9140)
2025-06-26 14:50:25 +00:00
Jenkins Promoter
33e947e753 Update ScyllaDB version to: 2025.2.0 2025-06-25 15:29:15 +03:00
Michał Chojnowski
5fba228a6b memtable: ensure _flushed_memory doesn't grow above total memory usage
dirty_memory_manager tracks two quantities about memtable memory usage:
"real" and "unspooled" memory usage.

"real" is the total memory usage (sum of `occupancy().total_space()`)
by all memtable LSA regions, plus an upper-bound estimate of the size of
memtable data which has already moved to the cache region but isn't
evictable (merged into the cache) yet.

"unspooled" is the difference between total memory usage by all memtable
LSA regions, and the total flushed memory (sum of `_flushed_memory`)
of memtables.

dirty_memory_manager controls the shares of compaction and/or blocks
writes when these quantities cross various thresholds.

"Total flushed memory" isn't a well defined notion,
since the actual consumption of memory by the same data can vary over
time due to LSA compactions, and even the data present in memtable can
change over the course of the flush due to removals of outdated MVCC versions.
So `_flushed_memory` is merely an approximation computed by `flush_reader`
based on the data passing through it.

This approximation is supposed to be a conservative lower bound.
In particular, `_flushed_memory` should not be greater than
`occupancy().total_space()`. Otherwise, for example, "unspooled" memory
could become negative (and/or wrap around) and weird things could happen.
There is an assertion in ~flush_memory_accounter which checks that
`_flushed_memory < occupancy().total_space()` at the end of flush.

But it can fail. Without additional treatment, the memtable reader sometimes emits
data which is already deleted. (In particular, it emits rows covered by
a partition tombstone in a newer MVCC version.)
This data is seen by `flush_reader` and accounted in `_flushed_memory`.
But this data can be garbage-collected by the mutation_cleaner later during the
flush and decrease `total_memory` below `_flushed_memory`.

There is a piece of code in mutation_cleaner intended to prevent that.
If `total_memory` decreases during a `mutation_cleaner` run,
`_flushed_memory` is lowered by the same amount, just to preserve the
asserted property. (This could also make `_flushed_memory` quite inaccurate,
but that's considered acceptable).

But that only works if `total_memory` is decreased during that run. It doesn't
work if the `total_memory` decrease (enabled by the new allocator holes made
by `mutation_cleaner`'s garbage collection work) happens asynchronously
(due to memory reclaim for whatever reason) after the run.

This patch fixes that by tracking the decreases of `total_memory` closer to the
source. Instead of relying on `mutation_cleaner` to notify the memtable if it
lowers `total_memory`, the memtable itself listens for notifications about
LSA segment deallocations. It keeps `_flushed_memory` equal to the reader's
estimate of flushed memory decreased by the change in `total_memory` since the
beginning of flush (if it was positive), and it keeps the amount of "spooled"
memory reported to the `dirty_memory_manager` at `max(0, _flushed_memory)`.
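The accounting rule described above can be sketched roughly as follows. This is a minimal illustration with hypothetical names, not the actual `dirty_memory_manager`/`flush_memory_accounter` code: it keeps the flushed-memory figure equal to the reader's estimate minus any positive drop in total memory since the flush began, and clamps the value reported upward at zero.

```cpp
#include <cstdint>
#include <algorithm>

// Hypothetical sketch: the reader's running estimate of flushed bytes,
// corrected by any decrease of total_memory observed since the flush
// began (e.g. due to MVCC garbage collection freeing LSA segments).
struct flush_accounting {
    int64_t _reader_estimate = 0; // bytes accounted by the flush reader
    int64_t _total_at_start = 0;  // total_memory when the flush began
    int64_t _total_now = 0;       // current total_memory

    // Reader estimate minus the (positive) drop in total_memory
    // since the flush started.
    int64_t flushed_memory() const {
        int64_t drop = std::max<int64_t>(0, _total_at_start - _total_now);
        return _reader_estimate - drop;
    }

    // Amount reported to the manager, clamped at zero so the derived
    // "unspooled" quantity can never go negative or wrap around.
    int64_t reported_flushed() const {
        return std::max<int64_t>(0, flushed_memory());
    }
};
```

With these rules, a mid-flush drop in total memory larger than the reader's estimate yields a reported value of zero rather than a negative (or wrapped) number.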

(cherry picked from commit 975e7e405a)
2025-06-24 13:06:06 +00:00
Michał Chojnowski
9b98bacaa1 replica/memtable: move region_listener handlers from dirty_memory_manager to memtable
The memtable wants to listen for changes in its `total_memory` in order
to decrease its `_flushed_memory` in case some of the freed memory has already
been accounted as flushed. (This can happen because the flush reader sees
and accounts even outdated MVCC versions, which can be deleted and freed
during the flush).

Today, the memtable doesn't listen to those changes directly. Instead,
some calls which can affect `total_memory` (in particular, the mutation cleaner)
manually check the value of `total_memory` before and after they run, and they
pass the difference to the memtable.

But that's not good enough, because `total_memory` can also change outside
of those manually-checked calls -- for example, during LSA compaction, which
can occur anytime. This makes memtable's accounting inaccurate and can lead
to unexpected states.

But we already have an interface for listening to `total_memory` changes
actively, and `dirty_memory_manager`, which also needs to know it,
does just that. So what happens e.g. when `mutation_cleaner` runs
is that `mutation_cleaner` checks the value of `total_memory` before it runs,
then it runs, causing several changes to `total_memory` which are picked up
by `dirty_memory_manager`, then `mutation_cleaner` checks the end value of
`total_memory` and passes the difference to `memtable`, which corrects
whatever was observed by `dirty_memory_manager`.

To allow memtable to modify its `_flushed_memory` correctly, we need
to make `memtable` itself a `region_listener`. Also, instead of
the situation where `dirty_memory_manager` receives `total_memory`
change notifications from `logalloc` directly, and `memtable` fixes
the manager's state later, we want only the memtable to listen
for the notifications and pass them, already modified accordingly,
to the manager, so there are no intermediate wrong states.

This patch moves the `region_listener` callbacks from the
`dirty_memory_manager` to the `memtable`. It's not intended to be
a functional change, just a source code refactoring.
The next patch will be a functional change enabled by this.

(cherry picked from commit 7d551f99be)
2025-06-24 13:06:06 +00:00
Anna Stuchlik
b469158418 doc: improve the tablets limitations section
This PR improves the Limitations and Unsupported Features section
for tablets, as it has been confusing to the customers.

Refs https://github.com/scylladb/scylla-enterprise/issues/5465

Fixes https://github.com/scylladb/scylladb/issues/24562

Closes scylladb/scylladb#24563

(cherry picked from commit 17eabbe712)

Closes scylladb/scylladb#24588
2025-06-24 10:06:21 +03:00
Benny Halevy
afa2b40ac9 disk_space_monitor: add space_source_registration
Register the current space_source_fn in an RAII
object that resets monitor._space_source to the
previous function when the RAII object is destroyed.

Use space_source_registration in database_test::
mutation_dump_generated_schema_deterministic_id_version
to prevent use-after-stack-return in the test.
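The RAII pattern described above can be sketched like this. The types here are simplified stand-ins (the real `disk_space_monitor` has a different layout); the point is that the registration object saves the previous space source and restores it on destruction, so a source capturing stack state cannot outlive its scope.

```cpp
#include <functional>
#include <utility>

using space_source_fn = std::function<long()>;

// Hypothetical monitor with a single replaceable space source.
struct disk_space_monitor {
    space_source_fn _space_source;
};

// RAII registration: installs a new space_source_fn and restores the
// previous one when destroyed, preventing use-after-stack-return.
class space_source_registration {
    disk_space_monitor& _monitor;
    space_source_fn _previous;
public:
    space_source_registration(disk_space_monitor& m, space_source_fn fn)
        : _monitor(m)
        , _previous(std::exchange(m._space_source, std::move(fn))) {}
    ~space_source_registration() {
        _monitor._space_source = std::move(_previous);
    }
};
```

A scope-local registration temporarily overrides the source and the destructor puts the original back automatically.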

Fixes #24314

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

Closes scylladb/scylladb#24342

(cherry picked from commit 8b387109fc)

Closes scylladb/scylladb#24392
2025-06-24 10:02:23 +03:00
Raphael S. Carvalho
fa420f8644 replica: Fix truncate assert failure
Truncate doesn't really go well with concurrent writes. The fix (#23560) exposed
a preexisting fragility which I missed.

1) truncate gets RP mark X, truncated_at = second T
2) new sstable written during snapshot or later, also at second T (differing only in milliseconds)
3) discard_sstables() gets RP Y > saved RP X, since creation time of sstable
with RP Y is equal to truncated_at = second T.

So the problem is that truncate is using a clock of second granularity for
filtering out sstables written later, and after we got low mark and truncate time,
it can happen that a sstable is flushed later within the same second, but at a
different millisecond.
By switching to a millisecond clock (db_clock), we allow sstables written later
within the same second to be filtered out. It's not perfect, but it's
extremely unlikely that a new write lands and gets flushed in the same
millisecond we recorded the truncated_at timepoint. In practice, truncate
will not be used concurrently with writes, so this should be enough for
our tests performing such concurrent actions.
We're moving away from gc_clock, which is our cheap lowres_clock, but
time is only retrieved when creating sstable objects, whose creation
frequency is low enough to avoid significant consequences, and
db_clock should be cheap enough since it's usually syscall-less.
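The granularity problem can be shown with a toy example using std::chrono durations (these are not the actual gc_clock/db_clock types): an sstable flushed 1 ms after the truncation point collapses onto the same second under a second-granularity clock and is wrongly treated as pre-truncation.

```cpp
#include <chrono>

using namespace std::chrono;

// Hypothetical filter: an sstable is discarded by truncate only if it
// was created at or before the recorded truncation point.
template <typename T>
bool discarded_by_truncate(T created, T truncated_at) {
    return created <= truncated_at;
}
```

With millisecond values the later flush survives; after truncating both values to whole seconds they compare equal and the later flush is wrongly discarded.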

Fixes #23771.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>

Closes scylladb/scylladb#24426

(cherry picked from commit 2d716f3ffe)

Closes scylladb/scylladb#24435
2025-06-24 10:02:06 +03:00
Andrzej Jackowski
60bc1c339c test: wait for normal state propagation in test_auth_v2_migration
By default, cluster tests have skip_wait_for_gossip_to_settle=0 and
ring_delay_ms=0. In tests with gossip topology, this may lead to a race
where nodes see different states of each other.

In case of test_auth_v2_migration, there are three nodes. If the first
node already knows that the third node is NORMAL, and the second node
does not, the system_auth tables can return incomplete results.

To avoid such a race, this commit adds a check that all nodes see other
nodes as NORMAL before any writes are done.

Refs: #24163

Closes scylladb/scylladb#24185

(cherry picked from commit 555d897a15)

Closes scylladb/scylladb#24520
2025-06-24 10:01:42 +03:00
Michał Chojnowski
3eba371e09 test/boost/mutation_reader_test: fix a use-after-free in test_fast_forwarding_combined_reader_is_consistent_with_slicing
The contract in mutation_reader.hh says:

```
// pr needs to be valid until the reader is destroyed or fast_forward_to()
// is called again.
    future<> fast_forward_to(const dht::partition_range& pr) {
```

`test_fast_forwarding_combined_reader_is_consistent_with_slicing` violates
this by passing a temporary to `fast_forward_to`.

Fix that.

Fixes scylladb/scylladb#24542

Closes scylladb/scylladb#24543

(cherry picked from commit 27f66fb110)

Closes scylladb/scylladb#24548
2025-06-24 10:01:19 +03:00
Gleb Natapov
c644526bf9 api: return error from get_host_id_map if gossiper is not enabled yet.
Token metadata api is initialized before gossiper is started.
get_host_id_map REST endpoint cannot function without the fully
initialized gossiper though. The gossiper is started deep in
the join_cluster call chain, but if we move token_metadata api
initialization after the call it means that no api will be available
during bootstrap. This is not what we want.

Make a simple fix by returning an error from the api if the gossiper is
not initialized yet.

Fixes: #24479

Closes scylladb/scylladb#24575

(cherry picked from commit e364995e28)

Closes scylladb/scylladb#24587
2025-06-24 10:00:48 +03:00
Nadav Har'El
34bdbad128 Merge '[Backport 2025.2] cql, schema: Extend keyspace, table, views, indexes name length limit from 48 to 192 bytes' from Scylladb[bot]
cql, schema: Extend name length limit from 48 to 192 bytes

    This commit increases the maximum length of names for keyspaces, tables, materialized views, and indexes from 48 to 192 bytes.
    The previous 48-byte limit was inherited from Cassandra 3 for compatibility. However, this validation was removed in Cassandra 4 and 5 (see CASSANDRA-20389)
    and some usage scenarios (such as some feature store workflows generating long table names) now depend on this relaxed constraint.
    This change brings ScyllaDB's behavior in line with modern Cassandra versions and better supports these use cases.

    The new limit of 192 bytes is derived from underlying filesystem limitations to prevent runtime errors when creating directories for table data.
    When a new table is created, ScyllaDB generates a directory for its SSTables. The directory name is constructed from the table name, a dash, and a 32-character UUID.
    For a CDC-enabled table, an associated log table is also created, which has the suffix `_scylla_cdc_log` appended to its name.
    The directory name for this log table becomes the longest possible representation.
    Additionally we reserve 15 bytes for future use, allowing for potential future extensions without breaking existing schemas.
    To guarantee that directory creation never fails due to exceeding filesystem name limits, the maximum name length is calculated as follows:
      255 bytes (common filesystem limit for a path component)
    -  32 bytes (for the 32-character UUID string)
    -   1 byte  (for the '-' separator)
    -  15 bytes (for the '_scylla_cdc_log' suffix)
    -  15 bytes (reserved for future use)
    ----------
    = 192 bytes (Maximum allowed name length)
    This calculation is similar in principle to the one proposed for Cassandra to fix related directory creation failures (see apache/cassandra/pull/4038).

    This patch also updates/adds all associated tests to validate the new 192-byte limit.
    The documentation has been updated accordingly.

Fixes #4480

Backport 2025.2: The significantly shorter maximum table name length in Scylla compared to Cassandra is becoming a more common issue for users in the latest release.

- (cherry picked from commit a41c12cd85)

- (cherry picked from commit 4577c66a04)

Parent PR: #24500

Closes scylladb/scylladb#24603

* github.com:scylladb/scylladb:
  cql, schema: Extend name length limit from 48 to 192 bytes
  replica: Remove unused keyspace::init_storage()
2025-06-23 15:48:23 +03:00
Karol Nowacki
76bd23cddd cql, schema: Extend name length limit from 48 to 192 bytes
This commit increases the maximum length of names for keyspaces, tables, materialized views, and indexes from 48 to 192 bytes.
The previous 48-byte limit was inherited from Cassandra 3 for compatibility. However, this validation was removed in Cassandra 4 and 5 (see CASSANDRA-20389)
and some usage scenarios (such as some feature store workflows generating long table names) now depend on this relaxed constraint.
This change brings ScyllaDB's behavior in line with modern Cassandra versions and better supports these use cases.

The new limit of 192 bytes is derived from underlying filesystem limitations to prevent runtime errors when creating directories for table data.
When a new table is created, ScyllaDB generates a directory for its SSTables. The directory name is constructed from the table name, a dash, and a 32-character UUID.
For a CDC-enabled table, an associated log table is also created, which has the suffix `_scylla_cdc_log` appended to its name.
The directory name for this log table becomes the longest possible representation.
Additionally we reserve 15 bytes for future use, allowing for potential future extensions without breaking existing schemas.
To guarantee that directory creation never fails due to exceeding filesystem name limits, the maximum name length is calculated as follows:
  255 bytes (common filesystem limit for a path component)
-  32 bytes (for the 32-character UUID string)
-   1 byte  (for the '-' separator)
-  15 bytes (for the '_scylla_cdc_log' suffix)
-  15 bytes (reserved for future use)
----------
= 192 bytes (Maximum allowed name length)
This calculation is similar in principle to the one proposed for Cassandra to fix related directory creation failures (see apache/cassandra/pull/4038).

This patch also updates/adds all associated tests to validate the new 192-byte limit.
The documentation has been updated accordingly.
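The derivation above can be written as a compile-time check. The constant names below are made up for the sketch; the values are exactly the ones in the commit message.

```cpp
// The limit derivation from the commit message, as constexpr arithmetic.
constexpr int filesystem_component_limit = 255; // common max path component
constexpr int uuid_chars = 32;                  // 32-character UUID string
constexpr int separator = 1;                    // the '-' separator
constexpr int cdc_suffix = 15;                  // "_scylla_cdc_log"
constexpr int reserved = 15;                    // reserved for future use

constexpr int max_name_length =
    filesystem_component_limit - uuid_chars - separator - cdc_suffix - reserved;

static_assert(max_name_length == 192,
              "directory name for a CDC log table always fits the filesystem limit");
```

The static_assert guarantees the longest possible directory name (name + '-' + UUID + CDC suffix) never exceeds the 255-byte path-component limit, with 15 bytes to spare.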

(cherry picked from commit 4577c66a04)
2025-06-22 17:38:30 +00:00
Karol Nowacki
87f31f79a3 replica: Remove unused keyspace::init_storage()
This function was declared but had no implementation or callers. It is being removed as minor code cleanup.

(cherry picked from commit a41c12cd85)
2025-06-22 17:38:29 +00:00
Jenkins Promoter
942b16ffe5 Update ScyllaDB version to: 2025.2.0-rc6 2025-06-22 15:01:54 +03:00
Pavel Emelyanov
66fe11a126 Update seastar submodule (no nested stall backtraces)
* seastar 9f0034a0...450e36d5 (1):
  > stall_detector: no backtrace if exception

Fixes #24464

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>

Closes scylladb/scylladb#24541
2025-06-19 10:08:40 +03:00
Michał Chojnowski
06d6718f3b pylib/manager_client: add server_switch_executable
Add a util for switching the Scylla executable during the test.
Will be used for upgrade tests.

(cherry picked from commit 5da19ff6a6)
2025-06-18 13:50:38 +00:00
Michał Chojnowski
b5591422c6 test/pylib: in add_server, give a way to specify the executable and version-specific config
This will be used for upgrade tests.
The cluster will be started with an older executable and without configs
specific to newer versions.

(cherry picked from commit 1ff7e09edc)
2025-06-18 13:50:38 +00:00
Michał Chojnowski
043eaf099a pylib: pass scylla_env environment variables to the topology suite
I want to add an upgrade test under the topology suite.
To work, it will have to know the path to the tested Scylla
executable, so that it can switch the nodes to it.

The path could be passed by various means and I'm not sure
which method is appropriate.

In some other places (e.g. the cql suite) we pass the path via
the `SCYLLA` environment variable and this patch follows that example.

`PythonTestSuite` (parent class of `TopologySuite`) already has that
variable set in `self.scylla_env`, and passes it around.
However, `TopologySuite` uses its own `run()`, and so it implicitly
overrides the decision to pass `self.scylla_env` down. This patch
changes that, and after the patch we apply the `self.scylla_env` to the
environment for topology tests.

This might have some unforeseen side effects on coverage measurement,
because AFAICS the (only) other variable in `self.scylla_env` is
`LLVM_PROFILE_FILE`.
But topology tests don't run Scylla executables themselves
(they only send commands to the cluster manager started externally),
so I figure there should be no change.

(cherry picked from commit 2ef0db0a6b)
2025-06-18 13:50:38 +00:00
Michał Chojnowski
5e2b3be754 test/pylib: add get_scylla_2025_1_executable()
Adds a function which downloads and installs (in `~/.cache`)
Scylla 2025.1, for use in upgrade tests.

Note: this introduces an internet dependency into pylib,
AFAIK the first one.

We already have some other code for downloading existing Scylla
releases, written for different purposes, in `cqlpy/fetch_scylla.py`.
I made zero effort to reuse that in any way.

Note: hardcoding the package version might be uncool,
but if we want "better" version selection (e.g. the newest patch version
in the given branch), we should have a separate library (or web service)
for that, and share it with CCM/SCT.
If we add a separate automatic version selection mechanism here,
we are going to end up with yet another half-broken Scylla version
selector, with yet different syntax and semantics than the other ones.

We never clear the downloaded and unpacked files.
This could become a problem in the future.
(At which point we can add some mechanism that deletes cached archives
downloaded more than a week ago.)

(cherry picked from commit 34098fbd1f)
2025-06-18 13:50:38 +00:00
Michał Chojnowski
d141b730fc pylib/scylla_cluster: give a way to pass executable-specific options to nodes
I'm trying to adapt pylib to multi-version tests.
(Where the Scylla cluster is upgraded to a newer Scylla version
during the test).

Before this patch, the initial config (where "config" == yaml file + CLI args)
of the nodes is hardcoded in scylla_cluster.py.
The problem is that this config might not apply to past versions,
so we need some way to give them a different config.
(For example, with the config as it is before the patch,
a Scylla 2025.1 executable would not boot up because it does not
know the `group0_voter_handler` logger).

In this patch, we create a way to attach version-specific
config to the executable passed to ScyllaServer.

(cherry picked from commit cc7432888e)
2025-06-18 13:50:37 +00:00
Michał Chojnowski
76d989cbfe dbuild: mount "$XDG_CACHE_HOME/scylladb"
We will use it to keep a cache of artifact downloads for upgrade tests,
across dbuild invocations.

(cherry picked from commit 63218bb094)
2025-06-18 13:50:37 +00:00
Piotr Dulikowski
9536949911 Merge '[Backport 2025.2] tablets: deallocate storage state on end_migration' from Scylladb[bot]
When a tablet is migrated and cleaned up, deallocate the tablet storage
group state on `end_migration` stage, instead of `cleanup` stage:

* When the stage is updated from `cleanup` to `end_migration`, the
  storage group is removed on the leaving replica.
* When the table is initialized, if the tablet stage is `end_migration`
  then we don't allocate a storage group for it. This happens for
  example if the leaving replica is restarted during tablet migration.
  If it's initialized in `cleanup` stage then we allocate a storage
  group, and it will be deallocated when transitioning to
  `end_migration`.

This guarantees that the storage group is always deallocated on the
leaving replica by `end_migration`, and that it is always allocated if
the tablet wasn't cleaned up fully yet.

It is a similar case also for the pending replica when the migration is
aborted. We deallocate the state on `revert_migration` which is the
stage following `cleanup_target`.

Previously the storage group would be allocated when the tablet is
initialized on any of the tablet replicas - also on the leaving replica,
and when the tablet stage is `cleanup` or `end_migration`, and
deallocated during `cleanup`.

This fixes the following issue:

1. A migrating tablet enters cleanup stage
2. the tablet is cleaned up successfully
3. The leaving replica is restarted, and allocates storage group
4. tablet cleanup is not called because it's already cleaned up
5. the storage group remains allocated on the leaving replica after the
   migration is completed - it's not cleaned up properly.

Fixes https://github.com/scylladb/scylladb/issues/23481

backport to all relevant releases since it's a bug that results in a crash

- (cherry picked from commit 34f15ca871)

- (cherry picked from commit fb18fc0505)

- (cherry picked from commit bd88ca92c8)

Parent PR: #24393

Closes scylladb/scylladb#24488

* github.com:scylladb/scylladb:
  test/cluster/test_tablets: test restart during tablet cleanup
  test: tablets: add get_tablet_info helper
  tablets: deallocate storage state on end_migration
2025-06-18 10:25:32 +02:00
Anna Stuchlik
01d3b504d1 doc: add support for z3 GCP
This commit adds support for z3-highmem-highlssd instance types to
Cloud Instance Recommendations for GCP.

Fixes https://github.com/scylladb/scylladb/issues/24511

Closes scylladb/scylladb#24533

(cherry picked from commit 648d8caf27)

Closes scylladb/scylladb#24545
2025-06-17 23:40:47 +03:00
Michael Litvak
305f827888 test/cluster/test_tablets: test restart during tablet cleanup
Add a test that reproduces issue scylladb/scylladb#23481.

The test migrates a tablet from one node to another, and while the
tablet is in some stage of cleanup - either before or right after,
depending on the parameter - the leaving replica, on which the tablet is
cleaned, is restarted.

This is interesting because when the leaving replica starts and loads
its state, the tablet could be in different stages of cleanup - the
SSTables may still exist or they may have been cleaned up already, and
we want to make sure the state is loaded correctly.

(cherry picked from commit bd88ca92c8)
2025-06-17 13:59:10 +00:00
Michael Litvak
d094bc6fc9 test: tablets: add get_tablet_info helper
Add a helper for tests to get the tablet info from system.tablets for a
tablet owning a given token.

(cherry picked from commit fb18fc0505)
2025-06-17 13:59:10 +00:00
Michael Litvak
c11a2e2aaf tablets: deallocate storage state on end_migration
When a tablet is migrated and cleaned up, deallocate the tablet storage
group state on `end_migration` stage, instead of `cleanup` stage:

* When the stage is updated from `cleanup` to `end_migration`, the
  storage group is removed on the leaving replica.
* When the table is initialized, if the tablet stage is `end_migration`
  then we don't allocate a storage group for it. This happens for
  example if the leaving replica is restarted during tablet migration.
  If it's initialized in `cleanup` stage then we allocate a storage
  group, and it will be deallocated when transitioning to
  `end_migration`.

This guarantees that the storage group is always deallocated on the
leaving replica by `end_migration`, and that it is always allocated if
the tablet wasn't cleaned up fully yet.

It is a similar case also for the pending replica when the migration is
aborted. We deallocate the state on `revert_migration` which is the
stage following `cleanup_target`.

Previously the storage group would be allocated when the tablet is
initialized on any of the tablet replicas - also on the leaving replica,
and when the tablet stage is `cleanup` or `end_migration`, and
deallocated during `cleanup`.

This fixes the following issue:

1. A migrating tablet enters cleanup stage
2. the tablet is cleaned up successfully
3. The leaving replica is restarted, and allocates storage group
4. tablet cleanup is not called because it was already cleaned up
5. the storage group remains allocated on the leaving replica after the
   migration is completed - it's not cleaned up properly.

Fixes scylladb/scylladb#23481

(cherry picked from commit 34f15ca871)
2025-06-17 13:59:10 +00:00
Botond Dénes
a63b22eec6 Merge '[Backport 2025.2] tablets: fix missing data after tablet merge ' from Scylladb[bot]
Consider the following scenario:

1) let's assume tablet 0 has range [1, 5] (pre merge)
2) tablet merge happens, tablet 0 has now range [1, 10]
3) tablet_sstable_set isn't refreshed, so it holds a stale state and thinks tablet 0 still has range [1, 5]
4) during a full scan, forward service will intersect the full range with tablet ranges and consume one tablet at a time
5) replica service is asked to consume range [1, 10] of tablet 0 (post merge)

We have two possible outcomes:

With cache bypass:

1) cache reader is bypassed
2) sstable reader is created on range [1, 10]
3) unrefreshed tablet_sstable_set holds stale state, but correctly selects all sstables intersecting with range [1, 10]

With cache:

1) cache reader is created
2) finds partition with token 5 is cached
3) sstable reader is created on range [1, 4] (later would fast forward to range [6, 10]; also belongs to tablet 0)
4) incremental selector consumes the pre-merge sstable spanning range [1, 5]
4.1) since the partitioned_sstable_set pre-merge contains only that sstable, EOS is reached
4.2) since EOS is reached, the fast forward to range [6, 10] is not allowed.
So with the set refreshed, the sstable set is aligned with tablet ranges and no premature EOS is signalled; such a premature EOS would otherwise prevent the fast forward from happening and keep some data from being captured in the read.

This change fixes the bug and triggers a mutation source refresh whenever the number of tablets for the table has changed, not only when we have incoming tablets.

Additionally, includes a fix for range reads that span more than one tablet, which can happen during split execution.

Fixes: https://github.com/scylladb/scylladb/issues/23313

This change needs to be backported to all supported versions which implement tablet merge.

- (cherry picked from commit d0329ca370)

- (cherry picked from commit 1f9f724441)

- (cherry picked from commit 53df911145)

Parent PR: #24287

Closes scylladb/scylladb#24339

* github.com:scylladb/scylladb:
  replica: Fix range reads spanning sibling tablets
  test: add reproducer and test for mutation source refresh after merge
  tablets: trigger mutation source refresh on tablet count change
2025-06-17 08:35:14 +03:00
Jenkins Promoter
0adf905112 Update ScyllaDB version to: 2025.2.0-rc5 2025-06-16 16:21:22 +03:00
Pavel Emelyanov
c2a9f2d9c6 Update seastar submodule
* seastar d7ff58f2...9f0034a0 (1):
  > http_client: Add ECONNRESET to retryable errors

And switch to 2025.2 branch from scylla-seastar for backports

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>

Closes scylladb/scylladb#24446
2025-06-15 17:33:16 +03:00
Raphael S. Carvalho
79958472bc replica: Fix range reads spanning sibling tablets
We don't guarantee that coordinators emit range reads that span
only one tablet.

Consider this scenario:

1) split is about to be finalized, barrier is executed, completes.
2) coordinator starts a read, uses pre-split erm (split not committed to group0 yet)
3) split is committed to group0, all replicas switch storage.
4) replica-side read is executed, uses a range which spans tablets.

We could fix it with two-phase split execution. Rather than pushing the
complexity to higher levels, let's fix incremental selector which should
be able to serve all the tokens owned by a given shard. During split
execution, neither of the sibling tablets is going anywhere since the split
runs with the state machine locked, so a single read spanning both
sibling tablets works as long as the selector works across tablet
boundaries.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit 53df911145)
2025-06-15 09:14:38 -03:00
Ferenc Szili
ba192c1a29 test: add reproducer and test for mutation source refresh after merge
This change adds a reproducer and test for the fix where the local mutation
source is not always refreshed after a tablet merge.

(cherry picked from commit 1f9f724441)
2025-06-15 09:14:37 -03:00
Jenkins Promoter
89f5374435 Update pgo profiles - aarch64 2025-06-15 04:46:00 +03:00
Jenkins Promoter
184e0716b3 Update pgo profiles - x86_64 2025-06-15 04:08:36 +03:00
Anna Stuchlik
baa2592299 doc: remove the limitation for disabling CDC
This commit removes the instruction to stop all writes before disabling CDC with ALTER.

Fixes https://github.com/scylladb/scylla-docs/issues/4020

Closes scylladb/scylladb#24406

(cherry picked from commit b0ced64c88)

Closes scylladb/scylladb#24476
2025-06-13 14:07:38 +03:00
Robert Bindar
a926cba476 Add support for nodetool refresh --skip-reshape
This patch adds the new option in nodetool, patches the
load_new_ss_tables REST request with a new parameter and
skips the reshape step in refresh if this flag is passed.

Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>

Closes scylladb/scylladb#24409
Fixes: #24365

(cherry picked from commit ca1a9c8d01)

Closes scylladb/scylladb#24472
2025-06-13 14:06:19 +03:00
Michał Chojnowski
9c28b812ca db/config: add an option that disables dict-aware sstable compressors in DDL statements
For reasons, we want to be able to disallow dictionary-aware compressors
in chosen deployments.

This patch adds a knob for that. When the knob is disabled,
dictionary-aware compressors will be rejected in the validation
stage of CREATE and ALTER statements.

Closes scylladb/scylladb#24355

(cherry picked from commit 7d26d3c7cb)

Closes scylladb/scylladb#24454
2025-06-13 14:03:32 +03:00
Michael Litvak
d792916e8e test_cdc_generation_clearing: wait for generations to propagate
In test_cdc_generation_clearing we trigger events that update CDC
generations, verify the generations are updated as expected, and verify
the system topology and CDC generations are consistent on all nodes.

Before checking that all nodes are consistent and have the same CDC
generations, we need to consider that the changes are propagated through
raft and take some time to propagate to all nodes.

Currently, we wait for the change to be applied only on the first server
which runs the CDC generation publisher fiber and read the CDC
generations from this single node. The consistency check that follows
could fail if the change was not propagated to some other node yet.

To fix that, before checking consistency with all nodes, we execute a
read barrier on all nodes so they all see the same state as the leader.

Fixes scylladb/scylladb#24407

Closes scylladb/scylladb#24433

(cherry picked from commit 8aeb404893)

Closes scylladb/scylladb#24450
2025-06-10 15:50:40 +03:00
Michał Chojnowski
a539ff6419 utils/lsa/chunked_managed_vector: fix the calculation of max_chunk_capacity()
`chunked_managed_vector` is a vector-like container which splits
its contents into multiple contiguous allocations if necessary,
in order to fit within LSA's max preferred contiguous allocation
limits.

Each limited-size chunk is stored in a `managed_vector`.
`managed_vector` is unaware of LSA's size limits.
It's up to the user of `managed_vector` to pick a size which
is small enough.

This happens in `chunked_managed_vector::max_chunk_capacity()`.
But the calculation is wrong, because it doesn't account for
the fact that `managed_vector` has to place some metadata
(the backreference pointer) inside the allocation.
In effect, the chunks allocated by `chunked_managed_vector`
are just a tiny bit larger than the limit, and the limit is violated.

Fix this by accounting for the metadata.
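A rough sketch of the capacity fix follows. The limit value and names here are assumptions standing in for the actual `logalloc` constants; the point is that the per-chunk element count must leave room for `managed_vector`'s metadata (the backreference pointer) inside the same allocation.

```cpp
#include <cstddef>

// Assumed stand-in for logalloc::max_managed_object_size.
constexpr std::size_t max_managed_object_size = 128 * 1024;
// The backreference pointer managed_vector stores in the allocation.
constexpr std::size_t metadata_size = sizeof(void*);

// Fixed calculation: subtract the metadata before dividing.
// (The pre-fix version was max_managed_object_size / sizeof(T),
// which makes each chunk a tiny bit larger than the limit.)
template <typename T>
constexpr std::size_t max_chunk_capacity() {
    return (max_managed_object_size - metadata_size) / sizeof(T);
}
```

With the subtraction in place, a full chunk plus its metadata fits within the limit; the pre-fix formula exceeds it as soon as the metadata is added.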

Also, before the patch, `chunked_managed_vector::max_contiguous_allocation`
repeats the definition of `logalloc::max_managed_object_size`.
This is begging for a bug if `logalloc::max_managed_object_size`
changes one day. Adjust it so that `chunked_managed_vector` looks
directly at `logalloc::max_managed_object_size`, as it means to.

Fixes scylladb/scylladb#23854

(cherry picked from commit 7f9152babc)

Closes scylladb/scylladb#24371
2025-06-10 11:25:52 +03:00
Jenkins Promoter
b295ce38ae Update ScyllaDB version to: 2025.2.0-rc4 2025-06-06 17:03:11 +03:00
Nikos Dragazis
2e50d1a357 sstables: Fix race when loading checksum component
`read_checksum()` loads the checksum component from disk and stores a
non-owning reference in the shareable components. To avoid loading the
same component twice, the function has an early return statement.
However, this does not guarantee atomicity - two fibers or threads may
load the component and update the shareable components concurrently.
This can lead to use-after-free situations when accessing the component
through the shareable components, since the reference stored there is
non-owning. This can happen when multiple compaction tasks run on the
same SSTable (e.g., regular compaction and scrub-validate).

Fix this by not updating the reference in shareable components if a
reference is already in place. Instead, create an owning reference to
the existing component for the current fiber. This is less efficient
than using a mutex, since the component may be loaded multiple times
from disk before noticing the race, but no locks are used for any other
SSTable component either. Also, this affects uncompressed SSTables,
which are not that common.

Fixes #23728.

Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>

Closes scylladb/scylladb#23872

(cherry picked from commit eaa2ce1bb5)

Closes scylladb/scylladb#24358
2025-06-06 08:49:56 +03:00
Szymon Malewski
d65b390780 mapreduce_service: Prevent race condition
In parallelized aggregation functions, the super-coordinator (the node performing the final merging step) receives and merges each partial result in parallel coroutines (`parallel_for_each`).
Usually responses are spread over time and the actual merging is atomic.
However, sometimes partial results arrive at nearly the same time, and if an aggregate function (e.g. a Lua script) yields, two coroutines can overwrite the same accumulator one after another,
which leads to losing some of the results.
To prevent this, this patch makes each coroutine store its merging result in its own context and overwrite the accumulator atomically, only after the result has been fully merged.
Compared to the previous implementation, the order of operands in the merging function is swapped, but the order of aggregation is not guaranteed anyway.

Fixes #20662

Closes scylladb/scylladb#24106

(cherry picked from commit 5969809607)

Closes scylladb/scylladb#24389
2025-06-06 08:49:15 +03:00
Anna Stuchlik
4ebae7ae62 doc: add the upgrade guide from 2025.1 to 2025.2
This commit adds the upgrade guide from version 2025.1 to 2025.2.
Also, it removes the upgrade guides existing for the previous version
that are irrelevant in 2025.2 (upgrade from OSS 6.2 and Enterprise 2024.x).

Note that the new guide does not include the "Enable Consistent Topology Updates" page,
as users upgrading to 2025.2 have consistent topology updates already enabled.

Fixes https://github.com/scylladb/scylladb/issues/24133

Fixes https://github.com/scylladb/scylladb/issues/24265

Closes scylladb/scylladb#24266

(cherry picked from commit 8b989d7fb1)

Closes scylladb/scylladb#24391
2025-06-06 08:48:31 +03:00
Ernest Zaslavsky
4fed3a5a5a encryption_test: Catch exact exception
Apparently `test_kms_network_error` will succeed under any circumstances: since most of our exceptions derive from `std::exception`, whatever happens to the test, and for whatever reason it throws, the test will be marked as passed.

Start catching the exact exception that we expect to be thrown.

Maybe somewhat related to https://github.com/scylladb/scylladb/issues/22628

Fixes: https://github.com/scylladb/scylladb/issues/24145

reapplies reverted: https://github.com/scylladb/scylladb/pull/24065

Should be backported to 2025.2.

Closes scylladb/scylladb#24242

(cherry picked from commit a39b773d36)

Closes scylladb/scylladb#24402
2025-06-06 08:48:02 +03:00
Pavel Emelyanov
5b86b6393a Merge '[Backport 2025.2] Add ability to skip SSTables cleanup when loading them' from Scylladb[bot]
The non-streaming loading of sstables has recently started performing cleanup [1]. For vnodes, unfortunately, cleanup is almost unavoidable because of the nature of vnode sharding, even if the sstable is already clean. This wastes IO and CPU for nothing. Skipping the cleanup in a smart way is possible, but it would require too many changes in the code and in the on-disk data. Moreover, the effort would not help existing SSTables, and it is going to be obsoleted by tablets some time soon.

That said, the easiest way to skip cleanup is an explicit --skip-cleanup option for nodetool and a respective skip_cleanup parameter for the API handler.

New feature, no backport

fixes #24136
refs #12422 [1]

- (cherry picked from commit 4ab049ac8d)

- (cherry picked from commit ed3ce0f6af)

- (cherry picked from commit 1b1f653699)

- (cherry picked from commit c0796244bb)

Parent PR: #24139

Closes scylladb/scylladb#24398

* github.com:scylladb/scylladb:
  nodetool: Add refresh --skip-cleanup option
  api: Introduce skip_cleanup query parameter
  distributed_loader: Don't create owned ranges if skip-cleanup is true
  code: Push bool skip_cleanup flag around
2025-06-06 08:47:22 +03:00
Pavel Emelyanov
024af57bd5 nodetool: Add refresh --skip-cleanup option
The option "conflicts" with load-and-stream. Tests and doc included.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
(cherry picked from commit c0796244bb)
2025-06-05 17:52:13 +03:00
Pavel Emelyanov
c59327950b api: Introduce skip_cleanup query parameter
Just copy the load_and_stream and primary_replica_only logic; this new
option is the same in this sense.

Throw if it's specified with the load_and_stream one.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
(cherry picked from commit 1b1f653699)
2025-06-05 17:48:35 +03:00
Pavel Emelyanov
a2b2e46482 distributed_loader: Don't create owned ranges if skip-cleanup is true
In order to make reshard compaction task run cleanup, the owner-ranges
pointer is passed to it. If it's nullptr, the cleanup is not performed.
So to do the skip-cleanup, the easiest (but not the most apparent) way
is not to initialize the pointer and keep it nullptr.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
(cherry picked from commit ed3ce0f6af)
2025-06-05 17:44:45 +03:00
Pavel Emelyanov
4a7ddbfe07 code: Push bool skip_cleanup flag around
Just put the boolean into the callstack between API and distributed
loader to reduce the churn in the next patches. No functional changes,
flag is false and unused.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
(cherry picked from commit 4ab049ac8d)
2025-06-05 17:44:40 +03:00
Michał Chojnowski
484fc374c1 compress: fix a use-after-free in dictionary_holder::get_recommended_dict()
The function calls copy() on a foreign_ptr
(stored in a map) which can be destroyed
(erased from the map) before the copy() completes.
This is illegal.

One way to fix this would be to apply an rwlock
to the map. Another way is to wrap the `foreign_ptr`
in a `lw_shared_ptr` and extend its lifetime over
the `copy()` call. This patch does the latter.

Fixes scylladb/scylladb#24165
Fixes scylladb/scylladb#24174

Closes scylladb/scylladb#24175

(cherry picked from commit ea4d251ad2)

Closes scylladb/scylladb#24374
2025-06-05 12:11:22 +03:00
Botond Dénes
a5251b4d44 Merge '[Backport 2025.2] Add --scope arg to nodetool refresh' from Scylladb[bot]
This PR adds the `--scope` option to `nodetool refresh`.
Like in the case of `nodetool restore`, you can pass either of:
* `node` - On the local node.
* `rack` - On the local rack.
* `dc` - In the datacenter (DC) where the local node lives.
* `all` (default) - Everywhere across the cluster.

as scope.

The feature is based on the existing load_and_stream paths, so it requires passing `--load-and-stream` to the `refresh` command, although this might change in the near future.

Fixes https://github.com/scylladb/scylladb/issues/23564

- (cherry picked from commit c570941692)

Parent PR: #23861

Closes scylladb/scylladb#24379

* github.com:scylladb/scylladb:
  Add nodetool refresh --scope option
  Refactor out code from test_restore_with_streaming_scopes
  Refactor out code from test_restore_with_streaming_scopes
  Refactor out code from test_restore_with_streaming_scopes
  Refactor out code from test_restore_with_streaming_scopes
  Refactor out code from test_restore_with_streaming_scopes
2025-06-05 11:54:17 +03:00
Avi Kivity
2afe0695cf Revert "config: decrease default large allocation warning threshold to 128k"
This reverts commit 04fb2c026d. 2025.2 got
the reduced threshold, but won't get most of the fixes the warning will
generate, leaving it very noisy. Better to avoid the noise for this release.

Fixes #24384.
2025-06-04 14:18:35 +03:00
Robert Bindar
b62264e1d9 Add nodetool refresh --scope option
This change adds the --scope option to nodetool refresh.
Like in the case of nodetool restore, you can pass either of:
* node - On the local node.
* rack - On the local rack.
* dc - In the datacenter (DC) where the local node lives.
* all (default) - Everywhere across the cluster.
as scope.

The feature is based on the existing load_and_stream paths, so it
requires passing --load-and-stream to the refresh command.
Also, it is not compatible with the --primary-replica-only option.

Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>

Closes scylladb/scylladb#23861

(cherry picked from commit c570941692)
2025-06-04 11:59:17 +03:00
Robert Bindar
36cc0f8e7e Refactor out code from test_restore_with_streaming_scopes
part 5: check_data_is_back

Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
(cherry picked from commit 548a1ec20a)
2025-06-04 11:54:07 +03:00
Robert Bindar
a885c87547 Refactor out code from test_restore_with_streaming_scopes
part 4: compute_scope

Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
(cherry picked from commit 29309ae533)
2025-06-04 11:54:01 +03:00
Robert Bindar
371fc05943 Refactor out code from test_restore_with_streaming_scopes
part 3: create_dataset

Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
(cherry picked from commit a0f0580a9c)
2025-06-04 11:53:51 +03:00
Robert Bindar
4366cd5a81 Refactor out code from test_restore_with_streaming_scopes
part 2: take_snapshot

Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
(cherry picked from commit 5171ca385a)
2025-06-04 11:53:43 +03:00
Robert Bindar
38ee119112 Refactor out code from test_restore_with_streaming_scopes
part 1: create_cluster

Signed-off-by: Robert Bindar <robert.bindar@scylladb.com>
(cherry picked from commit f09bb20ac4)
2025-06-04 11:53:32 +03:00
Piotr Dulikowski
6edf92a9e3 Merge '[Backport 2025.2] test/boost: Adjust tests to RF-rack-valid keyspaces' from Scylladb[bot]
This PR adjusts existing Boost tests so they respect the invariant
introduced by enabling `rf_rack_valid_keyspaces` configuration option.
We disable it explicitly in more problematic tests. After that, we
enable the option by default in the whole test suite.

Fixes scylladb/scylladb#23958

Backport: backporting to 2025.1 to be able to test the implementation there too.

- (cherry picked from commit 6e2fb79152)

- (cherry picked from commit e4e3b9c3a1)

- (cherry picked from commit 1199c68bac)

- (cherry picked from commit cd615c3ef7)

- (cherry picked from commit fa62f68a57)

- (cherry picked from commit 22d6c7e702)

- (cherry picked from commit 237638f4d3)

- (cherry picked from commit c60035cbf6)

Parent PR: scylladb/scylladb#23802

Closes scylladb/scylladb#24368

* github.com:scylladb/scylladb:
  test/lib/cql_test_env.cc: Enable rf_rack_valid_keyspaces by default
  test/boost/tablets_test.cc: Explicitly disable rf_rack_valid_keyspaces in problematic tests
  test/boost/tablets_test.cc: Fix indentation in test_load_balancing_with_random_load
  test/boost/tablets_test.cc: Adjust test_load_balancing_with_random_load to RF-rack-validity
  test/boost/tablets_test.cc: Adjust test_load_balancing_works_with_in_progress_transitions to RF-rack-validity
  test/boost/tablets_test.cc: Adjust test_load_balancing_resize_requests to RF-rack-validity
  test/boost/tablets_test.cc: Adjust test_load_balancing_with_two_empty_nodes to RF-rack-validity
  test/boost/tablets_test.cc: Adjust test_load_balancer_shuffle_mode to RF-rack-validity
2025-06-04 10:24:35 +02:00
Nadav Har'El
609ad01bbc alternator: hide internal tags from users
The "tags" mechanism in Alternator is a convenient way to attach metadata
to Alternator tables. Recently we have started using it more and more for
internal metadata storage:

  * UpdateTimeToLive stores the attribute in a tag system:ttl_attribute
  * CreateTable stores provisioned throughput in tags
    system:provisioned_rcu and system:provisioned_wcu
  * CreateTable stores the table's creation time in a tag called
    system:table_creation_time.

We do not want any of these internal tags to be visible to a
ListTagsOfResource request, because if they are visible (as before this
patch), systems such as Terraform can get confused when they suddenly
see a tag which they didn't set - and may even attempt to delete it
(as reported in issue #24098).

Moreover, we don't want any of these internal tags to be writable
with TagResource or UntagResource: If a user wants to change the TTL
setting they should do it via UpdateTimeToLive - not by writing
directly to tags.

So in this patch we forbid read or write to *any* tag that begins
with the "system:" prefix, except one: "system:write_isolation".
That tag is deliberately intended to be writable by the user, as
a configuration mechanism, and is never created internally by
Scylla. We should have perhaps chosen a different prefix for
configurable vs. internal tags, or chosen more unique prefixes -
but let's not change these historic names now.

This patch also adds regression tests for the internal tags features,
failing before this patch and passing after:
1. internal tags, specifically system:ttl_attribute, are not visible
   in ListTagsOfResource, and cannot be modified by TagResource or
   UntagResource.
2. system:write_isolation is not internal, and can be written by either
   TagResource or UntagResource, and read with ListTagsOfResource.

This patch also fixes a bug in the test where we added more checks
for system:write_isolation - test_tag_resource_write_isolation_values.
This test forgot to remove the system:write_isolation tags from
test_table when it ended, which would lead to other tests that run
later to run with a non-default write isolation - something which we
never intended.

Fixes #24098.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>

Closes scylladb/scylladb#24299

(cherry picked from commit 6cbcabd100)

Closes scylladb/scylladb#24377
2025-06-04 09:56:33 +03:00
Avi Kivity
10b7f2d924 pgo: drop Java configuration
Since 5e1cf90a51
("build: replace tools/java submodule with packaged cassandra-stress")
we run pre-packaged cassandra-stress. As such, we don't need to look for
a Java runtime (which is missing on the frozen toolchain) and can
rely on the cassandra-stress package finding its own Java runtime.

Fix by just dropping all the Java-finding stuff.

Note: Java 11 is in fact present on the frozen toolchain, just
not in a way that pgo.py can find it.

Fixes #24176.

Closes scylladb/scylladb#24178

(cherry picked from commit 29932a5af1)

Closes scylladb/scylladb#24254
2025-06-03 17:54:28 +03:00
Dawid Mędrek
5130ec84de test/lib/cql_test_env.cc: Enable rf_rack_valid_keyspaces by default
We've adjusted all of the Boost tests so they respect the invariant
enforced by the `rf_rack_valid_keyspaces` configuration option, or
explicitly disabled the option in those that turned out to be more
problematic and will require more attention. Thanks to that, we can
now enable it by default in the test suite.

(cherry picked from commit c60035cbf6)
2025-06-03 11:10:16 +00:00
Dawid Mędrek
9938183ace test/boost/tablets_test.cc: Explicitly disable rf_rack_valid_keyspaces in problematic tests
Some of the tests in the file verify more subtle parts of the behavior
of tablets and rely on topology layouts or using keyspaces that violate
the invariant the `rf_rack_valid_keyspaces` configuration option is
trying to enforce. Because of that, we explicitly disable the option
to be able to enable it by default in the rest of the test suite in
the following commit.

(cherry picked from commit 237638f4d3)
2025-06-03 11:10:16 +00:00
Dawid Mędrek
1271b42848 test/boost/tablets_test.cc: Fix indentation in test_load_balancing_with_random_load
(cherry picked from commit 22d6c7e702)
2025-06-03 11:10:16 +00:00
Dawid Mędrek
012e248792 test/boost/tablets_test.cc: Adjust test_load_balancing_with_random_load to RF-rack-validity
We make sure that the keyspaces created in the test are always RF-rack-valid.
To achieve that, we change how the test is performed.

Before this commit, we first created a cluster and then ran the actual test
logic multiple times. Each of those test cases created a keyspace with a random
replication factor.

That cannot work with `rf_rack_valid_keyspaces` set to true. We cannot modify
the property file of a node (see commit: eb5b52f598),
so once we set up the cluster, we cannot adjust its layout to work with another
replication factor.

To solve that issue, we also recreate the cluster in each test case. Now we choose
the replication factor at random, create a cluster distributing nodes across as many
racks as RF, and perform the rest of the logic. We perform it multiple times in
a loop so that the test behaves as before these changes.

(cherry picked from commit fa62f68a57)
2025-06-03 11:10:16 +00:00
Dawid Mędrek
1364eec694 test/boost/tablets_test.cc: Adjust test_load_balancing_works_with_in_progress_transitions to RF-rack-validity
We distribute the nodes used in the test across two racks so we can
run the test with `rf_rack_valid_keyspaces` set to true.

We want to avoid cross-rack migrations and keep the test as realistic
as possible. Since host3 is supposed to function as a new node in the
cluster, we change the layout of it: now, host1 has 2 shards and resides
in a separate rack. Most of the remaining test logic is preserved and behaves
as before this commit.

There is a slight difference in the tablet migrations. Before the commit,
we were migrating a tablet between nodes of different shard counts. Now
it's impossible because it would force us to migrate tablets between racks.
However, since the test wants to simply verify that an ongoing migration
doesn't interfere with load balancing and still leads to a perfect balance,
that still happens: we explicitly migrate ONLY 1 tablet from host2 to host3,
so to achieve the goal, one more tablet needs to be migrated, and we test
that.

(cherry picked from commit cd615c3ef7)
2025-06-03 11:10:16 +00:00
Dawid Mędrek
85fe37a8e4 test/boost/tablets_test.cc: Adjust test_load_balancing_resize_requests to RF-rack-validity
We assign the nodes created by the test to separate racks. It has no impact
on the test since the keyspace used in the test uses RF=2, so the tablet
replicas will still be the same.

(cherry picked from commit 1199c68bac)
2025-06-03 11:10:16 +00:00
Dawid Mędrek
e21bdbb9ef test/boost/tablets_test.cc: Adjust test_load_balancing_with_two_empty_nodes to RF-rack-validity
We distribute the nodes used in the test between two racks. Although
that may affect how tablets behave in general, this change will not
have any real impact on the test. The test verifies that load balancing
eventually balances tablets in the cluster, which will still happen.
Because of that, the changes in this commit are safe to apply.

(cherry picked from commit e4e3b9c3a1)
2025-06-03 11:10:16 +00:00
Dawid Mędrek
ca8762885b test/boost/tablets_test.cc: Adjust test_load_balancer_shuffle_mode to RF-rack-validity
We distribute the nodes used in the test between two racks. Although that
may have an impact on how tablets behave, it's orthogonal to what the test
verifies -- whether the topology coordinator is continuously in the tablet
migration track. Because of that, it's safe to make this change without
influencing the test.

(cherry picked from commit 6e2fb79152)
2025-06-03 11:10:15 +00:00
Michał Chojnowski
3a7a1dc4a9 test/boost/sstable_compressor_factory_test: define a test suite name
It seems that tests in test/boost/combined_tests have to define a test
suite name, otherwise they aren't picked up by test.py.

Fixes #24199

Closes scylladb/scylladb#24200

(cherry picked from commit ff8a119f26)

Closes scylladb/scylladb#24255
2025-06-03 12:01:35 +03:00
Anna Stuchlik
12596a8eca doc: add OS support for ScyllaDB 2025.2
This commit adds information about platform support
in ScyllaDB version 2025.2.

Fixes https://github.com/scylladb/scylladb/issues/24180

Closes scylladb/scylladb#24263

(cherry picked from commit 28cb5a1e02)

Closes scylladb/scylladb#24335
2025-06-03 10:07:28 +03:00
Anna Stuchlik
be3f50b658 doc: update migration tools overview
This commit updates the migration overview page:

- It removes the info about migration from SSTable to CQL.
- It updates the link to the migrator docs.

Fixes https://github.com/scylladb/scylladb/issues/24247

Refs https://github.com/scylladb/scylladb/pull/21775

Closes scylladb/scylladb#24258

(cherry picked from commit b197d1a617)

Closes scylladb/scylladb#24282
2025-06-03 10:06:42 +03:00
Michał Chojnowski
6cd954de8d utils/stream_compressor: allocate memory for zstd compressors externally
The default and recommended way to use zstd compressors is to let
zstd allocate and free memory for compressors on its own.

That's what we did for zstd compressors used in RPC compression.
But it turns out that it generates allocation patterns we dislike.

We expected zstd not to generate allocations after the context object
is initialized, but it turns out that it tries to downsize the context
sometimes (by reallocation). We don't want that because the allocations
generated by zstd are large (1 MiB with the parameters we use),
so repeating them periodically stresses the reclaimer.

We can avoid this by using the "static context" API of zstd,
in which the memory for context is allocated manually by the user
of the library. In this mode, zstd doesn't allocate anything
on its own.

The implementation of this patch adds a forward-compatibility
consideration: later versions of Scylla can't use a
window size greater than the one we hardcoded in this patch
when talking to the old version of the decompressor.

(This is not a problem, since those compressors are only used
for RPC compression at the moment, where cross-version communication
can be prevented by bumping COMPRESSOR_NAME. But it's something
that the developer who changes the window size must _remember_ to do).

Fixes #24160
Fixes #24183

Closes scylladb/scylladb#24161

(cherry picked from commit 185a032044)

Closes scylladb/scylladb#24281
2025-06-03 10:02:34 +03:00
Botond Dénes
9a7ea917eb mutation/mutation_compactor: cache regular/shadowable max-purgeable in separate members
Max purgeable has two possible values for each partition: one for
regular tombstones and one for shadowable ones. Yet currently a single
member is used to cache the max-purgeable value for the partition, so
whichever kind of tombstone is checked first, its max-purgeable will
become sticky and apply to the other kind of tombstones too. E.g. if the
first can_gc() check is for a regular tombstone, its max-purgeable will
apply to shadowable tombstones in the partition too, meaning they might
not be purged, even though they are purgeable, as the shadowable
max-purgeable is expected to be more lenient. The other way around is
worse, as it will result in regular tombstone being incorrectly purged,
permitted by the more lenient shadowable tombstone max-purgeable.
Fix this by caching the two possible values in two separate members.
A reproducer unit test is also added.

Fixes: scylladb/scylladb#23272

Closes scylladb/scylladb#24171

(cherry picked from commit 7db956965e)

Closes scylladb/scylladb#24329
2025-06-03 09:51:52 +03:00
Ran Regev
c5cff9e14f changed the string literals into the correct ones
Fixes: #23970

use correct string literals:
KMIP_TAG_CRYPTOGRAPHIC_LENGTH_STR --> KMIP_TAGSTR_CRYPTOGRAPHIC_LENGTH
KMIP_TAG_CRYPTOGRAPHIC_USAGE_MASK_STR --> KMIP_TAGSTR_CRYPTOGRAPHIC_USAGE_MASK

From https://github.com/scylladb/scylladb/issues/23970 description of the
problem (emphasizes are mine):

When transparent data encryption at rest is enabled with KMIP as a key
provider, the observation is that before creating a new key, Scylla tries
to locate an existing key with provided specifications (key algorithm &
length), with the intention to re-use existing key, **but the attributes
sent in the request have minor spelling mistakes** which are rejected by
the KMIP server key provider, and hence scylla assumes that a key with
these specifications doesn't exist, and creates a new key in the KMIP
server. The issue here is that for every new table, ScyllaDB will create
a key in the KMIP server, which could clutter the KMS, and make key
lifecycle management difficult for DBAs.

Closes scylladb/scylladb#24057

(cherry picked from commit 37854acc92)

Closes scylladb/scylladb#24303
2025-06-02 15:11:53 +03:00
Michael Litvak
5aca2c134d test_cdc_generation_publishing: fix to read monotonically
The test test_multiple_unpublished_cdc_generations reads the CDC
generation timestamps to verify they are published in the correct order.
To do so it issues reads in a loop with a short sleep period and checks
the differences between consecutive reads, assuming they are monotonic.

However the assumption that the reads are monotonic is not valid,
because the reads are issued with consistency_level=ONE, thus we may read
timestamps {A,B} from some node, then read timestamps {A} from another
node that didn't apply the write of the new timestamp B yet. This will
trigger the assert in the test and fail.

To ensure the reads are monotonic we change the test to use consistency
level ALL for the reads.

Fixes scylladb/scylladb#24262

Closes scylladb/scylladb#24272

(cherry picked from commit 3a1be33143)

Closes scylladb/scylladb#24336
2025-06-02 14:42:57 +03:00
Anna Stuchlik
cc299e335d doc: remove copyright from Cassandra Stress
This commit removes the Apache copyright note from the Cassandra Stress page.

It's a follow up to https://github.com/scylladb/scylladb/pull/21723, which missed
that update (see https://github.com/scylladb/scylladb/pull/21723#discussion_r1944357143).

Cassandra Stress is a separate tool with separate repo with the docs, so the copyright
information on the page is incorrect.

Fixes https://github.com/scylladb/scylladb/issues/23240

Closes scylladb/scylladb#24219

(cherry picked from commit d303edbc39)

Closes scylladb/scylladb#24256
2025-06-02 14:41:34 +03:00
David Garcia
a7b34a54bc docs: fix \t (tab) is not rendered correctly
Closes scylladb/scylladb#24096

(cherry picked from commit bf9534e2b5)

Closes scylladb/scylladb#24257
2025-06-02 14:40:54 +03:00
Pavel Emelyanov
eb78d3aefb test/result_utils: Do not assume map_reduce reducing order
When map_reduce is called on a collection, one shouldn't expect that it
processes the elements of the collection in any specific order.

The current test of map-reduce over boost outcome assumes that if the reduce
function is string concatenation, it will concatenate the
given vector of strings in the order they are listed. That requirement
should be relaxed: the result may be the reversed concatenation.

Fixes scylladb/scylladb#24321

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>

Closes scylladb/scylladb#24325

(cherry picked from commit a65ffdd0df)

Closes scylladb/scylladb#24337
2025-06-02 14:00:07 +03:00
Jenkins Promoter
304f47f6ec Update ScyllaDB version to: 2025.2.0-rc3 2025-06-01 15:29:44 +03:00
Ferenc Szili
8ba5a1be70 tablets: trigger mutation source refresh on tablet count change
Consider the following scenario:

- let's assume tablet 0 has range [1, 5] (pre merge)
- tablet merge happens, tablet 0 has now range [1, 10]
- tablet_sstable_set isn't refreshed, so it holds a stale state and thinks
  tablet 0 still has range [1, 5]
- during a full scan, forward service will intersect the full range with
  tablet ranges and consume one tablet at a time
- replica service is asked to consume range [1, 10] of tablet 0 (post merge)

We have two possible outcomes:

With cache bypass:
1) cache reader is bypassed
2) sstable reader is created on range [1, 10]
3) the unrefreshed tablet_sstable_set holds stale state, but correctly
   selects all sstables intersecting with range [1, 10]

With cache:
1) cache reader is created
2) finds partition with token 5 is cached
3) sstable reader is created on range [1, 4] (later would fast forward to
   range [6, 10]; also belongs to tablet 0)
4) incremental selector consumes the pre-merge sstable spanning range [1, 5]
4.1) since the partitioned_sstable_set pre-merge contains only that sstable,
     EOS is reached
4.2) since EOS is reached, the fast forward to range [6, 10] is not allowed.

So with the set refreshed, the sstable set is aligned with the tablet ranges,
and no premature EOS is signalled, which would otherwise prevent the fast
forward from happening and all data from being properly captured in the read.

This change fixes the bug by triggering a mutation source refresh whenever
the number of tablets for the table has changed, not only when we have
incoming tablets.

Fixes: #23313
(cherry picked from commit d0329ca370)
2025-05-30 17:08:45 +00:00
Anna Stuchlik
20602b6a8b doc: clarify RF increase issues for tablets vs. vnodes
This commit updates the guidelines for increasing the Replication Factor
depending on whether tablets are enabled or disabled.

To present it in a clear way, I've reorganized the page.

Fixes https://github.com/scylladb/scylladb/issues/23667

Closes scylladb/scylladb#24221

(cherry picked from commit efce03ef43)

Closes scylladb/scylladb#24284
2025-05-30 15:16:17 +03:00
Botond Dénes
19513fa47e Merge '[Backport 2025.2] raft_sys_table_storage: avoid temp buffer when deserializing log_entry' from Scylladb[bot]
The get_blob method linearizes data by copying it into a single buffer, which can cause 'oversized allocation' warnings.

In this commit we avoid copying by creating an input stream on top of the original fragmented managed bytes returned by untyped_result_set_row::get_view.

fixes scylladb/scylladb#23903

backport: no need, not a critical issue.

- (cherry picked from commit 6496ae6573)

- (cherry picked from commit f245b05022)

Parent PR: #24123

Closes scylladb/scylladb#24317

* github.com:scylladb/scylladb:
  raft_sys_table_storage: avoid temporary buffer when deserializing log_entry
  serializer_impl.hh:  add as_input_stream(managed_bytes_view) overload
2025-05-30 09:14:43 +03:00
Wojciech Mitros
dec10d348e test: actually wait for tablets to distribute across nodes
In test_tablet_mv_replica_pairing_during_replace, after we create
the tables, we want to wait for their tablets to distribute evenly
across nodes and we have a wait_for for that.
But we don't await this wait_for, so it's a no-op. This patch fixes
it by adding the missing await.

Refs scylladb/scylladb#23982
Refs scylladb/scylladb#23997

Closes scylladb/scylladb#24250

(cherry picked from commit 5074daf1b7)

Closes scylladb/scylladb#24311
2025-05-29 16:44:51 +02:00
Petr Gusev
ffea5e67c1 raft_sys_table_storage: avoid temporary buffer when deserializing log_entry
The get_blob() method linearizes data by copying it into a
single buffer, which can trigger "oversized allocation" warnings.
This commit avoids that extra copy by creating an input stream
directly over the original fragmented managed bytes returned by
untyped_result_set_row::get_view().

Fixes scylladb/scylladb#23903

(cherry picked from commit f245b05022)
2025-05-29 08:42:09 +00:00
Petr Gusev
bcbbc40026 serializer_impl.hh: add as_input_stream(managed_bytes_view) overload
It's useful to have it here so that people can find it easily.

(cherry picked from commit 6496ae6573)
2025-05-29 08:42:09 +00:00
Anna Stuchlik
70d9352cec doc: remove the redundant pages
This commit removes two redundant pages and adds the related redirections.

- The Tutorials page is a duplicate and is not maintained anymore.
  Having it in the docs hurts the SEO of the up-to-date Tutorials page.
- The Contributing page is not helpful. Contributions-related information
  should be maintained in the project README file.

Fixes https://github.com/scylladb/scylladb/issues/17279
Fixes https://github.com/scylladb/scylladb/issues/24060

Closes scylladb/scylladb#24090

(cherry picked from commit eed8373b77)

Closes scylladb/scylladb#24220
2025-05-26 10:30:03 +03:00
Pavel Emelyanov
e215350c61 Revert "encryption_test: Catch exact exception"
This reverts commit 59bf300e83.

KMS tests became flaky after it: #24218
Need to revisit.
2025-05-20 13:51:07 +03:00
Ernest Zaslavsky
59bf300e83 encryption_test: Catch exact exception
Apparently `test_kms_network_error` will succeed under any circumstances: since most of our exceptions derive from `std::exception`, whatever happens to the test, and for whatever reason it throws, the test will be marked as passed.

Start catching the exact exception that we expect to be thrown.
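
The pitfall generalizes beyond this test: a catch-all for the base exception type passes for any failure whatsoever. A minimal Python sketch of the difference (names here are hypothetical, not the actual test code):

```python
# Sketch: asserting "some exception was raised" passes for any bug;
# asserting the exact expected type does not.
class KmsNetworkError(Exception):
    pass

class UnrelatedBug(Exception):
    pass

def expect_exact(exc_type, fn):
    try:
        fn()
    except exc_type:
        return True       # the failure we expected
    except Exception:
        return False      # some other bug slipped through
    return False          # no failure at all

def expected_failure():
    raise KmsNetworkError("simulated network error")

def unrelated_failure():
    raise UnrelatedBug("wrong failure mode")

print(expect_exact(KmsNetworkError, expected_failure))   # True
print(expect_exact(KmsNetworkError, unrelated_failure))  # False
```

A lenient `except Exception` would have reported both cases as a pass.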

Closes scylladb/scylladb#24065

(cherry picked from commit 2d5c0f0cfd)

Closes scylladb/scylladb#24147
2025-05-20 08:27:56 +03:00
Aleksandra Martyniuk
6d733051de cql_test_env: main: move stream_manager initialization
Currently, stream_manager is initialized after storage_service and
so it is stopped before storage_service is. In its stop method,
storage_service accesses stream_manager, which has already been
stopped by that time.

Move the stream_manager initialization before the storage_service
initialization.
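
The underlying rule is that services stop in reverse initialization order, so anything a service touches in its stop path must be started before it. A minimal sketch (service names from the commit; the list mechanics are illustrative):

```python
# Sketch: teardown runs in reverse start order, so whatever starts
# earlier stops later. Starting stream_manager first guarantees it is
# still alive when storage_service::stop() runs.
started = []

def start(name):
    started.append(name)

start("stream_manager")    # moved before storage_service (the fix)
start("storage_service")

stop_order = list(reversed(started))
print(stop_order)  # storage_service stops first, stream_manager still alive
```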

Fixes: #23207.

Closes scylladb/scylladb#24008

(cherry picked from commit 9c03255fd2)

Closes scylladb/scylladb#24190
2025-05-20 08:27:26 +03:00
Ernest Zaslavsky
24c134992b database_test: Wait for the index to be created
Just call `wait_until_built` for the index in question

fix: https://github.com/scylladb/scylladb/issues/24059

Closes scylladb/scylladb#24117

(cherry picked from commit 4a7c847cba)

Closes scylladb/scylladb#24132
2025-05-19 12:08:41 +03:00
Wojciech Mitros
9247c9472a mv: remove queue length limit from the view update read concurrency semaphore
Each view update is correlated to a write that generates it (aside from view
building which is throttled separately). These writes are limited by a throttling
mechanism, which effectively works by performing the writes with CL=ALL if
ongoing writes exceed some memory usage limit

When writes generate view updates, they usually also need to perform a read. This read
goes through a read concurrency semaphore where it can get delayed or killed. The
semaphore allows up to 100 concurrent reads and puts all remaining reads in a queue.
If the number of queued reads exceeds a specific limit, the view update will fail on
the replica, causing inconsistencies.

This limit is not necessary. When a read gets queued on the semaphore, the write that's
causing the view update is paused, so the write takes part in the regular write throttling.
If too many writes get stuck on view update reads, they will get throttled, so their
number is limited and the number of queued reads is also limited to the same amount.

In this patch we remove the specified queue length limit for the view update read concurrency
semaphore. Instead of this limit, the queue will now be limited indirectly, by the base write
throttling mechanism. This may allow the queue to grow longer than with the previous limit, but
it shouldn't ever cause issues - we only perform up to 100 actual reads at once, and the
remaining ones that get queued use a tiny amount of memory, less than the writes that generated
them and which are getting limited directly.
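
The behavioral difference can be modeled with a toy admission policy. This is a Python illustration only, not the real C++ semaphore; the class and limits are invented for the sketch:

```python
# Sketch: up to `max_concurrent` reads run at once; the rest queue.
# With a queue cap, overflow fails the view update; without one,
# excess reads simply wait (held back indirectly by write throttling).
from collections import deque

class ReadSemaphore:
    def __init__(self, max_concurrent, max_queue=None):
        self.max_concurrent = max_concurrent
        self.max_queue = max_queue   # None means "no queue length limit"
        self.active = 0
        self.queue = deque()

    def admit(self, read_id):
        if self.active < self.max_concurrent:
            self.active += 1
            return "running"
        if self.max_queue is not None and len(self.queue) >= self.max_queue:
            return "failed"          # old behaviour: view update fails
        self.queue.append(read_id)
        return "queued"

capped = ReadSemaphore(max_concurrent=100, max_queue=10)
uncapped = ReadSemaphore(max_concurrent=100, max_queue=None)
capped_results = [capped.admit(i) for i in range(120)]
uncapped_results = [uncapped.admit(i) for i in range(120)]
print(capped_results.count("failed"))    # 10 reads rejected under the old cap
print(uncapped_results.count("failed"))  # 0 - everything runs or queues
```

Either way only 100 reads are ever active; the patch changes only the fate of the overflow.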

Fixes https://github.com/scylladb/scylladb/issues/23319

Closes scylladb/scylladb#24112

(cherry picked from commit 5920647617)

Closes scylladb/scylladb#24170
2025-05-19 12:05:48 +03:00
Anna Stuchlik
ab8d50b5e7 doc: fix the product name for version 2025.1
Starting with 2025.1, ScyllaDB versions are no longer called "Enterprise",
but the OS support page still uses that label.
This commit fixes that by replacing "Enterprise" with "ScyllaDB".

This update is required since we've removed "Enterprise" from everywhere else,
including the commands, so having it here is confusing.

Fixes https://github.com/scylladb/scylladb/issues/24179

Closes scylladb/scylladb#24181

(cherry picked from commit 2d7db0867c)

Closes scylladb/scylladb#24204
2025-05-19 12:03:35 +03:00
Dawid Mędrek
7986ef73da locator/production_snitch_base: Reduce log level when property file incomplete
We're reducing the log level in case the provided property file is incomplete.
The rationale behind this change is related to how CCM interacts with Scylla:

* The `GossipingPropertyFileSnitch` reloads the `cassandra-rackdc.properties`
  configuration every 60 seconds.
* When a new node is added to the cluster, CCM recreates the
  `cassandra-rackdc.properties` file for EVERY node.

If those two processes start happening at about the same time, it may lead
to Scylla trying to read a not-completely-recreated file, and an error will
be produced.

Although we would normally fix this issue and try to avoid the race, that
behavior will no longer be relevant as we're making the rack and DC values
immutable (cf. scylladb/scylladb#23278). What's more, trying to fix the problem
in the older versions of Scylla could bring a more serious regression. Having
that in mind, this commit is a compromise between making CI less flaky and
having minimal impact when backported.

We do the same for when the format of the file is invalid: the rationale
is the same.

We also do that for when there is a double declaration. Although it seems
impossible that this can stem from the same scenario the other two errors
can (since if the format of the file is valid, the error is justified;
if the format is invalid, it should be detected sooner than a doubled
declaration), let's stay consistent with the logging level.

Fixes scylladb/scylladb#20092

Closes scylladb/scylladb#23956

(cherry picked from commit 9ebd6df43a)

Closes scylladb/scylladb#24143
2025-05-16 11:51:23 +03:00
Wojciech Mitros
847504ad25 test_mv_tablets_replace: wait for tablet replicas to balance before working on them
In the test test_tablet_mv_replica_pairing_during_replace we stop 2 out of 4 servers while using RF=2.
Even though in the test we use exactly 4 tablets (1 for each replica of a base table and view), initially,
the tablets may not be split evenly between all nodes. Because of this, even when we chose a server that
hosts the view and a different server that hosts the base table, we sometimes stopped all replicas of the
base or the view table because the node with the base table replica may also be a view replica.

After some time, the tablets should be distributed across all nodes. When that happens, there will be
no common nodes with a base and view replica, so the test scenario will continue as planned.

In this patch, we add this waiting period after creating the base and view, and continue the test only
when all 4 tablets are on distinct nodes.

Fixes https://github.com/scylladb/scylladb/issues/23982
Fixes https://github.com/scylladb/scylladb/issues/23997

Closes scylladb/scylladb#24111

(cherry picked from commit bceb64fb5a)

Closes scylladb/scylladb#24126
2025-05-16 11:51:07 +03:00
Pavel Emelyanov
854587c10c Merge '[Backport 2025.2] test/cluster: Adjust tests to RF-rack-valid keyspaces' from Scylladb[bot]
In this PR, we're adjusting most of the cluster tests so that they pass
with the `rf_rack_valid_keyspaces` configuration option enabled. In most
cases, the changes are straightforward and require little to no additional
insight into what the tests are doing or verifying. In some, however, doing
that does require a deeper understanding of the tests we're modifying.
The justification for those changes and their correctness is included in
the commit messages corresponding to them.

Note that this PR does not cover all of the cluster tests. There are few
remaining ones, but they require a bit more effort, so we delegate that
work to a separate PR.

I tested all of the modified tests locally with `rf_rack_valid_keyspaces`
set to true, and they all passed.

Fixes scylladb/scylladb#23959

Backport: we want to backport these changes to 2025.1 since that's the version we introduced RF-rack-valid keyspaces in. Although the tests are not, by default, run with `rf_rack_valid_keyspaces` enabled yet, that will most likely change in the near future, and then we'll want to backport those changes too. The reason for this is that we want to verify that Scylla works correctly even with that constraint.

- (cherry picked from commit dbb8835fdf)

- (cherry picked from commit 9281bff0e3)

- (cherry picked from commit 5b83304b38)

- (cherry picked from commit 73b22d4f6b)

- (cherry picked from commit 2882b7e48a)

- (cherry picked from commit 4c46551c6b)

- (cherry picked from commit 92f7d5bf10)

- (cherry picked from commit 5d1bb8ebc5)

- (cherry picked from commit d3c0cd6d9d)

- (cherry picked from commit 04567c28a3)

- (cherry picked from commit c8c28dae92)

- (cherry picked from commit c4b32c38a3)

- (cherry picked from commit ee96f8dcfc)

Parent PR: #23661

Closes scylladb/scylladb#24121

* github.com:scylladb/scylladb:
  test/cluster/suite.yaml: Enable rf_rack_valid_keyspaces in suite
  test/cluster: Disable rf_rack_valid_keyspaces in problematic tests
  test/cluster/test_tablets: Divide rack into two to adjust tests to RF-rack-validity
  test/cluster/test_tablets: Adjust test_tablet_rf_change to RF-rack-validity
  test/cluster/test_tablet_repair_scheduler.py: Adjust to RF-rack-validity
  test/pylib/repair.py: Assign nodes to multiple racks in create_table_insert_data_for_repair
  test/cluster/test_zero_token_nodes_topology_ops: Adjust to RF-rack-validity
  test/cluster/test_zero_token_nodes_no_replication.py: Adjust to RF-rack-validity
  test/cluster/test_zero_token_nodes_multidc.py: Adjust to RF-rack-validity
  test/cluster/test_not_enough_token_owners.py: Adjust to RF-rack-validity
  test/cluster/test_multidc.py: Adjust to RF-rack-validity
  test/cluster/object_store/test_backup.py: Adjust to RF-rack-validity
  test/cluster: Adjust simple tests to RF-rack-validity
2025-05-16 11:50:45 +03:00
Pavel Emelyanov
9058d5658b Merge '[Backport 2025.2] logalloc_test: don't test performance in test background_reclaim' from Scylladb[bot]
The test is failing in CI sometimes due to performance reasons.

There are at least two problems:
1. The initial 500ms (wall time) sleep might be too short. If the reclaimer
   doesn't manage to evict enough memory during this time, the test will fail.
2. During the 100ms (thread CPU time) window given by the test to background
   reclaim, the `background_reclaim` scheduling group isn't actually
   guaranteed to get any CPU, regardless of shares. If the process is
   switched out inside the `background_reclaim` group, it might
   accumulate so much vruntime that it won't get any more CPU again
   for a long time.

We have seen both.

This kind of timing test can't be run reliably on overcommitted machines
without modifying the Seastar scheduler to support that (by e.g. using
thread clock instead of wall time clock in the scheduler), and that would
require an amount of effort disproportionate to the value of the test.

So for now, to unflake the test, this patch removes the performance test
part. (And the tradeoff is a weakening of the test). After the patch,
we only check that the background reclaim happens *eventually*.

Fixes https://github.com/scylladb/scylladb/issues/15677

Backporting this is optional. The test is flaky even in stable branches, but the failure is rare.

- (cherry picked from commit c47f438db3)

- (cherry picked from commit 1c1741cfbc)

Parent PR: #24030

Closes scylladb/scylladb#24094

* github.com:scylladb/scylladb:
  logalloc_test: don't test performance in test `background_reclaim`
  logalloc: make background_reclaimer::free_memory_threshold publicly visible
2025-05-16 11:50:17 +03:00
Gleb Natapov
dd9ec03323 topology coordinator: make decommissioning node non voter before completing the operation
A decommissioned node is removed from the raft config after the operation is
marked as completed. This is required since otherwise the decommissioned
node will not see that the decommission has completed (the status is
propagated through raft). But right after the decommission is marked as
completed, a decommissioned node may terminate, so in the case of a two-node
cluster, the configuration change that removes it from the raft will fail,
because there will be no quorum.

The solution is to mark the decommissioning node as non voter before
reporting the operation as completed.
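
The quorum arithmetic behind this can be made concrete. A sketch using the standard Raft majority rule (the function names are illustrative):

```python
# Sketch: Raft commits require a majority of *voters*. In a 2-node
# cluster, losing one voter leaves 1 of 2 - not a majority - so the
# config change removing the dead node cannot commit. Demoting the
# leaving node to non-voter first shrinks the voter set to 1, whose
# majority is 1.
def majority(total_voters: int) -> int:
    return total_voters // 2 + 1

def can_commit(alive_voters: int, total_voters: int) -> bool:
    return alive_voters >= majority(total_voters)

# Decommissioned node terminates while still a voter:
print(can_commit(alive_voters=1, total_voters=2))  # False - change hangs
# After demoting it to non-voter before completion:
print(can_commit(alive_voters=1, total_voters=1))  # True
```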

Fixes: #24026

Backport to 2025.2 because it fixes a potential hang. Don't backport to
branches older than 2025.2 because they don't have
8b186ab0ff, which caused this issue.

Closes scylladb/scylladb#24027

(cherry picked from commit c6e1758457)

Closes scylladb/scylladb#24093
2025-05-16 11:49:46 +03:00
Pavel Emelyanov
eef6a95e26 Merge '[Backport 2025.2] replica: Fix use-after-free with concurrent schema change and sstable set update' from Scylladb[bot]
When the schema is changed, the sstable set is updated according to the compaction strategy of the new schema (no changes to the set are actually made, just the underlying set type is updated). The problem is that this happens without a lock, causing a use-after-free when running concurrently with another set update.

Example:

1) A: sstable set is being updated on compaction completion
2) B: schema change updates the set (it's non deferring, so it happens in one go) and frees the set used by A.
3) when A resumes, system will likely crash since the set is freed already.

ASAN screams about it:
SUMMARY: AddressSanitizer: heap-use-after-free sstables/sstable_set.cc ...

The fix defers the update of the set on schema change to compaction, which is triggered after the new schema is set. Only the strategy state and backlog tracker are updated immediately, which is fine since the strategy doesn't depend on any particular implementation of the sstable set.

Fixes #22040.

- (cherry picked from commit 628bec4dbd)

- (cherry picked from commit 434c2c4649)

Parent PR: #23680

Closes scylladb/scylladb#24085

* github.com:scylladb/scylladb:
  replica: Fix use-after-free with concurrent schema change and sstable set update
  sstables: Implement sstable_set_impl::all_sstable_runs()
2025-05-16 11:49:21 +03:00
Piotr Smaron
9f2a13c8c2 cql: fix CREATE tablets KS warning msg
Materialized Views and Secondary Indexes are yet another features that
keyspaces with tablets do not support, but these were not listed in a
warning message returned to the user on CREATE KEYSPACE statement. This
commit adds the 2 missing features.

Fixes: #24006

Closes scylladb/scylladb#23902

(cherry picked from commit f740f9f0e1)

Closes scylladb/scylladb#24084
2025-05-16 11:49:00 +03:00
Piotr Dulikowski
4792a27396 topology_coordinator: silence ERROR messages on abort
When the topology coordinator is shut down while doing a long-running
operation, the current operation might throw a raft::request_aborted
exception. This is not a critical issue and should not be logged with
ERROR verbosity level.

Make sure that all the try..catch blocks in the topology coordinator
which:

- May try to acquire a new group0 guard in the `try` part
- Have a `catch (...)` block that print an ERROR-level message

...have a pass-through `catch (raft::request_aborted&)` block which does
not log the exception.
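
The pattern — a dedicated pass-through handler for the abort exception ahead of the ERROR-logging catch-all — looks like this in a hedged Python model (the real code is C++; `RequestAborted` stands in for `raft::request_aborted`):

```python
# Sketch: a specific pass-through handler keeps expected shutdown
# aborts quiet, while every other failure still reaches the
# ERROR-logging catch-all.
class RequestAborted(Exception):
    """Stands in for raft::request_aborted."""

def run_step(op, error_log):
    try:
        op()
    except RequestAborted:
        pass  # expected during shutdown; deliberately not logged
    except Exception as e:
        error_log.append(f"ERROR: {e}")

def aborted_step():
    raise RequestAborted("coordinator shutting down")

def buggy_step():
    raise RuntimeError("real bug")

log = []
run_step(aborted_step, log)
run_step(buggy_step, log)
print(log)  # only the real bug is logged: ['ERROR: real bug']
```

Ordering matters: the specific handler must precede the catch-all, or it is never reached.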

Fixes: scylladb/scylladb#22649

Closes scylladb/scylladb#23962

(cherry picked from commit 156ff8798b)

Closes scylladb/scylladb#24082
2025-05-16 11:48:43 +03:00
Aleksandra Martyniuk
f26c2b22dc test_tablet_repair_hosts_filter: change injected error
test_tablet_repair_hosts_filter checks whether the host filter
specified for tablet repair is correctly persisted. To check this,
we need to ensure that the repair is still ongoing and its data
is kept. The test achieves that by failing the repair on replica
side - as the failed repair is going to be retried.

However, if the filter does not contain any host (included_host_count = 0),
the repair is started on no replica, so the request succeeds
and its data is deleted. The test fails if it checks the filter
after repair request data is removed.

Fail repair on topology coordinator side, so the request is ongoing
regardless of the specified hosts.

Fixes: #23986.

Closes scylladb/scylladb#24003

(cherry picked from commit 2549f5e16b)

Closes scylladb/scylladb#24080
2025-05-16 11:48:27 +03:00
Botond Dénes
163b65cec4 tools/scylla-nodetool: status: handle negative load sizes
Negative load sizes don't make sense, but we've seen a case in
production, where a negative number was returned by ScyllaDB REST API,
so be prepared to handle these too.

Fixes: scylladb/scylladb#24134

Closes scylladb/scylladb#24135

(cherry picked from commit 700a5f86ed)

Closes scylladb/scylladb#24169
2025-05-15 17:36:48 +03:00
Aleksandra Martyniuk
fcde30d2b0 streaming: use host_id in file streaming
Use host ids instead of ips in file-streaming.

Fixes: #22421.

Closes scylladb/scylladb#24055

(cherry picked from commit 2dcea5a27d)

Closes scylladb/scylladb#24119
2025-05-14 22:13:48 +02:00
Jenkins Promoter
26bd28dac9 Update ScyllaDB version to: 2025.2.0-rc2 2025-05-14 20:59:54 +03:00
Jenkins Promoter
6f1efcff31 Update ScyllaDB version to: 2025.2.0-rc1 2025-05-13 22:48:32 +03:00
Dawid Mędrek
204f9e2cc8 test/cluster/suite.yaml: Enable rf_rack_valid_keyspaces in suite
Almost all of the tests have been adjusted so that they can run with
the `rf_rack_valid_keyspaces` configuration option enabled, while
the rest, a minority, create nodes with it disabled. Thanks to that,
we can enable it by default, so let's do that.

(cherry picked from commit ee96f8dcfc)
2025-05-12 23:11:34 +02:00
Dawid Mędrek
0c6a449a30 test/cluster: Disable rf_rack_valid_keyspaces in problematic tests
Some of the tests in the test suite have proven to be more problematic
in adjusting to RF-rack-validity. Since we'd like to run as many tests
as possible with the `rf_rack_valid_keyspaces` configuration option
enabled, let's disable it in those. In the following commit, we'll enable
it by default.

(cherry picked from commit c4b32c38a3)
2025-05-12 23:11:30 +02:00
Botond Dénes
7673a17365 Merge 'compress: fix an internal error when a specific debug log is enabled' from Michał Chojnowski
compress: fix an internal error when a specific debug log is enabled
While iterating on the recent 69684e16d8
series, I shot myself in the foot by defining `algorithm_to_name(algorithm::none)`
to be an internal error, and later calling it anyway in a debug log.

(Tests didn't catch it because there's no test which simultaneously
enables the debug log and configures some table to have no compression).

This proves that `algorithm_to_name` is too much of a footgun.
Fix it so that calling `algorithm_to_name(algorithm::none)` is legal.
In hindsight, I should have done that immediately.

Fixes #23624

Fix for recently-added code, no backporting needed.

Closes scylladb/scylladb#23625

* github.com:scylladb/scylladb:
  test_sstable_compression_dictionaries: reproduce an internal error in debug logging
  compress: fix an internal error when a specific debug log is enabled

(cherry picked from commit 746382257c)
2025-05-12 23:13:59 +03:00
Avi Kivity
ae05d62b97 Merge '[Backport 2025.2] compress: make sstable compression dictionaries NUMA-aware ' from Scylladb[bot]
compress: distribute compression dictionaries over shards
We don't want each shard to have its own copy of each dictionary.
It would unnecessary pressure on cache and memory.
Instead, we want to share dictionaries between shards.

Before this commit, all dictionaries live on shard 0.
All other shards borrow foreign shared pointers from shard 0.

There's a problem with this setup: dictionary blobs receive many random
accesses. If shard 0 is on a remote NUMA node, this could pose
a performance problem.

Therefore, for each dictionary, we would like to have one copy per NUMA node,
not one copy per the entire machine. And each shard should use the copy
belonging to its own NUMA node. This is the main goal of this patch.

There is another issue with putting all dicts on shard 0: it eats
an asymmetric amount of memory from shard 0.
This commit spreads the ownership of dicts over all shards within
the NUMA group, to make the situation more symmetric.
(Dict owner is decided based on the hash of dict contents).
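
Content-hash-based owner selection can be sketched in a few lines. This is an illustration only (the hash function and shard count here are assumptions, not the actual implementation):

```python
# Sketch: pick one owner shard per dictionary from a hash of its
# contents, so ownership spreads evenly across the shards of a NUMA
# group instead of piling up on shard 0.
import hashlib

def owner_shard(dict_blob: bytes, shards_in_group: int) -> int:
    digest = hashlib.sha256(dict_blob).digest()
    return int.from_bytes(digest[:8], "little") % shards_in_group

# Many dictionaries land on many different shards:
owners = {owner_shard(f"dict-{i}".encode(), 16) for i in range(200)}
print(len(owners) > 1)  # ownership is spread over multiple shards
```

Because the choice depends only on the dictionary's contents, every shard computes the same owner without coordination.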

It should be noted that the last part isn't necessarily a good thing,
though.
While it makes the situation more symmetric within each node,
it makes it less symmetric across the cluster, if different node
sizes are present.

If dicts occupy 1% of memory on each shard of a 100-shard node,
then the same dicts would occupy 100% of memory on a 1-shard node.

So for the sake of cluster-wide symmetry, we might later want to consider
e.g. making the memory limit for dictionaries inversely proportional
to the number of shards.

New functionality, added to a feature which isn't in any stable branch yet. No backporting.

Edit: no backporting to <=2025.1, but need backporting to 2025.2, where the feature is introduced.

Fixes #24108

- (cherry picked from commit 0e4d0ded8d)

- (cherry picked from commit 8649adafa8)

- (cherry picked from commit 1bcf77951c)

- (cherry picked from commit 6b831aaf1b)

- (cherry picked from commit e952992560)

- (cherry picked from commit 66a454f61d)

- (cherry picked from commit 518f04f1c4)

- (cherry picked from commit f075674ebe)

Parent PR: #23590

Closes scylladb/scylladb#24109

* github.com:scylladb/scylladb:
  test: add test/boost/sstable_compressor_factory_test
  compress: add some test-only APIs
  compress: rename sstable_compressor_factory_impl to dictionary_holder
  compress: fix indentation
  compress: remove sstable_compressor_factory_impl::_owner_shard
  compress: distribute compression dictionaries over shards
  test: switch uses of make_sstable_compressor_factory() to a seastar::thread-dependent version
  test: remove sstables::test_env::do_with()
2025-05-12 23:11:12 +03:00
Dawid Mędrek
5c5911d874 test/cluster/test_tablets: Divide rack into two to adjust tests to RF-rack-validity
Three tests in the file use a multi-DC cluster. Unfortunately, they put
all of the nodes in a DC in the same rack and because of that, they fail
when run with the `rf_rack_valid_keyspaces` configuration option enabled.
Since the tests revolve mostly around zero-token nodes and how they
affect replication in a keyspace, this change should have zero impact on
them.

(cherry picked from commit c8c28dae92)
2025-05-12 13:10:12 +00:00
Dawid Mędrek
6a2e52d250 test/cluster/test_tablets: Adjust test_tablet_rf_change to RF-rack-validity
We reduce the number of nodes and the RF values used in the test
to make sure that the test can be run with the `rf_rack_valid_keyspaces`
configuration option. The test doesn't seem to be reliant on the
exact number of nodes, so the reduction should not make any difference.

(cherry picked from commit 04567c28a3)
2025-05-12 13:10:12 +00:00
Dawid Mędrek
f98c83b92f test/cluster/test_tablet_repair_scheduler.py: Adjust to RF-rack-validity
The change boils down to matching the number of created racks to the number
of created nodes in each DC in the auxiliary function `prepare_multi_dc_repair`.
This way, we ensure that the created keyspace will be RF-rack-valid and so
we can run the test file even with the `rf_rack_valid_keyspaces` configuration
option enabled.

The change has no impact on the tests that use the function; the distribution
of nodes across racks does not affect how repair is performed or what the
tests do and verify. Because of that, the change is correct.

(cherry picked from commit d3c0cd6d9d)
2025-05-12 13:10:12 +00:00
Dawid Mędrek
f5cf4a3893 test/pylib/repair.py: Assign nodes to multiple racks in create_table_insert_data_for_repair
We assign the newly created nodes to multiple racks. If RF <= 3,
we create as many racks as the provided RF. We disallow the case
of RF > 3 to avoid trying to create an RF-rack-invalid keyspace;
note that no existing test calls `create_table_insert_data_for_repair`
providing a higher RF. The rationale for doing this is we want to ensure
that the tests calling the function can be run with the
`rf_rack_valid_keyspaces` configuration option enabled.

(cherry picked from commit 5d1bb8ebc5)
2025-05-12 13:10:12 +00:00
Dawid Mędrek
12f0136b26 test/cluster/test_zero_token_nodes_topology_ops: Adjust to RF-rack-validity
We assign the nodes to the same DC, but multiple racks to ensure that
the created keyspace is RF-rack-valid and we can run the test with
the `rf_rack_valid_keyspaces` configuration option enabled. The changes
do not affect what the test does and verifies.

(cherry picked from commit 92f7d5bf10)
2025-05-12 13:10:12 +00:00
Dawid Mędrek
4e45ceda21 test/cluster/test_zero_token_nodes_no_replication.py: Adjust to RF-rack-validity
We simply assign the nodes used in the test to separate racks to
ensure that the created keyspace is RF-rack-valid to be able
to run the test with the `rf_rack_valid_keyspaces` configuration
option set to true. The change does not affect what the test
does and verifies -- it only depends on the type of nodes,
whether they are normal token owners or not -- and so the changes
are correct in that sense.

(cherry picked from commit 4c46551c6b)
2025-05-12 13:10:12 +00:00
Dawid Mędrek
2c8b5143ba test/cluster/test_zero_token_nodes_multidc.py: Adjust to RF-rack-validity
We parameterize the test so it's run with and without enforced
RF-rack-valid keyspaces. In the test itself, we introduce a branch
to make sure that we won't run into a situation where we're
attempting to create an RF-rack-invalid keyspace.

Since the `rf_rack_valid_keyspaces` option is not commonly used yet
and because its semantics will most likely change in the future, we
decide to parameterize the test rather than try to get rid of some
of the test cases that are problematic with the option enabled.

(cherry picked from commit 2882b7e48a)
2025-05-12 13:10:12 +00:00
Dawid Mędrek
474de0f048 test/cluster/test_not_enough_token_owners.py: Adjust to RF-rack-validity
We simply assign DC/rack properties to every node used in the test.
We put all of them in the same DC to make sure that the cluster behaves
as closely to how it would before these changes. However, we distribute
them over multiple racks to ensure that the keyspace used in the test
is RF-rack-valid, so we can also run it with the `rf_rack_valid_keyspaces`
configuration option set to true. The distribution of nodes between racks
has no effect on what the test does and verifies, so the changes are
correct in that sense.

(cherry picked from commit 73b22d4f6b)
2025-05-12 13:10:12 +00:00
Dawid Mędrek
5ac07a6c72 test/cluster/test_multidc.py: Adjust to RF-rack-validity
Instead of putting all of the nodes in a DC in the same rack
in `test_putget_2dc_with_rf`, we assign them to different racks.
The distribution of nodes in racks is orthogonal to what the test
is doing and verifying, so the change is correct in that sense.
At the same time, it ensures that the test never violates the
invariant of RF-rack-valid keyspaces, so we can also run it
with `rf_rack_valid_keyspaces` set to true.

(cherry picked from commit 5b83304b38)
2025-05-12 13:10:11 +00:00
Dawid Mędrek
f88d8edcaf test/cluster/object_store/test_backup.py: Adjust to RF-rack-validity
We modify the parameters of `test_restore_with_streaming_scopes`
so that it now represents a pair of values: topology layout and
the value `rf_rack_valid_keyspaces` should be set to.

Two of the already existing parameters violate RF-rack-validity
and so the test would fail when run with `rf_rack_valid_keyspaces: true`.
However, since the option isn't commonly used yet and since the
semantics of RF-rack-valid keyspaces will most likely change in
the future, let's keep those cases and just run them with the
option disabled. This way, we still test everything we can
without running into undesired failures that don't indicate anything.

(cherry picked from commit 9281bff0e3)
2025-05-12 13:10:11 +00:00
Dawid Mędrek
05c70b0820 test/cluster: Adjust simple tests to RF-rack-validity
We adjust all of the simple cases of cluster tests so they work
with `rf_rack_valid_keyspaces: true`. It boils down to assigning
nodes to multiple racks. For most of the changes, we do that by:

* Using `pytest.mark.prepare_3_racks_cluster` instead of
  `pytest.mark.prepare_3_nodes_cluster`.
* Using an additional argument -- `auto_rack_dc` -- when calling
  `ManagerClient::servers_add()`.

In some cases, we need to assign the racks manually, which may be
less obvious, but in every such situation, the tests didn't rely
on that assignment, so that doesn't affect them or what they verify.

(cherry picked from commit dbb8835fdf)
2025-05-12 13:10:11 +00:00
Michał Chojnowski
732321e3b8 test: add test/boost/sstable_compressor_factory_test
Add a basic test for NUMA awareness of `default_sstable_compressor_factory`.

(cherry picked from commit f075674ebe)
2025-05-12 09:12:05 +00:00
Michał Chojnowski
a2622e1919 compress: add some test-only APIs
Will be needed by the test added in the next patch.

(cherry picked from commit 518f04f1c4)
2025-05-12 09:12:05 +00:00
Michał Chojnowski
270bf34846 compress: rename sstable_compressor_factory_impl to dictionary_holder
Since sstable_compressor_factory_impl no longer
implements sstable_compressor_factory, the name can be
misleading. Rename it to something closer to its new role.

(cherry picked from commit 66a454f61d)
2025-05-12 09:12:05 +00:00
Michał Chojnowski
168f694c5d compress: fix indentation
Purely cosmetic.

(cherry picked from commit e952992560)
2025-05-12 09:12:05 +00:00
Michał Chojnowski
b5579be915 compress: remove sstable_compressor_factory_impl::_owner_shard
Before the series, sstable_compressor_factory_impl was directly
accessed by multiple shards. Now, it's part of a `sharded`
data structure and is never accessed directly from other shards,
so there's no need to check for that. Remove the leftover logic.

(cherry picked from commit 6b831aaf1b)
2025-05-12 09:12:05 +00:00
Michał Chojnowski
ad60d765f9 compress: distribute compression dictionaries over shards
We don't want each shard to have its own copy of each dictionary.
It would put unnecessary pressure on cache and memory.
Instead, we want to share dictionaries between shards.

Before this commit, all dictionaries live on shard 0.
All other shards borrow foreign shared pointers from shard 0.

There's a problem with this setup: dictionary blobs receive many random
accesses. If shard 0 is on a remote NUMA node, this could pose
a performance problem.

Therefore, for each dictionary, we would like to have one copy per NUMA node,
not one copy per the entire machine. And each shard should use the copy
belonging to its own NUMA node. This is the main goal of this patch.

There is another issue with putting all dicts on shard 0: it eats
an asymmetric amount of memory from shard 0.
This commit spreads the ownership of dicts over all shards within
the NUMA group, to make the situation more symmetric.
(Dict owner is decided based on the hash of dict contents).

It should be noted that the last part isn't necessarily a good thing,
though.
While it makes the situation more symmetric within each node,
it makes it less symmetric across the cluster, if different node
sizes are present.

If dicts occupy 1% of memory on each shard of a 100-shard node,
then the same dicts would occupy 100% of memory on a 1-shard node.

So for the sake of cluster-wide symmetry, we might later want to consider
e.g. making the memory limit for dictionaries inversely proportional
to the number of shards.

(cherry picked from commit 1bcf77951c)
2025-05-12 09:12:05 +00:00
Michał Chojnowski
68d2086fa5 test: switch uses of make_sstable_compressor_factory() to a seastar::thread-dependent version
In next patches, make_sstable_compressor_factory() will have to
disappear.
In preparation for that, we switch to a seastar::thread-dependent
replacement.

(cherry picked from commit 8649adafa8)
2025-05-12 09:12:05 +00:00
Michał Chojnowski
403d43093f test: remove sstables::test_env::do_with()
`sstable_manager` depends on `sstable_compressor_factory&`.
Currently, `test_env` obtains an implementation of this
interface with the synchronous `make_sstable_compressor_factory()`.

But after this patch, the only implementation of that interface
`sstable_compressor_factory&` will use `sharded<...>`,
so its construction will become asynchronous,
and the synchronous `make_sstable_compressor_factory()` must disappear.

There are several possible ways to deal with this, but I think the
easiest one is to write an asynchronous replacement for
`make_sstable_compressor_factory()`
that will keep the same signature but will be only usable
in a `seastar::thread`.

All other uses of `make_sstable_compressor_factory()` outside of
`test_env::do_with()` are already in seastar threads,
so if we just get rid of `test_env::do_with()`, then we will
be able to use that thread-dependent replacement. This is the
purpose of this commit.

We shouldn't be losing much.

(cherry picked from commit 0e4d0ded8d)
2025-05-12 09:12:04 +00:00
Patryk Jędrzejczak
2b1b4d1dfc Merge '[Backport 2025.2] Correctly skip updating node's own ip address due to outdated gossiper data' from Scylladb[bot]
Use the host id to check whether the update is for the node itself. Using the IP is unreliable: if a node is restarted with a different IP, a gossiper message carrying the previous IP can be misinterpreted as belonging to a different node.

Fixes: #22777

Backport to 2025.1 since this fixes a crash. Older versions do not have the code.

- (cherry picked from commit a2178b7c31)

- (cherry picked from commit ecd14753c0)

- (cherry picked from commit 7403de241c)

Parent PR: #24000

Closes scylladb/scylladb#24089

* https://github.com/scylladb/scylladb:
  test: add reproducer for #22777
  storage_service: Do not remove gossiper entry on address change
  storage_service: use id to check for local node
2025-05-12 09:31:20 +02:00
Michał Chojnowski
a5b513dde7 logalloc_test: don't test performance in test background_reclaim
The test sometimes fails in CI for performance reasons.

There are at least two problems:
1. The initial 500ms (wall time) sleep might be too short. If the reclaimer
   doesn't manage to evict enough memory during this time, the test will fail.
2. During the 100ms (thread CPU time) window given by the test to background
   reclaim, the `background_reclaim` scheduling group isn't actually
   guaranteed to get any CPU, regardless of shares. If the process is
   switched out inside the `background_reclaim` group, it might
   accumulate so much vruntime that it won't get any more CPU again
   for a long time.

We have seen both.

This kind of timing test can't be run reliably on overcommitted machines
without modifying the Seastar scheduler to support that (by e.g. using
thread clock instead of wall time clock in the scheduler), and that would
require an amount of effort disproportionate to the value of the test.

So for now, to unflake the test, this patch removes the performance test
part. (And the tradeoff is a weakening of the test).

(cherry picked from commit 1c1741cfbc)
2025-05-09 16:12:22 +00:00
Michał Chojnowski
2c431c1ea2 logalloc: make background_reclaimer::free_memory_threshold publicly visible
Wanted by the change to the background_reclaim test in the next patch.

(cherry picked from commit c47f438db3)
2025-05-09 16:12:22 +00:00
Gleb Natapov
827563902c test: add reproducer for #22777
Add a sleep before starting the gossiper to increase the chance of getting an
old gossiper entry about yourself before updating the local gossiper info
with the new IP address.

(cherry picked from commit 7403de241c)
2025-05-09 12:56:15 +00:00
Gleb Natapov
ccf194bd89 storage_service: Do not remove gossiper entry on address change
When the gossiper indexed entries by IP, an old entry had to be removed on an
address change, but the index is now ID-based, so even if the IP has changed
the entry should stay. The gossiper simply updates the IP address there.

(cherry picked from commit ecd14753c0)
2025-05-09 12:56:15 +00:00
Gleb Natapov
9b735bb4dc storage_service: use id to check for local node
The IP may change, and an old gossiper message with the previous IP may be
processed when it shouldn't be.

Fixes: #22777
(cherry picked from commit a2178b7c31)
2025-05-09 12:56:15 +00:00
Michał Chojnowski
f29b87970a test/boost/mvcc_test: fix an overly-strong assertion in test_snapshot_cursor_is_consistent_with_merging
The test checks that merging the partition versions on-the-fly using the
cursor gives the same results as merging them destructively with apply_monotonically.

In particular, it tests that the continuity of both results is equal.
However, there's a subtlety which makes this not true.
The cursor puts empty dummy rows (i.e. dummies shadowed by the partition
tombstone) in the output.
But the destructive merge is allowed (as an exception to the general
rule, for optimization reasons) to remove those dummies and thus reduce
the continuity.

So after this patch we instead check that the output of the cursor
has continuity equal to the merged continuities of the versions.
(Rather than to the continuity of the merged versions, which can be
smaller, as described above.)

Refs https://github.com/scylladb/scylladb/pull/21459, a patch which did
the same in a different test.
Fixes https://github.com/scylladb/scylladb/issues/13642

Closes scylladb/scylladb#24044

(cherry picked from commit 746ec1d4e4)

Closes scylladb/scylladb#24083
2025-05-09 13:00:34 +02:00
Raphael S. Carvalho
82ca17e70d replica: Fix use-after-free with concurrent schema change and sstable set update
When schema is changed, sstable set is updated according to the compaction
strategy of the new schema (no changes to set are actually made, just
the underlying set type is updated), but the problem is that it happens
without a lock, causing a use-after-free when running concurrently to
another set update.

Example:

1) A: the sstable set is being updated on compaction completion
2) B: a schema change updates the set (it's non-deferring, so it
happens in one go) and frees the set used by A.
3) when A resumes, the system will likely crash since the set is
already freed.

ASAN screams about it:
SUMMARY: AddressSanitizer: heap-use-after-free sstables/sstable_set.cc ...

The fix defers the update of the set on schema change to compaction,
which is triggered after the new schema is set. Only the strategy state
and backlog tracker are updated immediately, which is fine since the
strategy doesn't depend on any particular implementation of the sstable
set, since the patch "sstables: Implement sstable_set_impl::all_sstable_runs()".

Fixes #22040.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit 434c2c4649)
2025-05-09 05:57:40 +00:00
Raphael S. Carvalho
ddf9d047db sstables: Implement sstable_set_impl::all_sstable_runs()
With upcoming change where table::set_compaction_strategy() might delay
update of sstable set, ICS might temporarily work with sstable set
implementations other than partitioned_sstable_set. ICS relies on
all_sstable_runs() during regular compaction, and today it triggers
a bad_function_call exception if not overridden by the set implementation.
To remove this strong dependency between compaction strategy and
a particular set implementation, let's provide a default implementation
of all_sstable_runs(), such that ICS will still work until the set
is eventually updated through a process that adds or removes an
sstable.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit 628bec4dbd)
2025-05-09 05:57:40 +00:00
Botond Dénes
17a76b6264 Merge '[Backport 2025.2] test/cluster/test_read_repair.py: improve trace logging test (again)' from Scylladb[bot]
The test test_read_repair_with_trace_logging wants to test read repair with trace logging. Turns out that node restart + trace-level logging + debug mode is too much, and even with a 1 minute timeout, the read repair sometimes times out. Refactor the test to use an injection point instead of a restart. To make sure the test still tests what it is supposed to test, use tracing to assert that read repair did indeed happen.

Fixes: scylladb/scylladb#23968

Needs backport to 2025.1 and 6.2, both have the flaky test

- (cherry picked from commit 51025de755)

- (cherry picked from commit 29eedaa0e5)

Parent PR: #23989

Closes scylladb/scylladb#24051

* github.com:scylladb/scylladb:
  test/cluster/test_read_repair.py: improve trace logging test (again)
  test/cluster: extract execute_with_tracing() into pylib/util.py
2025-05-08 11:01:18 +03:00
Aleksandra Martyniuk
ab45df1aa1 streaming: skip dropped tables
Currently, stream_session::prepare throws when a table in requests
or summaries is dropped. However, we do not want to fail streaming
if the table is dropped.

Delete table checks from stream_session::prepare. Further streaming
steps can handle the dropped table and finish the streaming successfully.

Fixes: #15257.

Closes scylladb/scylladb#23915

(cherry picked from commit 20c2d6210e)

Closes scylladb/scylladb#24053
2025-05-08 11:00:27 +03:00
Botond Dénes
97f0f312e0 test/cluster/test_read_repair.py: improve trace logging test (again)
The test test_read_repair_with_trace_logging wants to test read repair
with trace logging. Turns out that node restart + trace-level logging
+ debug mode is too much, and even with a 1 minute timeout, the read repair
sometimes times out.
Refactor the test to use an injection point instead of a restart. To make
sure the test still tests what it is supposed to test, use tracing to
assert that read repair did indeed happen.

(cherry picked from commit 29eedaa0e5)
2025-05-07 13:26:08 +00:00
Botond Dénes
4df6a17d30 test/cluster: extract execute_with_tracing() into pylib/util.py
To allow reuse in other tests.

(cherry picked from commit 51025de755)
2025-05-07 13:26:08 +00:00
Anna Mikhlin
b3dbfaf27a Update ScyllaDB version to: 2025.2.0-rc0 2025-05-07 11:41:33 +03:00
399 changed files with 10167 additions and 3440 deletions

View File

@@ -112,10 +112,15 @@ def backport(repo, pr, version, commits, backport_base_branch, is_collaborator):
is_draft = True
repo_local.git.add(A=True)
repo_local.git.cherry_pick('--continue')
repo_local.git.push(fork_repo, new_branch_name, force=True)
create_pull_request(repo, new_branch_name, backport_base_branch, pr, backport_pr_title, commits,
is_draft, is_collaborator)
# Check if the branch already exists in the remote fork
remote_refs = repo_local.git.ls_remote('--heads', fork_repo, new_branch_name)
if not remote_refs:
# Branch does not exist, create it with a regular push
repo_local.git.push(fork_repo, new_branch_name)
create_pull_request(repo, new_branch_name, backport_base_branch, pr, backport_pr_title, commits,
is_draft, is_collaborator)
else:
logging.info(f"Remote branch {new_branch_name} already exists in fork. Skipping push.")
except GitCommandError as e:
logging.warning(f"GitCommandError: {e}")

.gitmodules (vendored, 2 lines changed)
View File

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
url = ../seastar
url = ../scylla-seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui

View File

@@ -78,7 +78,7 @@ fi
# Default scylla product/version tags
PRODUCT=scylla
VERSION=2025.2.0-dev
VERSION=2025.2.3
if test -f version
then

View File

@@ -141,6 +141,50 @@ json::json_return_type make_streamed(rjson::value&& value) {
return func;
}
// make_streamed_with_extra_array() is variant of make_streamed() above, which
// builds a response from a JSON object (rjson::value) but adds to it at the
// end an additional array. The extra array is given a separate chunked_vector
// to avoid putting it inside the rjson::value - because RapidJSON does
// contiguous allocations for arrays which we want to avoid for potentially
// long arrays in Query/Scan responses (see #23535).
// If we ever fix RapidJSON to avoid contiguous allocations for arrays, or
// replace it entirely (#24458), we can remove this function and the function
// rjson::print_with_extra_array() which it calls.
json::json_return_type make_streamed_with_extra_array(rjson::value&& value,
std::string array_name, utils::chunked_vector<rjson::value>&& array) {
// CMH. json::json_return_type uses std::function, not noncopyable_function.
// Need to make a copyable version of value. Gah.
auto rs = make_shared<rjson::value>(std::move(value));
auto ns = make_shared<std::string>(std::move(array_name));
auto as = make_shared<utils::chunked_vector<rjson::value>>(std::move(array));
std::function<future<>(output_stream<char>&&)> func = [rs, ns, as](output_stream<char>&& os) mutable -> future<> {
// move objects to coroutine frame.
auto los = std::move(os);
auto lrs = std::move(rs);
auto lns = std::move(ns);
auto las = std::move(as);
std::exception_ptr ex;
try {
co_await rjson::print_with_extra_array(*lrs, *lns, *las, los);
} catch (...) {
// at this point, we cannot really do anything. HTTP headers and return code are
// already written, and quite potentially a portion of the content data.
// just log + rethrow. It is probably better the HTTP server closes connection
// abruptly or something...
ex = std::current_exception();
elogger.error("Exception during streaming HTTP response: {}", ex);
}
co_await los.close();
co_await rjson::destroy_gently(std::move(*lrs));
// TODO: can/should we also destroy the array (*las) gently?
if (ex) {
co_await coroutine::return_exception_ptr(std::move(ex));
}
co_return;
};
return func;
}
json_string::json_string(std::string&& value)
: _value(std::move(value))
{}
@@ -1006,6 +1050,17 @@ void rmw_operation::set_default_write_isolation(std::string_view value) {
default_write_isolation = parse_write_isolation(value);
}
// Alternator uses tags whose keys start with the "system:" prefix for
// internal purposes. Those should not be readable by ListTagsOfResource,
// nor writable with TagResource or UntagResource (see #24098).
// Only a few specific system tags, currently only system:write_isolation,
// are deliberately intended to be set and read by the user, so are not
// considered "internal".
static bool tag_key_is_internal(std::string_view tag_key) {
return tag_key.starts_with("system:") &&
tag_key != rmw_operation::WRITE_ISOLATION_TAG_KEY;
}
enum class update_tags_action { add_tags, delete_tags };
static void update_tags_map(const rjson::value& tags, std::map<sstring, sstring>& tags_map, update_tags_action action) {
if (action == update_tags_action::add_tags) {
@@ -1030,6 +1085,9 @@ static void update_tags_map(const rjson::value& tags, std::map<sstring, sstring>
if (!validate_legal_tag_chars(tag_key)) {
throw api_error::validation("A tag Key can only contain letters, spaces, and [+-=._:/]");
}
if (tag_key_is_internal(tag_key)) {
throw api_error::validation(fmt::format("Tag key '{}' is reserved for internal use", tag_key));
}
// Note tag values are limited similarly to tag keys, but have a
// longer length limit, and *can* be empty.
if (tag_value.size() > 256) {
@@ -1042,7 +1100,11 @@ static void update_tags_map(const rjson::value& tags, std::map<sstring, sstring>
}
} else if (action == update_tags_action::delete_tags) {
for (auto it = tags.Begin(); it != tags.End(); ++it) {
tags_map.erase(sstring(it->GetString(), it->GetStringLength()));
auto tag_key = rjson::to_string_view(*it);
if (tag_key_is_internal(tag_key)) {
throw api_error::validation(fmt::format("Tag key '{}' is reserved for internal use", tag_key));
}
tags_map.erase(sstring(tag_key));
}
}
@@ -1117,6 +1179,9 @@ future<executor::request_return_type> executor::list_tags_of_resource(client_sta
rjson::value& tags = ret["Tags"];
for (auto& tag_entry : tags_map) {
if (tag_key_is_internal(tag_entry.first)) {
continue;
}
rjson::value new_entry = rjson::empty_object();
rjson::add(new_entry, "Key", rjson::from_string(tag_entry.first));
rjson::add(new_entry, "Value", rjson::from_string(tag_entry.second));
@@ -4509,7 +4574,11 @@ class describe_items_visitor {
const filter& _filter;
typename columns_t::const_iterator _column_it;
rjson::value _item;
rjson::value _items;
// _items is a chunked_vector<rjson::value> instead of a RapidJson array
// (rjson::value) because unfortunately RapidJson arrays are stored
// contiguously in memory, and cause large allocations when a Query/Scan
// returns a long list of short items (issue #23535).
utils::chunked_vector<rjson::value> _items;
size_t _scanned_count;
public:
@@ -4519,7 +4588,6 @@ public:
, _filter(filter)
, _column_it(columns.begin())
, _item(rjson::empty_object())
, _items(rjson::empty_array())
, _scanned_count(0)
{
// _filter.check() may need additional attributes not listed in
@@ -4598,13 +4666,13 @@ public:
rjson::remove_member(_item, attr);
}
rjson::push_back(_items, std::move(_item));
_items.push_back(std::move(_item));
}
_item = rjson::empty_object();
++_scanned_count;
}
rjson::value get_items() && {
utils::chunked_vector<rjson::value> get_items() && {
return std::move(_items);
}
@@ -4613,13 +4681,25 @@ public:
}
};
static future<std::tuple<rjson::value, size_t>> describe_items(const cql3::selection::selection& selection, std::unique_ptr<cql3::result_set> result_set, std::optional<attrs_to_get>&& attrs_to_get, filter&& filter) {
// describe_items() returns a JSON object that includes members "Count"
// and "ScannedCount", but *not* "Items" - that is returned separately
// as a chunked_vector to avoid large contiguous allocations which
// RapidJSON does of its array. The caller should add "Items" to the
// returned JSON object if needed, or print it separately.
// The returned chunked_vector (the items) is std::optional<>, because
// the user may have requested only to count items, and not return any
// items - which is different from returning an empty list of items.
static future<std::tuple<rjson::value, std::optional<utils::chunked_vector<rjson::value>>, size_t>> describe_items(
const cql3::selection::selection& selection,
std::unique_ptr<cql3::result_set> result_set,
std::optional<attrs_to_get>&& attrs_to_get,
filter&& filter) {
describe_items_visitor visitor(selection.get_columns(), attrs_to_get, filter);
co_await result_set->visit_gently(visitor);
auto scanned_count = visitor.get_scanned_count();
rjson::value items = std::move(visitor).get_items();
utils::chunked_vector<rjson::value> items = std::move(visitor).get_items();
rjson::value items_descr = rjson::empty_object();
auto size = items.Size();
auto size = items.size();
rjson::add(items_descr, "Count", rjson::value(size));
rjson::add(items_descr, "ScannedCount", rjson::value(scanned_count));
// If attrs_to_get && attrs_to_get->empty(), this means the user asked not
@@ -4629,10 +4709,11 @@ static future<std::tuple<rjson::value, size_t>> describe_items(const cql3::selec
// In that case, we currently build a list of empty items and here drop
// it. We could just count the items and not bother with the empty items.
// (However, remember that when we do have a filter, we need the items).
std::optional<utils::chunked_vector<rjson::value>> opt_items;
if (!attrs_to_get || !attrs_to_get->empty()) {
rjson::add(items_descr, "Items", std::move(items));
opt_items = std::move(items);
}
co_return std::tuple<rjson::value, size_t>{std::move(items_descr), size};
co_return std::tuple(std::move(items_descr), std::move(opt_items), size);
}
static rjson::value encode_paging_state(const schema& schema, const service::pager::paging_state& paging_state) {
@@ -4670,6 +4751,12 @@ static rjson::value encode_paging_state(const schema& schema, const service::pag
return last_evaluated_key;
}
// RapidJSON allocates arrays contiguously in memory, so we want to avoid
// returning a large number of items as a single rapidjson array, and use
// a chunked_vector instead. The following constant is an arbitrary cutoff
// point for when to switch from a rapidjson array to a chunked_vector.
static constexpr int max_items_for_rapidjson_array = 256;
static future<executor::request_return_type> do_query(service::storage_proxy& proxy,
schema_ptr table_schema,
const rjson::value* exclusive_start_key,
@@ -4742,19 +4829,35 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
}
auto paging_state = rs->get_metadata().paging_state();
bool has_filter = filter;
auto [items, size] = co_await describe_items(*selection, std::move(rs), std::move(attrs_to_get), std::move(filter));
auto [items_descr, opt_items, size] = co_await describe_items(*selection, std::move(rs), std::move(attrs_to_get), std::move(filter));
if (paging_state) {
rjson::add(items, "LastEvaluatedKey", encode_paging_state(*table_schema, *paging_state));
rjson::add(items_descr, "LastEvaluatedKey", encode_paging_state(*table_schema, *paging_state));
}
if (has_filter){
cql_stats.filtered_rows_read_total += p->stats().rows_read_total;
// update our "filtered_row_matched_total" for all the rows matched, despited the filter
cql_stats.filtered_rows_matched_total += size;
}
if (is_big(items)) {
co_return executor::request_return_type(make_streamed(std::move(items)));
if (opt_items) {
if (opt_items->size() >= max_items_for_rapidjson_array) {
// There are many items, better print the JSON and the array of
// items (opt_items) separately to avoid RapidJSON's contiguous
// allocation of arrays.
co_return executor::request_return_type(make_streamed_with_extra_array(std::move(items_descr), "Items", std::move(*opt_items)));
}
// There aren't many items in the chunked vector opt_items,
// let's just insert them into the JSON object and print the
// full JSON normally.
rjson::value items_json = rjson::empty_array();
for (auto& item : *opt_items) {
rjson::push_back(items_json, std::move(item));
}
rjson::add(items_descr, "Items", std::move(items_json));
}
co_return executor::request_return_type(make_jsonable(std::move(items)));
if (is_big(items_descr)) {
co_return executor::request_return_type(make_streamed(std::move(items_descr)));
}
co_return executor::request_return_type(make_jsonable(std::move(items_descr)));
}
static dht::token token_for_segment(int segment, int total_segments) {

View File

@@ -2144,6 +2144,31 @@
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"skip_cleanup",
"description":"Don't cleanup keys from loaded sstables. Invalid if load_and_stream is true",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"skip_reshape",
"description":"Don't reshape the loaded sstables. Invalid if load_and_stream is true",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"scope",
"description":"Defines the set of nodes to which mutations can be streamed",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query",
"enum": ["all", "dc", "rack", "node"]
}
]
}
@@ -3136,6 +3161,22 @@
]
}
]
},
{
"path":"/storage_service/raft_topology/cmd_rpc_status",
"operations":[
{
"method":"GET",
"summary":"Get information about currently running topology cmd rpc",
"type":"string",
"nickname":"raft_topology_get_cmd_status",
"produces":[
"application/json"
],
"parameters":[
]
}
]
}
],
"models":{
@@ -3272,11 +3313,11 @@
"properties":{
"start_token":{
"type":"string",
"description":"The range start token"
"description":"The range start token (exclusive)"
},
"end_token":{
"type":"string",
"description":"The range start token"
"description":"The range end token (inclusive)"
},
"endpoints":{
"type":"array",

View File

@@ -453,17 +453,26 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
auto cf = req->get_query_param("cf");
auto stream = req->get_query_param("load_and_stream");
auto primary_replica = req->get_query_param("primary_replica_only");
auto skip_cleanup_p = req->get_query_param("skip_cleanup");
boost::algorithm::to_lower(stream);
boost::algorithm::to_lower(primary_replica);
bool load_and_stream = stream == "true" || stream == "1";
bool primary_replica_only = primary_replica == "true" || primary_replica == "1";
bool skip_cleanup = skip_cleanup_p == "true" || skip_cleanup_p == "1";
auto scope = parse_stream_scope(req->get_query_param("scope"));
auto skip_reshape_p = req->get_query_param("skip_reshape");
auto skip_reshape = skip_reshape_p == "true" || skip_reshape_p == "1";
if (scope != sstables_loader::stream_scope::all && !load_and_stream) {
throw httpd::bad_param_exception("scope takes no effect without load-and-stream");
}
// No need to add the keyspace, since all we want is to avoid always sending this to the same
// CPU. Even then I am being overzealous here. This is not something that happens all the time.
auto coordinator = std::hash<sstring>()(cf) % smp::count;
return sst_loader.invoke_on(coordinator,
[ks = std::move(ks), cf = std::move(cf),
load_and_stream, primary_replica_only] (sstables_loader& loader) {
return loader.load_new_sstables(ks, cf, load_and_stream, primary_replica_only, sstables_loader::stream_scope::all);
load_and_stream, primary_replica_only, skip_cleanup, skip_reshape, scope] (sstables_loader& loader) {
return loader.load_new_sstables(ks, cf, load_and_stream, primary_replica_only, skip_cleanup, skip_reshape, scope);
}).then_wrapped([] (auto&& f) {
if (f.failed()) {
auto msg = fmt::format("Failed to load new sstables: {}", f.get_exception());
@@ -738,13 +747,7 @@ rest_force_compaction(http_context& ctx, std::unique_ptr<http::request> req) {
fmopt = flush_mode::skip;
}
auto task = co_await compaction_module.make_and_start_task<global_major_compaction_task_impl>({}, db, fmopt, consider_only_existing_data);
try {
co_await task->done();
} catch (...) {
apilog.error("force_compaction failed: {}", std::current_exception());
throw;
}
co_await task->done();
co_return json_void();
}
@@ -771,13 +774,7 @@ rest_force_keyspace_compaction(http_context& ctx, std::unique_ptr<http::request>
fmopt = flush_mode::skip;
}
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), tasks::task_id::create_null_id(), db, table_infos, fmopt, consider_only_existing_data);
try {
co_await task->done();
} catch (...) {
apilog.error("force_keyspace_compaction: keyspace={} tables={} failed: {}", task->get_status().keyspace, table_infos, std::current_exception());
throw;
}
co_await task->done();
co_return json_void();
}
@@ -802,13 +799,7 @@ rest_force_keyspace_cleanup(http_context& ctx, sharded<service::storage_service>
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<cleanup_keyspace_compaction_task_impl>(
{}, std::move(keyspace), db, table_infos, flush_mode::all_tables, tasks::is_user_task::yes);
try {
co_await task->done();
} catch (...) {
apilog.error("force_keyspace_cleanup: keyspace={} tables={} failed: {}", task->get_status().keyspace, table_infos, std::current_exception());
throw;
}
co_await task->done();
co_return json::json_return_type(0);
}
@@ -830,12 +821,7 @@ rest_cleanup_all(http_context& ctx, sharded<service::storage_service>& ss, std::
auto& db = ctx.db;
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<global_cleanup_compaction_task_impl>({}, db);
try {
co_await task->done();
} catch (...) {
apilog.error("cleanup_all failed: {}", std::current_exception());
throw;
}
co_await task->done();
co_return json::json_return_type(0);
}
@@ -847,13 +833,7 @@ rest_perform_keyspace_offstrategy_compaction(http_context& ctx, std::unique_ptr<
bool res = false;
auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<offstrategy_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, table_infos, &res);
try {
co_await task->done();
} catch (...) {
apilog.error("perform_keyspace_offstrategy_compaction: keyspace={} tables={} failed: {}", task->get_status().keyspace, table_infos, std::current_exception());
throw;
}
co_await task->done();
co_return json::json_return_type(res);
}
@@ -868,13 +848,7 @@ rest_upgrade_sstables(http_context& ctx, std::unique_ptr<http::request> req) {
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<upgrade_sstables_compaction_task_impl>({}, std::move(keyspace), db, table_infos, exclude_current_version);
try {
co_await task->done();
} catch (...) {
apilog.error("upgrade_sstables: keyspace={} tables={} failed: {}", keyspace, table_infos, std::current_exception());
throw;
}
co_await task->done();
co_return json::json_return_type(0);
}
@@ -1667,6 +1641,18 @@ rest_raft_topology_upgrade_status(sharded<service::storage_service>& ss, std::un
co_return sstring(format("{}", ustate));
}
static
future<json::json_return_type>
rest_raft_topology_get_cmd_status(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
const auto status = co_await ss.invoke_on(0, [] (auto& ss) {
return ss.get_topology_cmd_status();
});
if (status.active_dst.empty()) {
co_return sstring("none");
}
co_return sstring(fmt::format("{}[{}]: {}", status.current, status.index, fmt::join(status.active_dst, ",")));
}
static
future<json::json_return_type>
rest_move_tablet(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
@@ -1898,6 +1884,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::reload_raft_topology_state.set(r, rest_bind(rest_reload_raft_topology_state, ss, group0_client));
ss::upgrade_to_raft_topology.set(r, rest_bind(rest_upgrade_to_raft_topology, ss));
ss::raft_topology_upgrade_status.set(r, rest_bind(rest_raft_topology_upgrade_status, ss));
ss::raft_topology_get_cmd_status.set(r, rest_bind(rest_raft_topology_get_cmd_status, ss));
ss::move_tablet.set(r, rest_bind(rest_move_tablet, ctx, ss));
ss::add_tablet_replica.set(r, rest_bind(rest_add_tablet_replica, ctx, ss));
ss::del_tablet_replica.set(r, rest_bind(rest_del_tablet_replica, ctx, ss));
@@ -1979,6 +1966,7 @@ void unset_storage_service(http_context& ctx, routes& r) {
ss::reload_raft_topology_state.unset(r);
ss::upgrade_to_raft_topology.unset(r);
ss::raft_topology_upgrade_status.unset(r);
ss::raft_topology_get_cmd_status.unset(r);
ss::move_tablet.unset(r);
ss::add_tablet_replica.unset(r);
ss::del_tablet_replica.unset(r);

View File

@@ -74,6 +74,9 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
});
ss::get_host_id_map.set(r, [&tm, &g](const_req req) {
if (!g.local().is_enabled()) {
throw std::runtime_error("The gossiper is not ready yet");
}
std::vector<ss::mapper> res;
auto map = tm.local().get()->get_host_ids() |
std::views::transform([&g] (locator::host_id id) { return std::make_pair(g.local().get_address_map().get(id), id); }) |

View File

@@ -9,6 +9,7 @@
#include "auth/allow_all_authenticator.hh"
#include "service/migration_manager.hh"
#include "utils/alien_worker.hh"
#include "utils/class_registrator.hh"
namespace auth {
@@ -21,6 +22,7 @@ static const class_registrator<
allow_all_authenticator,
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
::service::migration_manager&,
utils::alien_worker&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
}

View File

@@ -13,6 +13,7 @@
#include "auth/authenticated_user.hh"
#include "auth/authenticator.hh"
#include "auth/common.hh"
#include "utils/alien_worker.hh"
namespace cql3 {
class query_processor;
@@ -28,7 +29,7 @@ extern const std::string_view allow_all_authenticator_name;
class allow_all_authenticator final : public authenticator {
public:
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&) {
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, utils::alien_worker&) {
}
virtual future<> start() override {

View File

@@ -33,13 +33,14 @@ static const class_registrator<auth::authenticator
, auth::certificate_authenticator
, cql3::query_processor&
, ::service::raft_group0_client&
, ::service::migration_manager&> cert_auth_reg(CERT_AUTH_NAME);
, ::service::migration_manager&
, utils::alien_worker&> cert_auth_reg(CERT_AUTH_NAME);
enum class auth::certificate_authenticator::query_source {
subject, altname
};
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&)
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, utils::alien_worker&)
: _queries([&] {
auto& conf = qp.db().get_config();
auto queries = conf.auth_certificate_role_queries();

View File

@@ -10,6 +10,7 @@
#pragma once
#include "auth/authenticator.hh"
#include "utils/alien_worker.hh"
#include <boost/regex_fwd.hpp> // IWYU pragma: keep
namespace cql3 {
@@ -31,7 +32,7 @@ class certificate_authenticator : public authenticator {
enum class query_source;
std::vector<std::pair<query_source, boost::regex>> _queries;
public:
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&);
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, utils::alien_worker&);
~certificate_authenticator();
future<> start() override;


@@ -119,6 +119,11 @@ future<> create_legacy_metadata_table_if_missing(
return qs;
}
::service::raft_timeout get_raft_timeout() noexcept {
auto dur = internal_distributed_query_state().get_client_state().get_timeout_config().other_timeout;
return ::service::raft_timeout{.value = lowres_clock::now() + dur};
}
static future<> announce_mutations_with_guard(
::service::raft_group0_client& group0_client,
std::vector<canonical_mutation> muts,


@@ -17,6 +17,7 @@
#include "types/types.hh"
#include "service/raft/raft_group0_client.hh"
#include "timeout_config.hh"
using namespace std::chrono_literals;
@@ -77,6 +78,8 @@ future<> create_legacy_metadata_table_if_missing(
///
::service::query_state& internal_distributed_query_state() noexcept;
::service::raft_timeout get_raft_timeout() noexcept;
// Executes an update query via the group0 mechanism; the mutations will be applied on all nodes.
// Use this function when you need to perform a read before write on a single guard, or if
// you have more than one mutation and could exceed the single-command size limit.


@@ -338,8 +338,7 @@ future<std::vector<cql3::description>> ldap_role_manager::describe_role_grants()
}
future<> ldap_role_manager::ensure_superuser_is_created() {
// ldap is responsible for users
co_return;
return _std_mgr.ensure_superuser_is_created();
}
} // namespace auth


@@ -48,14 +48,14 @@ static const class_registrator<
password_authenticator,
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
::service::migration_manager&,
utils::alien_worker&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());
static std::string_view get_config_value(std::string_view value, std::string_view def) {
return value.empty() ? def : value;
}
std::string password_authenticator::default_superuser(const db::config& cfg) {
return std::string(get_config_value(cfg.auth_superuser_name(), DEFAULT_USER_NAME));
}
@@ -63,12 +63,13 @@ std::string password_authenticator::default_superuser(const db::config& cfg) {
password_authenticator::~password_authenticator() {
}
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm)
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, utils::alien_worker& hashing_worker)
: _qp(qp)
, _group0_client(g0)
, _migration_manager(mm)
, _stopped(make_ready_future<>())
, _superuser(default_superuser(qp.db().get_config()))
, _hashing_worker(hashing_worker)
{}
static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
@@ -117,33 +118,95 @@ future<> password_authenticator::migrate_legacy_metadata() const {
});
}
future<> password_authenticator::create_default_if_missing() {
future<> password_authenticator::legacy_create_default_if_missing() {
SCYLLA_ASSERT(legacy_mode(_qp));
const auto exists = co_await default_role_row_satisfies(_qp, &has_salted_hash, _superuser);
if (exists) {
co_return;
}
std::string salted_pwd(get_config_value(_qp.db().get_config().auth_superuser_salted_password(), ""));
if (salted_pwd.empty()) {
salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt);
salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt, _scheme);
}
const auto query = update_row_query();
if (legacy_mode(_qp)) {
co_await _qp.execute_internal(
co_await _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
{salted_pwd, _superuser},
cql3::query_processor::cache_internal::no);
plogger.info("Created default superuser authentication record.");
} else {
co_await announce_mutations(_qp, _group0_client, query,
{salted_pwd, _superuser}, _as, ::service::raft_timeout{});
plogger.info("Created default superuser authentication record.");
plogger.info("Created default superuser authentication record.");
}
future<> password_authenticator::maybe_create_default_password() {
auto needs_password = [this] () -> future<bool> {
const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", get_auth_ks_name(_qp), meta::roles_table::name);
auto results = co_await _qp.execute_internal(query,
db::consistency_level::LOCAL_ONE,
internal_distributed_query_state(), cql3::query_processor::cache_internal::yes);
// Don't add default password if
// - there is no default superuser
// - there is a superuser with a password.
bool has_default = false;
bool has_superuser_with_password = false;
for (auto& result : *results) {
if (result.get_as<sstring>(meta::roles_table::role_col_name) == _superuser) {
has_default = true;
}
if (has_salted_hash(result)) {
has_superuser_with_password = true;
}
}
co_return has_default && !has_superuser_with_password;
};
if (!co_await needs_password()) {
co_return;
}
// We don't want to start the operation earlier, to avoid the quorum
// requirement in the common case.
::service::group0_batch batch(
co_await _group0_client.start_operation(_as, get_raft_timeout()));
// Check again as the state may have changed before we took the guard (batch).
if (!co_await needs_password()) {
co_return;
}
// Set default superuser's password.
std::string salted_pwd(get_config_value(_qp.db().get_config().auth_superuser_salted_password(), ""));
if (salted_pwd.empty()) {
salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt, _scheme);
}
const auto update_query = update_row_query();
co_await collect_mutations(_qp, batch, update_query, {salted_pwd, _superuser});
co_await std::move(batch).commit(_group0_client, _as, get_raft_timeout());
plogger.info("Created default superuser authentication record.");
}
future<> password_authenticator::maybe_create_default_password_with_retries() {
size_t retries = _migration_manager.get_concurrent_ddl_retries();
while (true) {
try {
co_return co_await maybe_create_default_password();
} catch (const ::service::group0_concurrent_modification& ex) {
plogger.warn("Failed to execute maybe_create_default_password due to guard conflict.{}.", retries ? " Retrying" : " Number of retries exceeded, giving up");
if (retries--) {
continue;
}
// Log error but don't crash the whole node startup sequence.
plogger.error("Failed to create default superuser password due to guard conflict.");
co_return;
} catch (const ::service::raft_operation_timeout_error& ex) {
plogger.error("Failed to create default superuser password due to exception: {}", ex.what());
co_return;
}
}
}
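The retry wrapper above retries only on `group0_concurrent_modification`, up to the migration manager's DDL retry limit, and swallows the final failure so node startup is not aborted. A self-contained sketch of that control flow (the exception type and the operation are stand-ins, not the actual ScyllaDB types):

```cpp
#include <functional>
#include <stdexcept>

// Stand-in for ::service::group0_concurrent_modification.
struct concurrent_modification : std::runtime_error {
    concurrent_modification() : std::runtime_error("concurrent modification") {}
};

// Run `op`, retrying up to `retries` times on guard conflicts; exhausting
// the retries gives up quietly (the real code logs an error instead of
// crashing startup), mirroring maybe_create_default_password_with_retries().
bool run_with_retries(const std::function<void()>& op, size_t retries) {
    while (true) {
        try {
            op();
            return true;
        } catch (const concurrent_modification&) {
            if (retries--) {
                continue; // someone else won the race; take a fresh guard and retry
            }
            return false; // log-and-give-up in the real code
        }
    }
}
```

Note that `retries--` yields the old value, so `retries` conflicts are retried before giving up, matching the loop in the patch.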
future<> password_authenticator::start() {
return once_among_shards([this] {
// Verify that at least one hashing scheme is supported.
passwords::detail::verify_scheme(_scheme);
plogger.info("Using password hashing scheme: {}", passwords::detail::prefix_for_scheme(_scheme));
_stopped = do_after_system_ready(_as, [this] {
return async([this] {
if (legacy_mode(_qp)) {
@@ -164,11 +227,14 @@ future<> password_authenticator::start() {
migrate_legacy_metadata().get();
return;
}
legacy_create_default_if_missing().get();
}
utils::get_local_injector().inject("password_authenticator_start_pause", utils::wait_for_message(5min)).get();
create_default_if_missing().get();
if (!legacy_mode(_qp)) {
_superuser_created_promise.set_value();
maybe_create_default_password_with_retries().get();
if (!_superuser_created_promise.available()) {
_superuser_created_promise.set_value();
}
}
});
});
@@ -228,7 +294,13 @@ future<authenticated_user> password_authenticator::authenticate(
try {
const std::optional<sstring> salted_hash = co_await get_password_hash(username);
if (!salted_hash || !passwords::check(password, *salted_hash)) {
if (!salted_hash) {
throw exceptions::authentication_exception("Username and/or password are incorrect");
}
const bool password_match = co_await _hashing_worker.submit<bool>([password = std::move(password), salted_hash = std::move(salted_hash)]{
return passwords::check(password, *salted_hash);
});
if (!password_match) {
throw exceptions::authentication_exception("Username and/or password are incorrect");
}
co_return username;
@@ -252,7 +324,7 @@ future<> password_authenticator::create(std::string_view role_name, const authen
auto maybe_hash = options.credentials.transform([&] (const auto& creds) -> sstring {
return std::visit(make_visitor(
[&] (const password_option& opt) {
return passwords::hash(opt.password, rng_for_salt);
return passwords::hash(opt.password, rng_for_salt, _scheme);
},
[] (const hashed_password_option& opt) {
return opt.hashed_password;
@@ -295,11 +367,11 @@ future<> password_authenticator::alter(std::string_view role_name, const authent
query,
consistency_for_user(role_name),
internal_distributed_query_state(),
{passwords::hash(password, rng_for_salt), sstring(role_name)},
{passwords::hash(password, rng_for_salt, _scheme), sstring(role_name)},
cql3::query_processor::cache_internal::no).discard_result();
} else {
co_await collect_mutations(_qp, mc, query,
{passwords::hash(password, rng_for_salt), sstring(role_name)});
{passwords::hash(password, rng_for_salt, _scheme), sstring(role_name)});
}
}

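The `authenticate` change above moves the expensive `passwords::check` call off the reactor thread via `utils::alien_worker::submit`. A minimal sketch of the same idea using `std::async` in place of the Seastar worker (all names here are illustrative, not the actual ScyllaDB API):

```cpp
#include <future>
#include <string>

// Stand-in for passwords::check: compares a cleartext password against a
// stored salted hash. The real implementation calls crypt_r(), which is
// CPU-heavy for strong schemes such as SHA-512 crypt.
bool check_password(const std::string& password, const std::string& salted_hash) {
    // Illustrative only: a real check re-hashes `password` with the salt
    // embedded in `salted_hash` and compares the results.
    return salted_hash == "hash:" + password;
}

// Offload the blocking check to another thread so the caller's event loop
// stays responsive, mirroring _hashing_worker.submit<bool>(...) in the patch.
std::future<bool> check_password_async(std::string password, std::string salted_hash) {
    return std::async(std::launch::async,
        [p = std::move(password), h = std::move(salted_hash)] {
            return check_password(p, h);
        });
}
```

The design point is the same in both cases: password hashing is deliberately slow, so running it inline would stall every other task sharing the reactor thread.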

@@ -15,7 +15,9 @@
#include "db/consistency_level_type.hh"
#include "auth/authenticator.hh"
#include "auth/passwords.hh"
#include "service/raft/raft_group0_client.hh"
#include "utils/alien_worker.hh"
namespace db {
class config;
@@ -41,14 +43,17 @@ class password_authenticator : public authenticator {
::service::migration_manager& _migration_manager;
future<> _stopped;
abort_source _as;
std::string _superuser;
std::string _superuser; // default superuser name from the config (may or may not be present in roles table)
shared_promise<> _superuser_created_promise;
// We used to also support bcrypt, SHA-256, and MD5 (ref. scylladb#24524).
constexpr static auth::passwords::scheme _scheme = passwords::scheme::sha_512;
utils::alien_worker& _hashing_worker;
public:
static db::consistency_level consistency_for_user(std::string_view role_name);
static std::string default_superuser(const db::config&);
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&);
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, utils::alien_worker&);
~password_authenticator();
@@ -89,7 +94,10 @@ private:
future<> migrate_legacy_metadata() const;
future<> create_default_if_missing();
future<> legacy_create_default_if_missing();
future<> maybe_create_default_password();
future<> maybe_create_default_password_with_retries();
sstring update_row_query() const;
};


@@ -21,18 +21,14 @@ static thread_local crypt_data tlcrypt = {};
namespace detail {
scheme identify_best_supported_scheme() {
const auto all_schemes = { scheme::bcrypt_y, scheme::bcrypt_a, scheme::sha_512, scheme::sha_256, scheme::md5 };
// "Random", for testing schemes.
void verify_scheme(scheme scheme) {
const sstring random_part_of_salt = "aaaabbbbccccdddd";
for (scheme c : all_schemes) {
const sstring salt = sstring(prefix_for_scheme(c)) + random_part_of_salt;
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
const sstring salt = sstring(prefix_for_scheme(scheme)) + random_part_of_salt;
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
if (e && (e[0] != '*')) {
return c;
}
if (e && (e[0] != '*')) {
return;
}
throw no_supported_schemes();


@@ -21,10 +21,11 @@ class no_supported_schemes : public std::runtime_error {
public:
no_supported_schemes();
};
///
/// Apache Cassandra uses a library to provide the bcrypt scheme. Many Linux implementations do not support bcrypt, so
/// we support alternatives. The cost is loss of direct compatibility with Apache Cassandra system tables.
/// Apache Cassandra uses a library to provide the bcrypt scheme. In ScyllaDB, we use SHA-512
/// instead of bcrypt for performance and for historical reasons (see scylladb#24524).
/// Currently, SHA-512 is always chosen as the hashing scheme for new passwords, but the other
/// algorithms remain supported for CREATE ROLE WITH HASHED PASSWORD and backward compatibility.
///
enum class scheme {
bcrypt_y,
@@ -51,11 +52,11 @@ sstring generate_random_salt_bytes(RandomNumberEngine& g) {
}
///
/// Test each allowed hashing scheme and report the best supported one on the current system.
/// Test given hashing scheme on the current system.
///
/// \throws \ref no_supported_schemes when none of the known schemes is supported.
/// \throws \ref no_supported_schemes when scheme is unsupported.
///
scheme identify_best_supported_scheme();
void verify_scheme(scheme scheme);
std::string_view prefix_for_scheme(scheme) noexcept;
@@ -67,8 +68,7 @@ std::string_view prefix_for_scheme(scheme) noexcept;
/// \throws \ref no_supported_schemes when no known hashing schemes are supported on the system.
///
template <typename RandomNumberEngine>
sstring generate_salt(RandomNumberEngine& g) {
static const scheme scheme = identify_best_supported_scheme();
sstring generate_salt(RandomNumberEngine& g, scheme scheme) {
static const sstring prefix = sstring(prefix_for_scheme(scheme));
return prefix + generate_random_salt_bytes(g);
}
@@ -93,8 +93,8 @@ sstring hash_with_salt(const sstring& pass, const sstring& salt);
/// \throws \ref std::system_error when the implementation-specific implementation fails to hash the cleartext.
///
template <typename RandomNumberEngine>
sstring hash(const sstring& pass, RandomNumberEngine& g) {
return detail::hash_with_salt(pass, detail::generate_salt(g));
sstring hash(const sstring& pass, RandomNumberEngine& g, scheme scheme) {
return detail::hash_with_salt(pass, detail::generate_salt(g, scheme));
}
///

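`generate_salt` now takes the scheme explicitly instead of auto-detecting the best supported one: the salt is simply the crypt(3) settings prefix for the scheme followed by random salt characters. A sketch of that composition (prefixes as documented in crypt(5); the random part is passed in here for clarity, while the real code draws it from a seeded `std::default_random_engine`):

```cpp
#include <string>

enum class scheme { bcrypt_y, bcrypt_a, sha_512, sha_256, md5 };

// crypt(3) settings prefixes for each scheme, per crypt(5).
std::string prefix_for_scheme(scheme s) {
    switch (s) {
        case scheme::bcrypt_y: return "$2y$";
        case scheme::bcrypt_a: return "$2a$";
        case scheme::sha_512:  return "$6$";
        case scheme::sha_256:  return "$5$";
        case scheme::md5:      return "$1$";
    }
    return "";
}

// Mirrors detail::generate_salt(g, scheme): scheme prefix + random salt bytes.
std::string generate_salt(scheme s, const std::string& random_part) {
    return prefix_for_scheme(s) + random_part;
}
```

Passing the scheme explicitly is what lets the authenticator pin `_scheme` to SHA-512 while still accepting hashes in the other formats for `CREATE ROLE WITH HASHED PASSWORD`.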

@@ -34,9 +34,10 @@ static const class_registrator<
saslauthd_authenticator,
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
::service::migration_manager&,
utils::alien_worker&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&)
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, utils::alien_worker&)
: _socket_path(qp.db().get_config().saslauthd_socket_path())
{}


@@ -11,6 +11,7 @@
#pragma once
#include "auth/authenticator.hh"
#include "utils/alien_worker.hh"
namespace cql3 {
class query_processor;
@@ -28,7 +29,7 @@ namespace auth {
class saslauthd_authenticator : public authenticator {
sstring _socket_path; ///< Path to the domain socket on which saslauthd is listening.
public:
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&);
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, utils::alien_worker&);
future<> start() override;


@@ -187,14 +187,15 @@ service::service(
::service::migration_notifier& mn,
::service::migration_manager& mm,
const service_config& sc,
maintenance_socket_enabled used_by_maintenance_socket)
maintenance_socket_enabled used_by_maintenance_socket,
utils::alien_worker& hashing_worker)
: service(
std::move(c),
qp,
g0,
mn,
create_object<authorizer>(sc.authorizer_java_name, qp, g0, mm),
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm),
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, hashing_worker),
create_object<role_manager>(sc.role_manager_java_name, qp, g0, mm),
used_by_maintenance_socket) {
}
@@ -240,6 +241,13 @@ future<> service::start(::service::migration_manager& mm, db::system_keyspace& s
});
}
co_await _role_manager->start();
if (this_shard_id() == 0) {
// Role manager and password authenticator have this odd startup
// mechanism where they asynchronously create the superuser role
// in the background. Correct password creation depends on role
// creation; therefore, we need to wait here.
co_await _role_manager->ensure_superuser_is_created();
}
co_await when_all_succeed(_authorizer->start(), _authenticator->start()).discard_result();
_permissions_cache = std::make_unique<permissions_cache>(_loading_cache_config, *this, log);
co_await once_among_shards([this] {


@@ -26,6 +26,7 @@
#include "cql3/description.hh"
#include "seastarx.hh"
#include "service/raft/raft_group0_client.hh"
#include "utils/alien_worker.hh"
#include "utils/observable.hh"
#include "utils/serialized_action.hh"
#include "service/maintenance_mode.hh"
@@ -126,7 +127,8 @@ public:
::service::migration_notifier&,
::service::migration_manager&,
const service_config&,
maintenance_socket_enabled);
maintenance_socket_enabled,
utils::alien_worker&);
future<> start(::service::migration_manager&, db::system_keyspace&);


@@ -9,6 +9,7 @@
#include "auth/standard_role_manager.hh"
#include <optional>
#include <stdexcept>
#include <unordered_set>
#include <vector>
@@ -28,6 +29,7 @@
#include "cql3/util.hh"
#include "db/consistency_level_type.hh"
#include "exceptions/exceptions.hh"
#include "utils/error_injection.hh"
#include "utils/log.hh"
#include <seastar/core/loop.hh>
#include <seastar/coroutine/maybe_yield.hh>
@@ -178,7 +180,8 @@ future<> standard_role_manager::create_legacy_metadata_tables_if_missing() const
_migration_manager)).discard_result();
}
future<> standard_role_manager::create_default_role_if_missing() {
future<> standard_role_manager::legacy_create_default_role_if_missing() {
SCYLLA_ASSERT(legacy_mode(_qp));
try {
const auto exists = co_await default_role_row_satisfies(_qp, &has_can_login, _superuser);
if (exists) {
@@ -188,16 +191,12 @@ future<> standard_role_manager::create_default_role_if_missing() {
get_auth_ks_name(_qp),
meta::roles_table::name,
meta::roles_table::role_col_name);
if (legacy_mode(_qp)) {
co_await _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
{_superuser},
cql3::query_processor::cache_internal::no).discard_result();
} else {
co_await announce_mutations(_qp, _group0_client, query, {_superuser}, _as, ::service::raft_timeout{});
}
co_await _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
{_superuser},
cql3::query_processor::cache_internal::no).discard_result();
log.info("Created default superuser role '{}'.", _superuser);
} catch(const exceptions::unavailable_exception& e) {
log.warn("Skipped default role setup: some nodes were not ready; will retry");
@@ -205,6 +204,60 @@ future<> standard_role_manager::create_default_role_if_missing() {
}
}
future<> standard_role_manager::maybe_create_default_role() {
auto has_superuser = [this] () -> future<bool> {
const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", get_auth_ks_name(_qp), meta::roles_table::name);
auto results = co_await _qp.execute_internal(query, db::consistency_level::LOCAL_ONE,
internal_distributed_query_state(), cql3::query_processor::cache_internal::yes);
for (const auto& result : *results) {
if (has_can_login(result)) {
co_return true;
}
}
co_return false;
};
if (co_await has_superuser()) {
co_return;
}
// We don't want to start the operation earlier, to avoid the quorum
// requirement in the common case.
::service::group0_batch batch(
co_await _group0_client.start_operation(_as, get_raft_timeout()));
// Check again as the state may have changed before we took the guard (batch).
if (co_await has_superuser()) {
co_return;
}
// There is no superuser with the can_login field set - create the default role.
// Note that we don't check whether can_login is set to true.
const sstring insert_query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, true, true)",
get_auth_ks_name(_qp),
meta::roles_table::name,
meta::roles_table::role_col_name);
co_await collect_mutations(_qp, batch, insert_query, {_superuser});
co_await std::move(batch).commit(_group0_client, _as, get_raft_timeout());
log.info("Created default superuser role '{}'.", _superuser);
}
future<> standard_role_manager::maybe_create_default_role_with_retries() {
size_t retries = _migration_manager.get_concurrent_ddl_retries();
while (true) {
try {
co_return co_await maybe_create_default_role();
} catch (const ::service::group0_concurrent_modification& ex) {
log.warn("Failed to execute maybe_create_default_role due to guard conflict.{}.", retries ? " Retrying" : " Number of retries exceeded, giving up");
if (retries--) {
continue;
}
// Log error but don't crash the whole node startup sequence.
log.error("Failed to create default superuser role due to guard conflict.");
co_return;
} catch (const ::service::raft_operation_timeout_error& ex) {
log.error("Failed to create default superuser role due to exception: {}", ex.what());
co_return;
}
}
}
static const sstring legacy_table_name{"users"};
bool standard_role_manager::legacy_metadata_exists() {
@@ -266,10 +319,13 @@ future<> standard_role_manager::start() {
co_await migrate_legacy_metadata();
co_return;
}
co_await legacy_create_default_role_if_missing();
}
co_await create_default_role_if_missing();
if (!legacy) {
_superuser_created_promise.set_value();
co_await maybe_create_default_role_with_retries();
if (!_superuser_created_promise.available()) {
_superuser_created_promise.set_value();
}
}
};
@@ -619,6 +675,12 @@ future<role_set> standard_role_manager::query_all() {
// To avoid many copies of a view.
static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);
if (utils::get_local_injector().enter("standard_role_manager_fail_legacy_query")) {
if (legacy_mode(_qp)) {
throw std::runtime_error("standard_role_manager::query_all: failed due to error injection");
}
}
const auto results = co_await _qp.execute_internal(
query,
db::consistency_level::QUORUM,

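`maybe_create_default_role` follows a check / take-guard / re-check pattern: the cheap read runs first so the common case never needs the group0 guard (and its quorum requirement), and the check is repeated under the guard because the state may have changed in between. The same shape, with a mutex and a set standing in for the group0 batch guard and the roles table:

```cpp
#include <mutex>
#include <set>
#include <string>

std::mutex guard;            // stand-in for the group0 guard
std::set<std::string> roles; // stand-in for the roles table

// Create `name` only if no role exists yet, re-checking under the guard,
// mirroring the shape of maybe_create_default_role().
bool maybe_create_default(const std::string& name) {
    if (!roles.empty()) {    // cheap check: common case takes no guard
        return false;
    }
    std::lock_guard<std::mutex> lk(guard);
    if (!roles.empty()) {    // re-check: state may have changed meanwhile
        return false;
    }
    roles.insert(name);      // the real code commits group0 mutations here
    return true;
}
```

Unlike a mutex, the group0 guard does not block concurrent writers; a conflicting commit surfaces as `group0_concurrent_modification`, which is why the caller wraps this in a retry loop.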

@@ -95,7 +95,10 @@ private:
future<> migrate_legacy_metadata();
future<> create_default_role_if_missing();
future<> legacy_create_default_role_if_missing();
future<> maybe_create_default_role();
future<> maybe_create_default_role_with_retries();
future<> create_or_replace(std::string_view role_name, const role_config&, ::service::group0_batch&);


@@ -37,8 +37,8 @@ class transitional_authenticator : public authenticator {
public:
static const sstring PASSWORD_AUTHENTICATOR_NAME;
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm)
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm)) {
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, utils::alien_worker& hashing_worker)
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, hashing_worker)) {
}
transitional_authenticator(std::unique_ptr<authenticator> a)
: _authenticator(std::move(a)) {
@@ -239,7 +239,8 @@ static const class_registrator<
auth::transitional_authenticator,
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
::service::migration_manager&,
utils::alien_worker&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
static const class_registrator<
auth::authorizer,


@@ -960,8 +960,12 @@ public:
// Given a reference to such a column from the base schema, this function sets the corresponding column
// in the log to the given value for the given row.
void set_value(const clustering_key& log_ck, const column_definition& base_cdef, const managed_bytes_view& value) {
auto& log_cdef = *_log_schema.get_column_definition(log_data_column_name_bytes(base_cdef.name()));
_log_mut.set_cell(log_ck, log_cdef, atomic_cell::make_live(*base_cdef.type, _ts, value, _ttl));
auto log_cdef_ptr = _log_schema.get_column_definition(log_data_column_name_bytes(base_cdef.name()));
if (!log_cdef_ptr) {
throw exceptions::invalid_request_exception(format("CDC log schema for {}.{} does not have base column {}",
_log_schema.ks_name(), _log_schema.cf_name(), base_cdef.name_as_text()));
}
_log_mut.set_cell(log_ck, *log_cdef_ptr, atomic_cell::make_live(*base_cdef.type, _ts, value, _ttl));
}
// Each regular and static column in the base schema has a corresponding column in the log schema
@@ -969,7 +973,13 @@ public:
// Given a reference to such a column from the base schema, this function sets the corresponding column
// in the log to `true` for the given row. If not called, the column will be `null`.
void set_deleted(const clustering_key& log_ck, const column_definition& base_cdef) {
_log_mut.set_cell(log_ck, log_data_column_deleted_name_bytes(base_cdef.name()), data_value(true), _ts, _ttl);
auto log_cdef_ptr = _log_schema.get_column_definition(log_data_column_deleted_name_bytes(base_cdef.name()));
if (!log_cdef_ptr) {
throw exceptions::invalid_request_exception(format("CDC log schema for {}.{} does not have base column {}",
_log_schema.ks_name(), _log_schema.cf_name(), base_cdef.name_as_text()));
}
auto& log_cdef = *log_cdef_ptr;
_log_mut.set_cell(log_ck, *log_cdef_ptr, atomic_cell::make_live(*log_cdef.type, _ts, log_cdef.type->decompose(true), _ttl));
}
// Each regular and static non-atomic column in the base schema has a corresponding column in the log schema
@@ -978,7 +988,12 @@ public:
// Given a reference to such a column from the base schema, this function sets the corresponding column
// in the log to the given set of keys for the given row.
void set_deleted_elements(const clustering_key& log_ck, const column_definition& base_cdef, const managed_bytes& deleted_elements) {
auto& log_cdef = *_log_schema.get_column_definition(log_data_column_deleted_elements_name_bytes(base_cdef.name()));
auto log_cdef_ptr = _log_schema.get_column_definition(log_data_column_deleted_elements_name_bytes(base_cdef.name()));
if (!log_cdef_ptr) {
throw exceptions::invalid_request_exception(format("CDC log schema for {}.{} does not have base column {}",
_log_schema.ks_name(), _log_schema.cf_name(), base_cdef.name_as_text()));
}
auto& log_cdef = *log_cdef_ptr;
_log_mut.set_cell(log_ck, log_cdef, atomic_cell::make_live(*log_cdef.type, _ts, deleted_elements, _ttl));
}
@@ -1865,5 +1880,10 @@ bool cdc::cdc_service::needs_cdc_augmentation(const std::vector<mutation>& mutat
future<std::tuple<std::vector<mutation>, lw_shared_ptr<cdc::operation_result_tracker>>>
cdc::cdc_service::augment_mutation_call(lowres_clock::time_point timeout, std::vector<mutation>&& mutations, tracing::trace_state_ptr tr_state, db::consistency_level write_cl) {
if (utils::get_local_injector().enter("sleep_before_cdc_augmentation")) {
return seastar::sleep(std::chrono::milliseconds(100)).then([this, timeout, mutations = std::move(mutations), tr_state = std::move(tr_state), write_cl] () mutable {
return _impl->augment_mutation_call(timeout, std::move(mutations), std::move(tr_state), write_cl);
});
}
return _impl->augment_mutation_call(timeout, std::move(mutations), std::move(tr_state), write_cl);
}

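All three CDC setters above now check the result of `get_column_definition` before dereferencing it, turning a would-be null dereference into a clean `invalid_request_exception`. The guard itself is just the lookup-then-throw pattern, sketched here with simplified types:

```cpp
#include <map>
#include <stdexcept>
#include <string>

// Stand-in for the CDC log schema's column lookup.
using schema_map = std::map<std::string, int>;

// Mirrors the fix: look up the column and throw a descriptive error
// instead of dereferencing a null pointer when the column is missing.
const int& get_column_or_throw(const schema_map& log_schema, const std::string& name) {
    auto it = log_schema.find(name);
    if (it == log_schema.end()) {
        throw std::invalid_argument("CDC log schema does not have base column " + name);
    }
    return it->second;
}
```

The original code bound a reference through `*_log_schema.get_column_definition(...)` directly, so a base/log schema mismatch was undefined behavior rather than a reportable request error.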

@@ -1126,8 +1126,11 @@ future<> compaction_manager::drain() {
// Disable the state so that it can be enabled later if requested.
_state = state::disabled;
}
_compaction_submission_timer.cancel();
// Stop ongoing compactions (if the stop request has not already been sent) and wait for them to stop.
co_await stop_ongoing_compactions("drain");
// Signal the postponed_compactions_reevaluation() fiber so it can exit properly.
reevaluate_postponed_compactions();
cmlog.info("Drained");
}


@@ -255,6 +255,9 @@ public:
// Returns true iff given prefix has no missing components
bool is_full(managed_bytes_view v) const {
SCYLLA_ASSERT(AllowPrefixes == allow_prefixes::yes);
if (_types.size() == 0) {
return v.empty();
}
return std::distance(begin(v), end(v)) == (ssize_t)_types.size();
}
bool is_empty(managed_bytes_view v) const {

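The `is_full` fix handles the degenerate case of a compound type with zero component types, where the only full value is the empty one. A simplified model of the predicate over a vector of components:

```cpp
#include <cstddef>
#include <vector>

// Simplified model of the is_full predicate: a prefix is full when it
// carries one component per declared type. With zero declared types the
// only full value is the empty one - the case the patch adds.
bool is_full(std::size_t num_types, const std::vector<int>& components) {
    if (num_types == 0) {
        return components.empty();
    }
    return components.size() == num_types;
}
```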

@@ -15,6 +15,8 @@
#include <seastar/core/metrics.hh>
#include <seastar/core/sharded.hh>
#include <seastar/core/weak_ptr.hh>
#include <seastar/core/thread.hh>
#include <seastar/core/reactor.hh>
#include "utils/reusable_buffer.hh"
#include "sstables/compress.hh"
#include "sstables/exceptions.hh"
@@ -27,7 +29,7 @@
// SHA256
using dict_id = std::array<std::byte, 32>;
class sstable_compressor_factory_impl;
class dictionary_holder;
static seastar::logger compressor_factory_logger("sstable_compressor_factory");
@@ -41,11 +43,11 @@ template <> struct fmt::formatter<compression_parameters::algorithm> : fmt::form
// raw dicts might be used (and kept alive) directly by compressors (in particular, lz4 decompressor)
// or referenced by algorithm-specific dicts.
class raw_dict : public enable_lw_shared_from_this<raw_dict> {
weak_ptr<sstable_compressor_factory_impl> _owner;
weak_ptr<dictionary_holder> _owner;
dict_id _id;
std::vector<std::byte> _dict;
public:
raw_dict(sstable_compressor_factory_impl& owner, dict_id key, std::span<const std::byte> dict);
raw_dict(dictionary_holder& owner, dict_id key, std::span<const std::byte> dict);
~raw_dict();
const std::span<const std::byte> raw() const { return _dict; }
dict_id id() const { return _id; }
@@ -79,13 +81,13 @@ struct zstd_callback_allocator {
// (which internally holds a pointer to the raw dictionary blob
// and parsed entropy tables).
class zstd_ddict : public enable_lw_shared_from_this<zstd_ddict> {
weak_ptr<sstable_compressor_factory_impl> _owner;
weak_ptr<dictionary_holder> _owner;
lw_shared_ptr<const raw_dict> _raw;
size_t _used_memory = 0;
zstd_callback_allocator _alloc;
std::unique_ptr<ZSTD_DDict, decltype(&ZSTD_freeDDict)> _dict;
public:
zstd_ddict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw);
zstd_ddict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw);
~zstd_ddict();
auto dict() const { return _dict.get(); }
auto raw() const { return _raw->raw(); }
@@ -100,14 +102,14 @@ public:
// so the level of compression is decided at the time of construction
// of this dict.
class zstd_cdict : public enable_lw_shared_from_this<zstd_cdict> {
weak_ptr<sstable_compressor_factory_impl> _owner;
weak_ptr<dictionary_holder> _owner;
lw_shared_ptr<const raw_dict> _raw;
int _level;
size_t _used_memory = 0;
zstd_callback_allocator _alloc;
std::unique_ptr<ZSTD_CDict, decltype(&ZSTD_freeCDict)> _dict;
public:
zstd_cdict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw, int level);
zstd_cdict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw, int level);
~zstd_cdict();
auto dict() const { return _dict.get(); }
auto raw() const { return _raw->raw(); }
@@ -119,11 +121,11 @@ public:
// and a hash index over the substrings of the blob).
//
class lz4_cdict : public enable_lw_shared_from_this<lz4_cdict> {
weak_ptr<sstable_compressor_factory_impl> _owner;
weak_ptr<dictionary_holder> _owner;
lw_shared_ptr<const raw_dict> _raw;
std::unique_ptr<LZ4_stream_t, decltype(&LZ4_freeStream)> _dict;
public:
lz4_cdict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw);
lz4_cdict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw);
~lz4_cdict();
auto dict() const { return _dict.get(); }
auto raw() const { return _raw->raw(); }
@@ -164,6 +166,7 @@ public:
size_t compress_max_size(size_t input_len) const override;
std::map<sstring, sstring> options() const override;
algorithm get_algorithm() const override;
std::optional<unsigned> get_dict_owner_for_test() const override;
};
class snappy_processor: public compressor {
@@ -266,6 +269,7 @@ public:
size_t compress_max_size(size_t input_len) const override;
algorithm get_algorithm() const override;
std::map<sstring, sstring> options() const override;
std::optional<unsigned> get_dict_owner_for_test() const override;
};
zstd_processor::zstd_processor(const compression_parameters& opts, cdict_ptr cdict, ddict_ptr ddict) {
@@ -323,6 +327,16 @@ auto zstd_processor::get_algorithm() const -> algorithm {
return (_cdict || _ddict) ? algorithm::zstd_with_dicts : algorithm::zstd;
}
std::optional<unsigned> zstd_processor::get_dict_owner_for_test() const {
if (_cdict) {
return _cdict.get_owner_shard();
} else if (_ddict) {
return _ddict.get_owner_shard();
} else {
return std::nullopt;
}
}
const std::string_view DICTIONARY_OPTION = ".dictionary.";
static std::map<sstring, sstring> dict_as_options(std::span<const std::byte> d) {
@@ -384,6 +398,10 @@ std::map<sstring, sstring> compressor::options() const {
return {};
}
std::optional<unsigned> compressor::get_dict_owner_for_test() const {
return std::nullopt;
}
std::string compressor::name() const {
return compression_parameters::algorithm_to_qualified_name(get_algorithm());
}
@@ -434,7 +452,7 @@ std::string_view compression_parameters::algorithm_to_name(algorithm alg) {
case algorithm::snappy: return "SnappyCompressor";
case algorithm::zstd: return "ZstdCompressor";
case algorithm::zstd_with_dicts: return "ZstdWithDictsCompressor";
case algorithm::none: on_internal_error(compressor_factory_logger, "algorithm_to_name(): called with algorithm::none");
case algorithm::none: return "none"; // Name used only for logging purposes, can't be chosen by the user.
}
abort();
}
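The hunk above changes `algorithm_to_name` so that `algorithm::none` gets a log-only name instead of raising an internal error. The pattern can be sketched standalone (illustrative enum and names, not Scylla's actual types):

```cpp
#include <cstdlib>
#include <string_view>

// Illustrative enum-to-name mapping in the style of algorithm_to_name:
// the switch is exhaustive over the enum, and "none" gets a log-only name
// instead of triggering an internal error.
enum class algo { lz4, snappy, zstd, none };

std::string_view algo_name(algo a) {
    switch (a) {
    case algo::lz4:    return "LZ4Compressor";
    case algo::snappy: return "SnappyCompressor";
    case algo::zstd:   return "ZstdCompressor";
    case algo::none:   return "none"; // used only for logging; can't be chosen by the user
    }
    std::abort(); // unreachable while the switch covers every enumerator
}
```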
@@ -518,13 +536,17 @@ compression_parameters::compression_parameters(const std::map<sstring, sstring>&
}
}
void compression_parameters::validate(const gms::feature_service& fs) {
if (!fs.sstable_compression_dicts) {
if (_algorithm == algorithm::zstd_with_dicts || _algorithm == algorithm::lz4_with_dicts) {
void compression_parameters::validate(dicts_feature_enabled dicts_enabled, dicts_usage_allowed dicts_allowed) {
if (_algorithm == algorithm::zstd_with_dicts || _algorithm == algorithm::lz4_with_dicts) {
if (!dicts_enabled) {
throw std::runtime_error(std::format("sstable_compression {} can't be used before "
"all nodes are upgraded to a version which supports it",
algorithm_to_name(_algorithm)));
}
if (!dicts_allowed) {
throw std::runtime_error(std::format("sstable_compression {} has been disabled by `sstable_compression_dictionaries_allow_in_ddl: false`",
algorithm_to_name(_algorithm)));
}
}
if (_chunk_length) {
auto chunk_length = _chunk_length.value();
@@ -660,6 +682,16 @@ std::map<sstring, sstring> lz4_processor::options() const {
}
}
std::optional<unsigned> lz4_processor::get_dict_owner_for_test() const {
if (_cdict) {
return _cdict.get_owner_shard();
} else if (_ddict) {
return _ddict.get_owner_shard();
} else {
return std::nullopt;
}
}
compressor_ptr make_lz4_sstable_compressor_for_tests() {
return std::make_unique<lz4_processor>();
}
@@ -751,21 +783,12 @@ size_t snappy_processor::compress_max_size(size_t input_len) const {
return snappy_max_compressed_length(input_len);
}
// Constructs compressors and decompressors for SSTables,
// making sure that the expensive identical parts (dictionaries) are shared
// across SSTables.
//
// Holds weak pointers to all live dictionaries
// (so that they can be cheaply shared with new SSTables if an identical dict is requested),
// and shared (lifetime-extending) pointers to the current writer ("recommended")
// dict for each table (so that they can be shared with new SSTables without consulting
// `system.dicts`).
//
// To make coordination work without resorting to std::mutex and such, dicts have owner shards,
// (and are borrowed by foreign shared pointers) and all requests for a given dict ID go through its owner.
// (Note: this shouldn't pose a performance problem because a dict is only requested once per opening of an SSTable).
// (Note: at the moment of this writing, one shard owns all. Later we can spread the ownership. (E.g. shard it by dict hash)).
//
// Whenever a dictionary dies (because its refcount reaches 0), its weak pointer
// is removed from the factory.
//
@@ -774,10 +797,10 @@ size_t snappy_processor::compress_max_size(size_t input_len) const {
// Has a configurable memory budget for live dicts. If the budget is exceeded,
// will return null dicts to new writers (to avoid making the memory usage even worse)
// and print warnings.
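The lifecycle described in the comment above (the holder keeps only weak pointers to live dicts, and a dict erases itself from the holder when its last reference drops) can be sketched with plain `std::shared_ptr`/`std::weak_ptr` in place of Seastar's `lw_shared_ptr`; all names here are illustrative:

```cpp
#include <map>
#include <memory>
#include <string>

struct dict; // forward declaration

// Sketch of the self-deregistering dictionary cache: weak pointers mean the
// holder never extends a dict's lifetime on its own.
struct holder {
    std::map<std::string, std::weak_ptr<dict>> live;
};

struct dict {
    std::weak_ptr<holder> owner;
    std::string id;
    dict(std::shared_ptr<holder> h, std::string i) : owner(h), id(std::move(i)) {}
    ~dict() {
        if (auto h = owner.lock()) { // the holder itself may already be gone
            h->live.erase(id);
        }
    }
};

// Return the canonical shared instance for `id`, allocating only when no
// live copy exists (stand-in for get_canonical_ptr).
std::shared_ptr<dict> get_or_make(const std::shared_ptr<holder>& h, const std::string& id) {
    if (auto it = h->live.find(id); it != h->live.end()) {
        if (auto existing = it->second.lock()) {
            return existing; // share the already-loaded dict
        }
    }
    auto d = std::make_shared<dict>(h, id);
    h->live[id] = d;
    return d;
}
```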
class sstable_compressor_factory_impl : public sstable_compressor_factory, public weakly_referencable<sstable_compressor_factory_impl> {
class dictionary_holder : public weakly_referencable<dictionary_holder> {
mutable logger::rate_limit budget_warning_rate_limit{std::chrono::minutes(10)};
shard_id _owner_shard;
config _cfg;
using config = default_sstable_compressor_factory::config;
const config& _cfg;
uint64_t _total_live_dict_memory = 0;
metrics::metric_groups _metrics;
struct zstd_cdict_id {
@@ -789,7 +812,7 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
std::map<zstd_cdict_id, const zstd_cdict*> _zstd_cdicts;
std::map<dict_id, const zstd_ddict*> _zstd_ddicts;
std::map<dict_id, const lz4_cdict*> _lz4_cdicts;
std::map<table_id, lw_shared_ptr<const raw_dict>> _recommended;
std::map<table_id, lw_shared_ptr<foreign_ptr<lw_shared_ptr<const raw_dict>>>> _recommended;
size_t memory_budget() const {
return _cfg.memory_fraction_starting_at_which_we_stop_writing_dicts() * seastar::memory::stats().total_memory();
@@ -806,8 +829,11 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
memory_budget()
);
}
public:
lw_shared_ptr<const raw_dict> get_canonical_ptr(std::span<const std::byte> dict) {
SCYLLA_ASSERT(this_shard_id() == _owner_shard);
if (dict.empty()) {
return nullptr;
}
auto id = get_sha256(dict);
if (auto it = _raw_dicts.find(id); it != _raw_dicts.end()) {
return it->second->shared_from_this();
@@ -819,7 +845,9 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
}
using foreign_zstd_ddict = foreign_ptr<lw_shared_ptr<const zstd_ddict>>;
foreign_zstd_ddict get_zstd_dict_for_reading(lw_shared_ptr<const raw_dict> raw, int level) {
SCYLLA_ASSERT(this_shard_id() == _owner_shard);
if (!raw) {
return nullptr;
}
lw_shared_ptr<const zstd_ddict> ddict;
// For reading, we must allocate a new dict, even if the memory budget is exceeded. We have no other choice.
// In any case, if the budget is exceeded, we print a rate-limited warning about it.
@@ -835,15 +863,11 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
}
return make_foreign(std::move(ddict));
}
future<foreign_zstd_ddict> get_zstd_dict_for_reading(std::span<const std::byte> dict, int level) {
return smp::submit_to(_owner_shard, [this, dict, level] -> foreign_zstd_ddict {
auto raw = get_canonical_ptr(dict);
return get_zstd_dict_for_reading(raw, level);
});
}
using foreign_zstd_cdict = foreign_ptr<lw_shared_ptr<const zstd_cdict>>;
foreign_zstd_cdict get_zstd_dict_for_writing(lw_shared_ptr<const raw_dict> raw, int level) {
SCYLLA_ASSERT(this_shard_id() == _owner_shard);
if (!_cfg.enable_writing_dictionaries() || !raw) {
return nullptr;
}
lw_shared_ptr<const zstd_cdict> cdict;
// If we can share an already-allocated dict, we do that regardless of memory budget.
// If we would have to allocate a new dict for writing, we only do that if we haven't exceeded
@@ -859,19 +883,6 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
}
return make_foreign(std::move(cdict));
}
future<foreign_zstd_cdict> get_zstd_dict_for_writing(table_id t, int level) {
return smp::submit_to(_owner_shard, [this, t, level] -> foreign_zstd_cdict {
if (!_cfg.enable_writing_dictionaries()) {
return {};
}
auto rec_it = _recommended.find(t);
if (rec_it != _recommended.end()) {
return get_zstd_dict_for_writing(rec_it->second, level);
} else {
return {};
}
});
}
using lz4_dicts = std::pair<
foreign_ptr<lw_shared_ptr<const raw_dict>>,
foreign_ptr<lw_shared_ptr<const lz4_cdict>>
@@ -879,18 +890,12 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
using foreign_lz4_ddict = foreign_ptr<lw_shared_ptr<const raw_dict>>;
using foreign_lz4_cdict = foreign_ptr<lw_shared_ptr<const lz4_cdict>>;
foreign_lz4_ddict get_lz4_dict_for_reading(lw_shared_ptr<const raw_dict> raw) {
SCYLLA_ASSERT(this_shard_id() == _owner_shard);
lw_shared_ptr<const raw_dict> ddict;
return make_foreign(std::move(raw));
}
future<foreign_lz4_ddict> get_lz4_dicts_for_reading(std::span<const std::byte> dict) {
return smp::submit_to(_owner_shard, [this, dict] -> foreign_lz4_ddict {
auto raw = get_canonical_ptr(dict);
return get_lz4_dict_for_reading(raw);
});
}
foreign_lz4_cdict get_lz4_dict_for_writing(lw_shared_ptr<const raw_dict> raw) {
SCYLLA_ASSERT(this_shard_id() == _owner_shard);
if (!_cfg.enable_writing_dictionaries() || !raw) {
return nullptr;
}
lw_shared_ptr<const lz4_cdict> cdict;
// If we can share an already-allocated dict, we do that regardless of memory budget.
// If we would have to allocate a new dict for writing, we only do that if we haven't exceeded
@@ -905,24 +910,10 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
}
return make_foreign(std::move(cdict));
}
future<foreign_lz4_cdict> get_lz4_dicts_for_writing(table_id t) {
return smp::submit_to(_owner_shard, [this, t] -> foreign_lz4_cdict {
if (!_cfg.enable_writing_dictionaries()) {
return {};
}
auto rec_it = _recommended.find(t);
if (rec_it != _recommended.end()) {
return get_lz4_dict_for_writing(rec_it->second);
} else {
return {};
}
});
}
public:
sstable_compressor_factory_impl(config cfg)
: _owner_shard(this_shard_id())
, _cfg(std::move(cfg))
dictionary_holder(const config& cfg)
: _cfg(cfg)
{
if (_cfg.register_metrics) {
namespace sm = seastar::metrics;
@@ -931,8 +922,8 @@ public:
});
}
}
sstable_compressor_factory_impl(sstable_compressor_factory_impl&&) = delete;
~sstable_compressor_factory_impl() {
dictionary_holder(dictionary_holder&&) = delete;
~dictionary_holder() {
// Note: `_recommended` might be the only thing keeping some dicts alive,
// so clearing it will destroy them.
//
@@ -948,39 +939,39 @@ public:
_recommended.clear();
}
void forget_raw_dict(dict_id id) {
SCYLLA_ASSERT(this_shard_id() == _owner_shard);
_raw_dicts.erase(id);
}
void forget_zstd_cdict(dict_id id, int level) {
SCYLLA_ASSERT(this_shard_id() == _owner_shard);
_zstd_cdicts.erase({id, level});
}
void forget_zstd_ddict(dict_id id) {
SCYLLA_ASSERT(this_shard_id() == _owner_shard);
_zstd_ddicts.erase(id);
}
void forget_lz4_cdict(dict_id id) {
SCYLLA_ASSERT(this_shard_id() == _owner_shard);
_lz4_cdicts.erase(id);
}
future<> set_recommended_dict(table_id t, std::span<const std::byte> dict) override {
return smp::submit_to(_owner_shard, [this, t, dict] {
_recommended.erase(t);
if (dict.size()) {
auto canonical_ptr = get_canonical_ptr(dict);
_recommended.emplace(t, canonical_ptr);
compressor_factory_logger.debug("set_recommended_dict: table={} size={} id={}",
t, dict.size(), fmt_hex(canonical_ptr->id()));
} else {
compressor_factory_logger.debug("set_recommended_dict: table={} size=0", t);
}
});
void set_recommended_dict(table_id t, foreign_ptr<lw_shared_ptr<const raw_dict>> dict) {
_recommended.erase(t);
if (dict) {
compressor_factory_logger.debug("set_recommended_dict: table={} size={} id={}",
t, dict->raw().size(), fmt_hex(dict->id()));
_recommended.emplace(t, make_lw_shared(std::move(dict)));
} else {
compressor_factory_logger.debug("set_recommended_dict: table={} size=0", t);
}
}
future<foreign_ptr<lw_shared_ptr<const raw_dict>>> get_recommended_dict(table_id t) {
auto rec_it = _recommended.find(t);
if (rec_it == _recommended.end()) {
co_return nullptr;
}
// Note that rec_it might be invalidated while we are doing the copy(),
// so we have to make a copy of the outer shared ptr first.
lw_shared_ptr<foreign_ptr<lw_shared_ptr<const raw_dict>>> ptr = rec_it->second;
co_return co_await ptr->copy();
}
future<compressor_ptr> make_compressor_for_writing(schema_ptr) override;
future<compressor_ptr> make_compressor_for_reading(sstables::compression&) override;
void account_memory_delta(ssize_t n) {
SCYLLA_ASSERT(this_shard_id() == _owner_shard);
if (static_cast<ssize_t>(_total_live_dict_memory) + n < 0) {
compressor_factory_logger.error(
"Error in dictionary memory accounting: delta {} brings live memory {} below 0",
@@ -990,19 +981,85 @@ public:
}
};
default_sstable_compressor_factory::default_sstable_compressor_factory(config cfg)
: _cfg(std::move(cfg))
, _holder(std::make_unique<dictionary_holder>(_cfg))
{
for (shard_id i = 0; i < smp::count; ++i) {
auto numa_id = _cfg.numa_config[i];
_numa_groups.resize(std::max<size_t>(_numa_groups.size(), numa_id + 1));
_numa_groups[numa_id].push_back(i);
}
}
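The constructor loop above can be sketched standalone: it inverts the per-shard NUMA-node mapping into a list of shard IDs per NUMA node (plain std types; the real code reads the mapping from the factory config):

```cpp
#include <vector>

// Invert a shard -> NUMA-node mapping into groups of shard IDs per NUMA node,
// mirroring the _numa_groups construction above (illustrative helper name).
std::vector<std::vector<unsigned>> build_numa_groups(const std::vector<unsigned>& shard_to_numa) {
    std::vector<std::vector<unsigned>> groups;
    for (unsigned shard = 0; shard < shard_to_numa.size(); ++shard) {
        unsigned numa_id = shard_to_numa[shard];
        if (groups.size() <= numa_id) {
            groups.resize(numa_id + 1); // grow lazily, like _numa_groups.resize()
        }
        groups[numa_id].push_back(shard);
    }
    return groups;
}
```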
future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_writing(schema_ptr s) {
const auto params = s->get_compressor_params();
default_sstable_compressor_factory::~default_sstable_compressor_factory() {
}
std::vector<unsigned> default_sstable_compressor_factory_config::get_default_shard_to_numa_node_mapping() {
auto sp = local_engine->smp().shard_to_numa_node_mapping();
return std::vector<unsigned>(sp.begin(), sp.end());
}
unsigned default_sstable_compressor_factory::local_numa_id() {
return _cfg.numa_config[this_shard_id()];
}
shard_id default_sstable_compressor_factory::get_dict_owner(unsigned numa_id, const sha256_type& sha) {
auto hash = read_unaligned<uint64_t>(sha.data());
const auto& group = _numa_groups[numa_id];
if (group.empty()) {
on_internal_error(compressor_factory_logger, "get_dict_owner called on an empty NUMA group");
}
return group[hash % group.size()];
}
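The owner selection in `get_dict_owner` can be sketched standalone: the first eight bytes of the dictionary's SHA-256 are read as an (unaligned) integer and reduced modulo the NUMA group size, so every shard deterministically agrees on the dict's owner without any locking (names are illustrative):

```cpp
#include <array>
#include <cstdint>
#include <cstring>
#include <vector>

using sha256_type = std::array<std::uint8_t, 32>;

// Deterministically pick the owner shard for a dictionary from its hash.
std::uint32_t pick_owner(const std::vector<std::uint32_t>& group, const sha256_type& sha) {
    std::uint64_t h;
    std::memcpy(&h, sha.data(), sizeof(h)); // stand-in for read_unaligned<uint64_t>
    return group[h % group.size()];
}
```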
future<> default_sstable_compressor_factory::set_recommended_dict_local(table_id t, std::span<const std::byte> dict) {
if (_leader_shard != this_shard_id()) {
on_internal_error(compressor_factory_logger, fmt::format("set_recommended_dict_local called on wrong shard. Expected: {}, got {}", _leader_shard, this_shard_id()));
}
auto units = co_await get_units(_recommendation_setting_sem, 1);
auto sha = get_sha256(dict);
for (unsigned numa_id = 0; numa_id < _numa_groups.size(); ++numa_id) {
const auto& group = _numa_groups[numa_id];
if (group.empty()) {
continue;
}
auto r = get_dict_owner(numa_id, sha);
auto d = co_await container().invoke_on(r, [dict](self& local) {
return make_foreign(local._holder->get_canonical_ptr(dict));
});
auto local_coordinator = group[0];
co_await container().invoke_on(local_coordinator, coroutine::lambda([t, d = std::move(d)](self& local) mutable {
local._holder->set_recommended_dict(t, std::move(d));
}));
}
}
future<> default_sstable_compressor_factory::set_recommended_dict(table_id t, std::span<const std::byte> dict) {
return container().invoke_on(_leader_shard, &self::set_recommended_dict_local, t, dict);
}
future<foreign_ptr<lw_shared_ptr<const raw_dict>>> default_sstable_compressor_factory::get_recommended_dict(table_id t) {
const auto local_coordinator = _numa_groups[local_numa_id()][0];
return container().invoke_on(local_coordinator, [t](self& local) {
return local._holder->get_recommended_dict(t);
});
}
future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_writing_impl(const compression_parameters& params, table_id id) {
using algorithm = compression_parameters::algorithm;
const auto algo = params.get_algorithm();
compressor_factory_logger.debug("make_compressor_for_writing: table={} algo={}", s->id(), algo);
compressor_factory_logger.debug("make_compressor_for_writing: table={} algo={}", id, algo);
switch (algo) {
case algorithm::lz4:
co_return std::make_unique<lz4_processor>(nullptr, nullptr);
case algorithm::lz4_with_dicts: {
auto cdict = _cfg.enable_writing_dictionaries()
? co_await get_lz4_dicts_for_writing(s->id())
: nullptr;
holder::foreign_lz4_cdict cdict;
if (auto recommended = co_await get_recommended_dict(id)) {
cdict = co_await container().invoke_on(recommended.get_owner_shard(), [recommended = std::move(recommended)] (self& local) mutable {
return local._holder->get_lz4_dict_for_writing(recommended.release());
});
}
if (cdict) {
compressor_factory_logger.debug("make_compressor_for_writing: using dict id={}", fmt_hex(cdict->id()));
}
@@ -1015,9 +1072,13 @@ future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_writ
case algorithm::zstd:
co_return std::make_unique<zstd_processor>(params, nullptr, nullptr);
case algorithm::zstd_with_dicts: {
auto cdict = _cfg.enable_writing_dictionaries()
? co_await get_zstd_dict_for_writing(s->id(), params.zstd_compression_level().value_or(ZSTD_defaultCLevel()))
: nullptr;
holder::foreign_zstd_cdict cdict;
if (auto recommended = co_await get_recommended_dict(id)) {
auto level = params.zstd_compression_level().value_or(ZSTD_defaultCLevel());
cdict = co_await container().invoke_on(recommended.get_owner_shard(), [level, recommended = std::move(recommended)] (self& local) mutable {
return local._holder->get_zstd_dict_for_writing(recommended.release(), level);
});
}
if (cdict) {
compressor_factory_logger.debug("make_compressor_for_writing: using dict id={}", fmt_hex(cdict->id()));
}
@@ -1029,17 +1090,28 @@ future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_writ
abort();
}
future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_reading(sstables::compression& c) {
const auto params = compression_parameters(sstables::options_from_compression(c));
future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_writing(schema_ptr s) {
return make_compressor_for_writing_impl(s->get_compressor_params(), s->id());
}
future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_writing_for_tests(const compression_parameters& params, table_id id) {
return make_compressor_for_writing_impl(params, id);
}
future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_reading_impl(const compression_parameters& params, std::span<const std::byte> dict) {
using algorithm = compression_parameters::algorithm;
const auto algo = params.get_algorithm();
compressor_factory_logger.debug("make_compressor_for_reading: compression={} algo={}", fmt::ptr(&c), algo);
switch (algo) {
case algorithm::lz4:
co_return std::make_unique<lz4_processor>(nullptr, nullptr);
case algorithm::lz4_with_dicts: {
auto dict = dict_from_options(c);
auto ddict = co_await get_lz4_dicts_for_reading(std::as_bytes(std::span(*dict)));
auto dict_span = dict;
auto sha = get_sha256(dict_span);
auto dict_owner = get_dict_owner(local_numa_id(), sha);
auto ddict = co_await container().invoke_on(dict_owner, [dict_span] (self& local) mutable {
auto d = local._holder->get_canonical_ptr(dict_span);
return local._holder->get_lz4_dict_for_reading(std::move(d));
});
if (ddict) {
compressor_factory_logger.debug("make_compressor_for_reading: using dict id={}", fmt_hex(ddict->id()));
}
@@ -1054,8 +1126,13 @@ future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_read
}
case algorithm::zstd_with_dicts: {
auto level = params.zstd_compression_level().value_or(ZSTD_defaultCLevel());
auto dict = dict_from_options(c);
auto ddict = co_await get_zstd_dict_for_reading(std::as_bytes(std::span(*dict)), level);
auto dict_span = dict;
auto sha = get_sha256(dict_span);
auto dict_owner = get_dict_owner(local_numa_id(), sha);
auto ddict = co_await container().invoke_on(dict_owner, [level, dict_span] (self& local) mutable {
auto d = local._holder->get_canonical_ptr(dict_span);
return local._holder->get_zstd_dict_for_reading(std::move(d), level);
});
if (ddict) {
compressor_factory_logger.debug("make_compressor_for_reading: using dict id={}", fmt_hex(ddict->id()));
}
@@ -1067,7 +1144,19 @@ future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_read
abort();
}
raw_dict::raw_dict(sstable_compressor_factory_impl& owner, dict_id key, std::span<const std::byte> dict)
future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_reading(sstables::compression& c) {
const auto params = compression_parameters(sstables::options_from_compression(c));
auto dict = dict_from_options(c);
const auto algo = params.get_algorithm();
compressor_factory_logger.debug("make_compressor_for_reading: compression={} algo={}", fmt::ptr(&c), algo);
co_return co_await make_compressor_for_reading_impl(params, std::as_bytes(std::span(*dict)));
}
future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_reading_for_tests(const compression_parameters& params, std::span<const std::byte> dict) {
return make_compressor_for_reading_impl(params, dict);
}
raw_dict::raw_dict(dictionary_holder& owner, dict_id key, std::span<const std::byte> dict)
: _owner(owner.weak_from_this())
, _id(key)
, _dict(dict.begin(), dict.end())
@@ -1082,7 +1171,7 @@ raw_dict::~raw_dict() {
}
}
zstd_cdict::zstd_cdict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw, int level)
zstd_cdict::zstd_cdict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw, int level)
: _owner(owner.weak_from_this())
, _raw(raw)
, _level(level)
@@ -1114,7 +1203,7 @@ zstd_cdict::~zstd_cdict() {
}
}
zstd_ddict::zstd_ddict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw)
zstd_ddict::zstd_ddict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw)
: _owner(owner.weak_from_this())
, _raw(raw)
, _alloc([this] (ssize_t n) {
@@ -1143,7 +1232,7 @@ zstd_ddict::~zstd_ddict() {
}
}
lz4_cdict::lz4_cdict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw)
lz4_cdict::lz4_cdict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw)
: _owner(owner.weak_from_this())
, _raw(raw)
, _dict(LZ4_createStream(), LZ4_freeStream)
@@ -1162,6 +1251,28 @@ lz4_cdict::~lz4_cdict() {
}
}
std::unique_ptr<sstable_compressor_factory> make_sstable_compressor_factory(sstable_compressor_factory::config cfg) {
return std::make_unique<sstable_compressor_factory_impl>(std::move(cfg));
std::unique_ptr<sstable_compressor_factory> make_sstable_compressor_factory_for_tests_in_thread() {
SCYLLA_ASSERT(thread::running_in_thread());
struct wrapper : sstable_compressor_factory {
using impl = default_sstable_compressor_factory;
sharded<impl> _impl;
future<compressor_ptr> make_compressor_for_writing(schema_ptr s) override {
return _impl.local().make_compressor_for_writing(s);
}
future<compressor_ptr> make_compressor_for_reading(sstables::compression& c) override {
return _impl.local().make_compressor_for_reading(c);
}
future<> set_recommended_dict(table_id t, std::span<const std::byte> d) override {
return _impl.local().set_recommended_dict(t, d);
};
wrapper(wrapper&&) = delete;
wrapper() {
_impl.start().get();
}
~wrapper() {
_impl.stop().get();
}
};
return std::make_unique<wrapper>();
}


@@ -13,12 +13,9 @@
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include <seastar/util/bool_class.hh>
#include "seastarx.hh"
namespace gms {
class feature_service;
} // namespace gms
class compression_parameters;
class compressor {
@@ -64,6 +61,8 @@ public:
virtual algorithm get_algorithm() const = 0;
virtual std::optional<unsigned> get_dict_owner_for_test() const;
using ptr_type = std::unique_ptr<compressor>;
};
@@ -106,7 +105,10 @@ public:
algorithm get_algorithm() const { return _algorithm; }
std::optional<int> zstd_compression_level() const { return _zstd_compression_level; }
void validate(const gms::feature_service&);
using dicts_feature_enabled = bool_class<struct dicts_feature_enabled_tag>;
using dicts_usage_allowed = bool_class<struct dicts_usage_allowed_tag>;
void validate(dicts_feature_enabled, dicts_usage_allowed);
std::map<sstring, sstring> get_options() const;
bool compression_enabled() const {


@@ -855,3 +855,18 @@ rf_rack_valid_keyspaces: false
# Maximum number of items in single BatchWriteItem command. Default is 100.
# Note: DynamoDB has a hard-coded limit of 25.
# alternator_max_items_in_batch_write: 100
#
# io-streaming rate limiting
# When set to a non-zero value, Scylla throttles disk throughput for
# streaming (network) activities such as backup, repair, tablet migration and more.
# This limit protects user queries, ensuring that the network interface does
# not get saturated by streaming activities.
# The recommended value is 75% of the network bandwidth.
# E.g. for i4i.8xlarge (https://github.com/scylladb/scylla-machine-image/tree/next/common/aws_net_params.json):
# network: 18.75 Gb/s --> 18750 Mb/s --> 1875 MB/s (from network bits to network bytes: divide by 10, not 8)
# Converted to disk bytes: 1875 * 1000 / 1024 = 1831 MB/s (disk-wise)
# 75% of that is: 0.75 * 1831 = 1373 megabytes/s
# stream_io_throughput_mb_per_sec: 1373
#
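The arithmetic in the example above can be checked with a short sketch (the helper is illustrative, not part of Scylla):

```cpp
#include <cstdint>

// Derive the suggested stream_io_throughput_mb_per_sec from a NIC speed in
// megabits/s, following the worked example in the comment above.
std::uint64_t recommended_stream_io_mb(std::uint64_t network_mbps) {
    std::uint64_t mb_per_sec = network_mbps / 10;     // bits -> bytes, /10 to absorb protocol overhead
    std::uint64_t disk_mb = mb_per_sec * 1000 / 1024; // decimal MB -> disk-style MB
    return disk_mb * 75 / 100;                        // leave 25% headroom for user traffic
}
```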


@@ -981,6 +981,7 @@ scylla_core = (['message/messaging_service.cc',
'cql3/result_set.cc',
'cql3/prepare_context.cc',
'db/batchlog_manager.cc',
'db/corrupt_data_handler.cc',
'db/commitlog/commitlog.cc',
'db/commitlog/commitlog_entry.cc',
'db/commitlog/commitlog_replayer.cc',
@@ -1034,6 +1035,7 @@ scylla_core = (['message/messaging_service.cc',
'utils/multiprecision_int.cc',
'utils/gz/crc_combine.cc',
'utils/gz/crc_combine_table.cc',
'utils/http.cc',
'utils/s3/aws_error.cc',
'utils/s3/client.cc',
'utils/s3/retryable_http_client.cc',
@@ -1338,6 +1340,7 @@ idls = ['idl/gossip_digest.idl.hh',
'idl/replica_exception.idl.hh',
'idl/per_partition_rate_limit_info.idl.hh',
'idl/position_in_partition.idl.hh',
'idl/full_position.idl.hh',
'idl/experimental/broadcast_tables_lang.idl.hh',
'idl/storage_service.idl.hh',
'idl/join_node.idl.hh',
@@ -1538,6 +1541,7 @@ deps['test/boost/combined_tests'] += [
'test/boost/secondary_index_test.cc',
'test/boost/sessions_test.cc',
'test/boost/sstable_compaction_test.cc',
'test/boost/sstable_compressor_factory_test.cc',
'test/boost/sstable_directory_test.cc',
'test/boost/sstable_set_test.cc',
'test/boost/statement_restrictions_test.cc',


@@ -267,33 +267,44 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
muts.insert(muts.begin(), schema_mutations.begin(), schema_mutations.end());
}
auto rs = locator::abstract_replication_strategy::create_replication_strategy(
ks_md_update->strategy_name(),
locator::replication_strategy_params(ks_md_update->strategy_options(), ks_md_update->initial_tablets()));
// If `rf_rack_valid_keyspaces` is enabled, it's forbidden to perform a schema change that
// would lead to an RF-rack-invalid keyspace. Verify that this change does not.
// For more context, see: scylladb/scylladb#23071.
if (qp.db().get_config().rf_rack_valid_keyspaces()) {
auto rs = locator::abstract_replication_strategy::create_replication_strategy(
ks_md_update->strategy_name(),
locator::replication_strategy_params(ks_md_update->strategy_options(), ks_md_update->initial_tablets()));
try {
// There are two things to note here:
// 1. We hold a group0_guard, so it's correct to check this here.
// The topology or schema cannot change while we're performing this query.
// 2. The replication strategy we use here does NOT represent the actual state
// we will arrive at after applying the schema change. For instance, if the user
// did not specify the RF for some of the DCs, it's equal to 0 in the replication
// strategy we pass to this function, while in reality that means that the RF
// will NOT change. That is not a problem:
// - RF=0 is valid for all DCs, so it won't trigger an exception on its own,
// - the keyspace must've been RF-rack-valid before this change. We check that
// condition for all keyspaces at startup.
// The second hyphen is not really true because currently topological changes can
// disturb it (see scylladb/scylladb#23345), but we ignore that.
locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
} catch (const std::exception& e) {
try {
// There are two things to note here:
// 1. We hold a group0_guard, so it's correct to check this here.
// The topology or schema cannot change while we're performing this query.
// 2. The replication strategy we use here does NOT represent the actual state
// we will arrive at after applying the schema change. For instance, if the user
// did not specify the RF for some of the DCs, it's equal to 0 in the replication
// strategy we pass to this function, while in reality that means that the RF
// will NOT change. That is not a problem:
// - RF=0 is valid for all DCs, so it won't trigger an exception on its own,
// - the keyspace must've been RF-rack-valid before this change. We check that
// condition for all keyspaces at startup.
// The second hyphen is not really true because currently topological changes can
// disturb it (see scylladb/scylladb#23345), but we ignore that.
locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
} catch (const std::exception& e) {
if (qp.db().get_config().rf_rack_valid_keyspaces()) {
// There's no guarantee what the type of the exception will be, so we need to
// wrap it manually here in a type that can be passed to the user.
throw exceptions::invalid_request_exception(e.what());
} else {
// Even when the configuration option `rf_rack_valid_keyspaces` is set to false,
// we'd like to inform the user that the keyspace they're altering will not
// satisfy the restriction after the change--but just as a warning.
// For more context, see issue: scylladb/scylladb#23330.
warnings.push_back(seastar::format(
"Keyspace '{}' is not RF-rack-valid: the replication factor doesn't match "
"the rack count in at least one datacenter. A rack failure may reduce availability. "
"For more context, see: "
"https://docs.scylladb.com/manual/stable/reference/glossary.html#term-RF-rack-valid-keyspace.",
_name));
}
}
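The control flow above runs the same RF-rack validation unconditionally; the `rf_rack_valid_keyspaces` flag only decides whether a failure becomes a hard error or a warning returned to the client. A minimal sketch with illustrative names:

```cpp
#include <stdexcept>
#include <string>
#include <vector>

// One validation, two outcomes: strict mode throws (stand-in for
// invalid_request_exception), lenient mode collects a warning instead.
void check_rf_rack(bool strict, bool valid, const std::string& ks,
                   std::vector<std::string>& warnings) {
    if (valid) {
        return;
    }
    std::string msg = "Keyspace '" + ks + "' is not RF-rack-valid";
    if (strict) {
        throw std::invalid_argument(msg);
    }
    warnings.push_back(msg); // warn, but don't fail the DDL statement
}
```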


@@ -8,6 +8,7 @@
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
*/
#include "cdc/log.hh"
#include "utils/assert.hh"
#include <seastar/core/coroutine.hh>
#include "cql3/query_options.hh"
@@ -27,6 +28,7 @@
#include "db/view/view.hh"
#include "cql3/query_processor.hh"
#include "cdc/cdc_extension.hh"
#include "cdc/cdc_partitioner.hh"
namespace cql3 {
@@ -290,6 +292,53 @@ std::pair<schema_ptr, std::vector<view_ptr>> alter_table_statement::prepare_sche
throw exceptions::invalid_request_exception("Cannot use ALTER TABLE on Materialized View");
}
const bool is_cdc_log_table = cdc::is_log_for_some_table(db.real_database(), s->ks_name(), s->cf_name());
// Only a CDC log table will have this partitioner name. User tables should
// not be able to set this. Note that we perform a similar check when trying to
// re-enable CDC for a table, when the log table has been replaced by a user table.
// For better visualization of the above, consider this
//
// cqlsh> CREATE TABLE ks.t (p int PRIMARY KEY, v int) WITH cdc = {'enabled': true};
// cqlsh> INSERT INTO ks.t (p, v) VALUES (1, 2);
// cqlsh> ALTER TABLE ks.t WITH cdc = {'enabled': false};
// cqlsh> DESC TABLE ks.t_scylla_cdc_log WITH INTERNALS; # Save this output!
// cqlsh> DROP TABLE ks.t_scylla_cdc_log;
// cqlsh> [Recreate the log table using the received statement]
// cqlsh> ALTER TABLE ks.t WITH cdc = {'enabled': true};
//
// InvalidRequest: Error from server: code=2200 [Invalid query] message="Cannot create CDC log
// table for table ks.t because a table of name ks.t_scylla_cdc_log already exists"
//
// See commit adda43edc75b901b2329bca8f3eb74596698d05f for more information on THAT case.
// We reuse the same technique here.
const bool was_cdc_log_table = s->get_partitioner().name() == cdc::cdc_partitioner::classname;
if (_column_changes.size() != 0 && is_cdc_log_table) {
throw exceptions::invalid_request_exception(
"You cannot modify the set of columns of a CDC log table directly. "
"Modify the base table instead.");
}
if (_column_changes.size() != 0 && was_cdc_log_table) {
throw exceptions::invalid_request_exception(
"You cannot modify the set of columns of a CDC log table directly. "
"Although the base table has deactivated CDC, this table will continue being "
"a CDC log table until it is dropped. If you want to modify the columns in it, "
"you can only do that by reenabling CDC on the base table, which will reattach "
"this log table. Then you will be able to modify the columns in the base table, "
"and that will have effect on the log table too. Modifying the columns of a CDC "
"log table directly is never allowed.");
}
if (_renames.size() != 0 && is_cdc_log_table) {
throw exceptions::invalid_request_exception("Cannot rename a column of a CDC log table.");
}
if (_renames.size() != 0 && was_cdc_log_table) {
throw exceptions::invalid_request_exception(
"You cannot rename a column of a CDC log table. Although the base table "
"has deactivated CDC, this table will continue being a CDC log table until it "
"is dropped.");
}
auto cfm = schema_builder(s);
if (_properties->get_id()) {

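The partitioner-name trick above (a detached log table keeps the CDC partitioner until it is dropped) can be condensed into a small sketch. The struct, the function, and the partitioner string below are illustrative assumptions, not Scylla's actual API; the real name lives in `cdc::cdc_partitioner::classname`:

```cpp
#include <cassert>
#include <stdexcept>
#include <string>

// Minimal sketch of the guards above: a table is treated as a CDC log table
// either because it is currently attached to a base table, or because it
// still carries the CDC partitioner name after CDC was disabled.
struct table_state {
    bool attached_cdc_log;    // plays the role of is_cdc_log_table
    std::string partitioner;  // plays the role of s->get_partitioner().name()
};

// Assumed value for illustration; the real one is cdc::cdc_partitioner::classname.
const std::string cdc_partitioner_name = "com.scylladb.dht.CDCPartitioner";

void check_column_alter_allowed(const table_state& t) {
    const bool was_cdc_log = t.partitioner == cdc_partitioner_name;
    if (t.attached_cdc_log || was_cdc_log) {
        throw std::invalid_argument(
            "You cannot modify the set of columns of a CDC log table directly.");
    }
}
```

Either flag alone is enough to reject the ALTER, which is exactly why the diff adds the second, partitioner-based check.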
View File

@@ -23,6 +23,7 @@
#include "db/per_partition_rate_limit_options.hh"
#include "db/tablet_options.hh"
#include "utils/bloom_calculations.hh"
#include "db/config.hh"
#include <boost/algorithm/string/predicate.hpp>
@@ -135,7 +136,9 @@ void cf_prop_defs::validate(const data_dictionary::database db, sstring ks_name,
throw exceptions::configuration_exception(sstring("Missing sub-option '") + compression_parameters::SSTABLE_COMPRESSION + "' for the '" + KW_COMPRESSION + "' option.");
}
compression_parameters cp(*compression_options);
cp.validate(db.features());
cp.validate(
compression_parameters::dicts_feature_enabled(bool(db.features().sstable_compression_dicts)),
compression_parameters::dicts_usage_allowed(db.get_config().sstable_compression_dictionaries_allow_in_ddl()));
}
auto per_partition_rate_limit_options = get_per_partition_rate_limit_options(schema_extensions);

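The two wrapper types passed to `cp.validate` above follow Seastar's `bool_class` pattern: each flag gets its own type so the call site cannot swap them. A hedged, standalone sketch of that pattern (names mirror the diff, the enum-based implementation is illustrative):

```cpp
#include <cassert>

// Tagged booleans: swapping the two arguments is a compile error,
// which two adjacent raw bools would silently allow.
enum class dicts_feature_enabled : bool { no = false, yes = true };
enum class dicts_usage_allowed : bool { no = false, yes = true };

bool dict_compression_permitted(dicts_feature_enabled feat,
                                dicts_usage_allowed allowed) {
    // Dictionary-based compressors pass validation only when the cluster
    // feature is enabled and DDL usage is allowed by configuration.
    return feat == dicts_feature_enabled::yes
        && allowed == dicts_usage_allowed::yes;
}
```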
View File

@@ -113,10 +113,9 @@ future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector
if (rs->uses_tablets()) {
warnings.push_back(
"Tables in this keyspace will be replicated using Tablets "
"and will not support CDC, LWT and counters features. "
"To use CDC, LWT or counters, drop this keyspace and re-create it "
"without tablets by adding AND TABLETS = {'enabled': false} "
"to the CREATE KEYSPACE statement.");
"and will not support Materialized Views, Secondary Indexes, CDC, LWT and counters features. "
"To use Materialized Views, Secondary Indexes, CDC, LWT or counters, drop this keyspace and re-create it "
"without tablets by adding AND TABLETS = {'enabled': false} to the CREATE KEYSPACE statement.");
if (ksm->initial_tablets().value()) {
warnings.push_back("Keyspace `initial` tablets option is deprecated. Use per-table tablet options instead.");
}
@@ -125,15 +124,26 @@ future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector
// If `rf_rack_valid_keyspaces` is enabled, it's forbidden to create an RF-rack-invalid keyspace.
// Verify that it's RF-rack-valid.
// For more context, see: scylladb/scylladb#23071.
if (cfg.rf_rack_valid_keyspaces()) {
try {
// We hold a group0_guard, so it's correct to check this here.
// The topology or schema cannot change while we're performing this query.
locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
} catch (const std::exception& e) {
try {
// We hold a group0_guard, so it's correct to check this here.
// The topology or schema cannot change while we're performing this query.
locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
} catch (const std::exception& e) {
if (cfg.rf_rack_valid_keyspaces()) {
// There's no guarantee what the type of the exception will be, so we need to
// wrap it manually here in a type that can be passed to the user.
throw exceptions::invalid_request_exception(e.what());
} else {
// Even when the configuration option `rf_rack_valid_keyspaces` is set to false,
// we'd like to inform the user that the keyspace they're creating does not
// satisfy the restriction--but just as a warning.
// For more context, see issue: scylladb/scylladb#23330.
warnings.push_back(seastar::format(
"Keyspace '{}' is not RF-rack-valid: the replication factor doesn't match "
"the rack count in at least one datacenter. A rack failure may reduce availability. "
"For more context, see: "
"https://docs.scylladb.com/manual/stable/reference/glossary.html#term-RF-rack-valid-keyspace.",
_name));
}
}
} catch (const exceptions::already_exists_exception& e) {

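The restructuring above moves the `rf_rack_valid_keyspaces` check inside the catch block: the validity check now always runs, and the option only decides whether a violation is a hard error or a warning. A minimal sketch of that control flow, with illustrative signatures:

```cpp
#include <cassert>
#include <stdexcept>
#include <string>
#include <vector>

// The check runs unconditionally; `strict` (rf_rack_valid_keyspaces in the
// diff) only chooses between rejecting the request and warning the user.
void check_rf_rack(bool strict, bool keyspace_is_valid,
                   std::vector<std::string>& warnings) {
    try {
        if (!keyspace_is_valid) {
            throw std::runtime_error("keyspace is not RF-rack-valid");
        }
    } catch (const std::exception& e) {
        if (strict) {
            // Re-wrap so the user sees a well-defined request error type.
            throw std::invalid_argument(e.what());
        }
        // Guardrail disabled: demote the violation to a warning (#23330).
        warnings.push_back(e.what());
    }
}
```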
View File

@@ -27,6 +27,7 @@ target_sources(db
extensions.cc
heat_load_balance.cc
large_data_handler.cc
corrupt_data_handler.cc
marshal/type_parser.cc
batchlog_manager.cc
tags/utils.cc

View File

@@ -36,7 +36,7 @@
static logging::logger blogger("batchlog_manager");
const uint32_t db::batchlog_manager::replay_interval;
const std::chrono::seconds db::batchlog_manager::replay_interval;
const uint32_t db::batchlog_manager::page_size;
db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, batchlog_manager_config config)
@@ -116,7 +116,8 @@ future<> db::batchlog_manager::batchlog_replay_loop() {
} catch (...) {
blogger.error("Exception in batch replay: {}", std::current_exception());
}
delay = std::chrono::milliseconds(replay_interval);
delay = utils::get_local_injector().is_enabled("short_batchlog_manager_replay_interval") ?
std::chrono::seconds(1) : replay_interval;
}
}
@@ -132,6 +133,8 @@ future<> db::batchlog_manager::drain() {
_sem.broken();
}
co_await _qp.proxy().abort_batch_writes();
co_await std::move(_loop_done);
blogger.info("Drained");
}
@@ -173,6 +176,11 @@ future<> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cle
return make_ready_future<stop_iteration>(stop_iteration::no);
}
if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
blogger.debug("Skipping batch replay due to skip_batch_replay injection");
return make_ready_future<stop_iteration>(stop_iteration::no);
}
// check version of serialization format
if (!row.has("version")) {
blogger.warn("Skipping logged batch because of unknown version");
@@ -242,7 +250,8 @@ future<> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cle
// send to partially or wholly fail in actually sending stuff. Since we don't
// have hints (yet), send with CL=ALL, and hope we can re-do this soon.
// See below, we use retry on write failure.
return _qp.proxy().mutate(mutations, db::consistency_level::ALL, db::no_timeout, nullptr, empty_service_permit(), db::allow_per_partition_rate_limit::no);
auto timeout = db::timeout_clock::now() + write_timeout;
return _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
});
}).then_wrapped([this, id](future<> batch_result) {
try {

View File

@@ -43,8 +43,9 @@ public:
using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;
private:
static constexpr uint32_t replay_interval = 60 * 1000; // milliseconds
static constexpr std::chrono::seconds replay_interval = std::chrono::seconds(60);
static constexpr uint32_t page_size = 128; // same as HHOM, for now, w/out using any heuristics. TODO: set based on avg batch size.
static constexpr std::chrono::seconds write_timeout = std::chrono::seconds(300);
using clock_type = lowres_clock;

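The point of switching `replay_interval` from `uint32_t ... // milliseconds` to `std::chrono::seconds` above is that the unit travels with the value: conversion to a finer unit is implicit, lossless, and compiler-checked instead of comment-checked. A small sketch, also mirroring the injection hook from `batchlog_replay_loop` (names are illustrative):

```cpp
#include <cassert>
#include <chrono>

// Typed duration: no "is this ms or s?" ambiguity at the use site.
static constexpr std::chrono::seconds replay_interval{60};

std::chrono::milliseconds replay_delay(bool short_interval_injected) {
    // Tests can shrink the delay via the injection flag without touching
    // the production constant; seconds -> milliseconds converts implicitly.
    return short_interval_injected ? std::chrono::seconds(1) : replay_interval;
}
```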
View File

@@ -800,6 +800,8 @@ class db::commitlog::segment : public enable_shared_from_this<segment>, public c
void end_flush() {
_segment_manager->end_flush();
if (can_delete()) {
// #25709 - do this early if possible
_extended_segments.clear();
_segment_manager->discard_unused_segments();
}
}
@@ -875,6 +877,8 @@ public:
void release_cf_count(const cf_id_type& cf) {
mark_clean(cf, 1);
if (can_delete()) {
// #25709 - do this early if possible
_extended_segments.clear();
_segment_manager->discard_unused_segments();
}
}
@@ -2576,20 +2580,24 @@ struct fmt::formatter<db::commitlog::segment::cf_mark> {
void db::commitlog::segment_manager::discard_unused_segments() noexcept {
clogger.trace("Checking for unused segments ({} active)", _segments.size());
std::erase_if(_segments, [=](sseg_ptr s) {
if (s->can_delete()) {
clogger.debug("Segment {} is unused", *s);
return true;
}
if (s->is_still_allocating()) {
clogger.debug("Not safe to delete segment {}; still allocating.", *s);
} else if (!s->is_clean()) {
clogger.debug("Not safe to delete segment {}; dirty is {}", *s, segment::cf_mark {*s});
} else {
clogger.debug("Not safe to delete segment {}; disk ops pending", *s);
}
return false;
});
// #25709 ensure we don't free any segment until after prune.
{
auto tmp = _segments;
std::erase_if(_segments, [=](sseg_ptr s) {
if (s->can_delete()) {
clogger.debug("Segment {} is unused", *s);
return true;
}
if (s->is_still_allocating()) {
clogger.debug("Not safe to delete segment {}; still allocating.", *s);
} else if (!s->is_clean()) {
clogger.debug("Not safe to delete segment {}; dirty is {}", *s, segment::cf_mark {*s});
} else {
clogger.debug("Not safe to delete segment {}; disk ops pending", *s);
}
return false;
});
}
// launch in background, but guard with gate so this deletion is
// sure to finish in shutdown, because at least through this path,
@@ -2878,7 +2886,10 @@ future<> db::commitlog::segment_manager::do_pending_deletes() {
}
future<> db::commitlog::segment_manager::orphan_all() {
_segments.clear();
// #25709. the actual process of destroying the elements here
// might cause a call into discard_unused_segments.
// ensure the target vector is empty when we get to destructors
auto tmp = std::exchange(_segments, {});
return clear_reserve_segments();
}
@@ -3255,9 +3266,13 @@ const db::commitlog::config& db::commitlog::active_config() const {
return _segment_manager->cfg;
}
db::commitlog::segment_data_corruption_error::segment_data_corruption_error(std::string_view msg, uint64_t s)
: _msg(fmt::format("Segment data corruption: {}", msg))
, _bytes(s)
{}
db::commitlog::segment_truncation::segment_truncation(uint64_t pos)
: _msg(fmt::format("Segment truncation at {}", pos))
db::commitlog::segment_truncation::segment_truncation(std::string_view reason, uint64_t pos)
: _msg(fmt::format("Segment truncation at {}. Reason: {}", pos, reason))
, _pos(pos)
{}
@@ -3447,7 +3462,8 @@ db::commitlog::read_log_file(const replay_state& state, sstring filename, sstrin
while (rem < size) {
if (eof) {
throw segment_truncation(block_boundry);
auto reason = fmt::format("unexpected EOF, rem={}, size={}", rem, size);
throw segment_truncation(std::move(reason), block_boundry);
}
auto block_size = alignment - initial.size_bytes();
@@ -3458,7 +3474,8 @@ db::commitlog::read_log_file(const replay_state& state, sstring filename, sstrin
if (tmp.size_bytes() == 0) {
eof = true;
throw segment_truncation(block_boundry);
auto reason = fmt::format("read 0 bytes, while tried to read {}", block_size);
throw segment_truncation(std::move(reason), block_boundry);
}
crc32_nbo crc;
@@ -3493,10 +3510,12 @@ db::commitlog::read_log_file(const replay_state& state, sstring filename, sstrin
auto checksum = crc.checksum();
if (check != checksum) {
throw segment_data_corruption_error("Data corruption", alignment);
auto reason = fmt::format("checksums do not match: {:x} vs. {:x}", check, checksum);
throw segment_data_corruption_error(std::move(reason), alignment);
}
if (id != this->id) {
throw segment_truncation(pos + rem);
auto reason = fmt::format("IDs do not match: {} vs. {}", id, this->id);
throw segment_truncation(std::move(reason), pos + rem);
}
}
tmp.remove_suffix(detail::sector_overhead_size);
@@ -3771,7 +3790,8 @@ db::commitlog::read_log_file(const replay_state& state, sstring filename, sstrin
co_await read_chunk();
}
if (corrupt_size > 0) {
throw segment_data_corruption_error("Data corruption", corrupt_size);
auto reason = fmt::format("corrupted size while reading file: {}", corrupt_size);
throw segment_data_corruption_error(std::move(reason), corrupt_size);
}
} catch (...) {
p = std::current_exception();

View File

@@ -392,9 +392,7 @@ public:
class segment_data_corruption_error: public segment_error {
std::string _msg;
public:
segment_data_corruption_error(std::string msg, uint64_t s)
: _msg(std::move(msg)), _bytes(s) {
}
segment_data_corruption_error(std::string_view msg, uint64_t s);
uint64_t bytes() const {
return _bytes;
}
@@ -425,7 +423,7 @@ public:
std::string _msg;
uint64_t _pos;
public:
segment_truncation(uint64_t);
segment_truncation(std::string_view reason, uint64_t position);
uint64_t position() const;
const char* what() const noexcept override;

View File

@@ -86,6 +86,12 @@ object_storage_endpoints_to_json(const std::vector<db::object_storage_endpoint_p
return value_to_json(m);
}
static
json::json_return_type
uuid_to_json(const db::config::UUID& uuid) {
return value_to_json(format("{}", uuid));
}
// Convert a value that can be printed with fmt::format, or a vector of
// such values, to JSON. An example is enum_option<T>, because enum_option<T>
// has a specialization for fmt::formatter.
@@ -294,6 +300,12 @@ const config_type& config_type_for<std::vector<db::object_storage_endpoint_param
return ct;
}
template <>
const config_type& config_type_for<db::config::UUID>() {
static config_type ct("UUID", uuid_to_json);
return ct;
}
}
namespace YAML {
@@ -491,6 +503,22 @@ struct convert<db::object_storage_endpoint_param> {
}
};
template<>
struct convert<utils::UUID> {
static bool decode(const Node& node, utils::UUID& uuid) {
std::string uuid_string;
if (!convert<std::string>::decode(node, uuid_string)) {
return false;
}
try {
std::istringstream(uuid_string) >> uuid;
} catch (boost::program_options::invalid_option_value&) {
return false;
}
return true;
}
};
}
#if defined(DEBUG)
@@ -819,7 +847,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, inter_dc_stream_throughput_outbound_megabits_per_sec(this, "inter_dc_stream_throughput_outbound_megabits_per_sec", value_status::Unused, 0,
"Throttles all streaming file transfer between the data centers. This setting throttles streaming throughput between data centers in addition to throttling all network stream traffic as configured with stream_throughput_outbound_megabits_per_sec.")
, stream_io_throughput_mb_per_sec(this, "stream_io_throughput_mb_per_sec", liveness::LiveUpdate, value_status::Used, 0,
"Throttles streaming I/O to the specified total throughput (in MiBs/s) across the entire system. Streaming I/O includes the one performed by repair and both RBNO and legacy topology operations such as adding or removing a node. Setting the value to 0 disables stream throttling.")
"Throttles streaming I/O to the specified total throughput (in MiB/s) across the entire system. Streaming I/O includes that performed by repair and by both RBNO and legacy topology operations such as adding or removing a node. Setting the value to 0 disables stream throttling. It is recommended to set this parameter to 75% of the network bandwidth.")
, stream_plan_ranges_fraction(this, "stream_plan_ranges_fraction", liveness::LiveUpdate, value_status::Used, 0.1,
"Specify the fraction of ranges to stream in a single stream plan. Value is between 0 and 1.")
, enable_file_stream(this, "enable_file_stream", liveness::LiveUpdate, value_status::Used, true, "Set true to use file based stream for tablet instead of mutation based stream")
@@ -942,6 +970,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"The default timeout for other, miscellaneous operations.\n"
"\n"
"Related information: About hinted handoff writes")
, request_timeout_on_shutdown_in_seconds(this, "request_timeout_on_shutdown_in_seconds", value_status::Used, 30,
"Timeout for CQL server requests on shutdown. After this timeout the server will shut down all connections.")
, group0_raft_op_timeout_in_ms(this, "group0_raft_op_timeout_in_ms", liveness::LiveUpdate, value_status::Used, 60000,
"The time in milliseconds that group0 allows a Raft operation to complete.")
/**
@@ -1230,7 +1260,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, sstable_summary_ratio(this, "sstable_summary_ratio", value_status::Used, 0.0005, "Enforces that 1 byte of summary is written for every N (2000 by default) "
"bytes written to the data file. Value must be between 0 and 1.")
, components_memory_reclaim_threshold(this, "components_memory_reclaim_threshold", liveness::LiveUpdate, value_status::Used, .2, "Ratio of available memory for all in-memory components of SSTables in a shard beyond which the memory will be reclaimed from components until it falls back under the threshold. Currently, this limit is only enforced for bloom filters.")
, large_memory_allocation_warning_threshold(this, "large_memory_allocation_warning_threshold", value_status::Used, (size_t(128) << 10) + 1, "Warn about memory allocations above this size; set to zero to disable.")
, large_memory_allocation_warning_threshold(this, "large_memory_allocation_warning_threshold", value_status::Used, size_t(1) << 20, "Warn about memory allocations above this size; set to zero to disable.")
, enable_deprecated_partitioners(this, "enable_deprecated_partitioners", value_status::Used, false, "Enable the byteordered and random partitioners. These partitioners are deprecated and will be removed in a future version.")
, enable_keyspace_column_family_metrics(this, "enable_keyspace_column_family_metrics", value_status::Used, false, "Enable per keyspace and per column family metrics reporting.")
, enable_node_aggregated_table_metrics(this, "enable_node_aggregated_table_metrics", value_status::Used, true, "Enable aggregated per node, per keyspace and per table metrics reporting, applicable if enable_keyspace_column_family_metrics is false.")
@@ -1243,6 +1273,13 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, enable_sstables_mc_format(this, "enable_sstables_mc_format", value_status::Unused, true, "Enable SSTables 'mc' format to be used as the default file format. Deprecated, please use \"sstable_format\" instead.")
, enable_sstables_md_format(this, "enable_sstables_md_format", value_status::Unused, true, "Enable SSTables 'md' format to be used as the default file format. Deprecated, please use \"sstable_format\" instead.")
, sstable_format(this, "sstable_format", value_status::Used, "me", "Default sstable file format", {"md", "me"})
, sstable_compression_dictionaries_allow_in_ddl(this, "sstable_compression_dictionaries_allow_in_ddl", liveness::LiveUpdate, value_status::Used, true,
"Allows for configuring tables to use SSTable compression with shared dictionaries. "
"If the option is disabled, Scylla will reject CREATE and ALTER statements which try to set dictionary-based sstable compressors.\n"
"This is only enforced when this node validates a new DDL statement; disabling the option won't disable dictionary-based compression "
"on tables which already have it configured, and won't do anything to existing sstables.\n"
"To affect existing tables, you can ALTER them to a non-dictionary compressor, or disable dictionary compression "
"for the whole node through `sstable_compression_dictionaries_enable_writing`.")
, sstable_compression_dictionaries_enable_writing(this, "sstable_compression_dictionaries_enable_writing", liveness::LiveUpdate, value_status::Used, true,
"Enables SSTable compression with shared dictionaries (for tables which opt in). If set to false, this node won't write any new SSTables using dictionary compression.\n"
"Option meant not for regular usage, but for unforeseen problems that call for disabling dictionaries without modifying table schema.")
@@ -1392,7 +1429,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"The maximum fraction of cache memory permitted for use by index cache. Clamped to the [0.0; 1.0] range. Must be small enough to not deprive the row cache of memory, but should be big enough to fit a large fraction of the index. The default value 0.2 means that at least 80\% of cache memory is reserved for the row cache, while at most 20\% is usable by the index cache.")
, consistent_cluster_management(this, "consistent_cluster_management", value_status::Deprecated, true, "Use RAFT for cluster management and DDL.")
, force_gossip_topology_changes(this, "force_gossip_topology_changes", value_status::Used, false, "Force gossip-based topology operations in a fresh cluster. Only the first node in the cluster must use it. The rest will fall back to gossip-based operations anyway. This option should be used only for testing. Note: gossip topology changes are incompatible with tablets.")
, recovery_leader(this, "recovery_leader", liveness::LiveUpdate, value_status::Used, "", "Host ID of the node restarted first while performing the Manual Raft-based Recovery Procedure. Warning: this option disables some guardrails for the needs of the Manual Raft-based Recovery Procedure. Make sure you unset it at the end of the procedure.")
, recovery_leader(this, "recovery_leader", liveness::LiveUpdate, value_status::Used, utils::null_uuid(), "Host ID of the node restarted first while performing the Manual Raft-based Recovery Procedure. Warning: this option disables some guardrails for the needs of the Manual Raft-based Recovery Procedure. Make sure you unset it at the end of the procedure.")
, wasm_cache_memory_fraction(this, "wasm_cache_memory_fraction", value_status::Used, 0.01, "Maximum total size of all WASM instances stored in the cache as fraction of total shard memory.")
, wasm_cache_timeout_in_ms(this, "wasm_cache_timeout_in_ms", value_status::Used, 5000, "Time after which an instance is evicted from the cache.")
, wasm_cache_instance_size_limit(this, "wasm_cache_instance_size_limit", value_status::Used, 1024*1024, "Instances with size above this limit will not be stored in the cache.")

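The new `convert<utils::UUID>::decode` above parses the YAML scalar through a stream extractor. A hedged sketch of that shape, with `int` standing in for `utils::UUID` (which provides `operator>>` in Scylla); here failure is reported via the stream's fail state, whereas the real code additionally catches the exception its extractor may throw:

```cpp
#include <cassert>
#include <sstream>
#include <string>

// Stream-based decode: extract from the string, report success/failure
// through the return value as YAML::convert specializations do.
bool decode_value(const std::string& text, int& out) {
    std::istringstream in(text);
    in >> out;
    return !in.fail();
}
```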
View File

@@ -207,6 +207,7 @@ public:
using seed_provider_type = db::seed_provider_type;
using hinted_handoff_enabled_type = db::hints::host_filter;
using error_injection_at_startup = db::error_injection_at_startup;
using UUID = utils::UUID;
/*
* All values and documentation taken from
@@ -322,6 +323,7 @@ public:
named_value<uint32_t> truncate_request_timeout_in_ms;
named_value<uint32_t> write_request_timeout_in_ms;
named_value<uint32_t> request_timeout_in_ms;
named_value<uint32_t> request_timeout_on_shutdown_in_seconds;
named_value<uint32_t> group0_raft_op_timeout_in_ms;
named_value<bool> cross_node_timeout;
named_value<uint32_t> internode_send_buff_size_in_bytes;
@@ -436,6 +438,7 @@ public:
named_value<bool> enable_sstables_mc_format;
named_value<bool> enable_sstables_md_format;
named_value<sstring> sstable_format;
named_value<bool> sstable_compression_dictionaries_allow_in_ddl;
named_value<bool> sstable_compression_dictionaries_enable_writing;
named_value<float> sstable_compression_dictionaries_memory_budget_fraction;
named_value<float> sstable_compression_dictionaries_retrain_period_in_seconds;
@@ -520,7 +523,7 @@ public:
named_value<bool> consistent_cluster_management;
named_value<bool> force_gossip_topology_changes;
named_value<sstring> recovery_leader;
named_value<UUID> recovery_leader;
named_value<double> wasm_cache_memory_fraction;
named_value<uint32_t> wasm_cache_timeout_in_ms;

db/corrupt_data_handler.cc Normal file (134 lines)
View File

@@ -0,0 +1,134 @@
/*
* Copyright (C) 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include "db/corrupt_data_handler.hh"
#include "reader_concurrency_semaphore.hh"
#include "replica/database.hh"
#include "utils/UUID_gen.hh"
static logging::logger corrupt_data_logger("corrupt_data");
namespace sm = seastar::metrics;
namespace db {
corrupt_data_handler::corrupt_data_handler(register_metrics rm) {
if (rm) {
_metrics.add_group("corrupt_data", {
sm::make_counter("entries_reported", _stats.corrupt_data_reported,
sm::description("Counts the number of corrupt data instances reported to the corrupt data handler. "
"A non-zero value indicates that the database suffered data corruption."))
});
}
}
future<corrupt_data_handler::entry_id> corrupt_data_handler::record_corrupt_clustering_row(const schema& s, const partition_key& pk,
clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
++_stats.corrupt_data_reported;
++_stats.corrupt_clustering_rows_reported;
return do_record_corrupt_clustering_row(s, pk, std::move(cr), std::move(origin), std::move(sstable_name)).then([this] (entry_id id) {
if (id) {
++_stats.corrupt_data_recorded;
++_stats.corrupt_clustering_rows_recorded;
}
return id;
});
}
system_table_corrupt_data_handler::system_table_corrupt_data_handler(config cfg, register_metrics rm)
: corrupt_data_handler(rm)
, _entry_ttl(cfg.entry_ttl)
, _sys_ks("system_table_corrupt_data_handler::system_keyspace")
{
}
system_table_corrupt_data_handler::~system_table_corrupt_data_handler() {
}
reader_permit system_table_corrupt_data_handler::make_fragment_permit(const schema& s) {
return _fragment_semaphore->make_tracking_only_permit(s.shared_from_this(), "system_table_corrupt_data_handler::make_fragment_permit", db::no_timeout, {});
}
future<corrupt_data_handler::entry_id> system_table_corrupt_data_handler::do_record_corrupt_mutation_fragment(
pluggable_system_keyspace::permit sys_ks,
const schema& user_table_schema,
const partition_key& pk,
const clustering_key& ck,
mutation_fragment_v2::kind kind,
frozen_mutation_fragment_v2 fmf,
sstring origin,
std::optional<sstring> sstable_name) {
const corrupt_data_handler::entry_id id{utils::UUID_gen::get_time_UUID()};
const auto corrupt_data_schema = sys_ks->local_db().find_column_family(system_keyspace::NAME, system_keyspace::CORRUPT_DATA).schema();
// Using the lower-level mutation API to avoid large allocation warnings when linearizing the frozen mutation fragment.
mutation entry_mutation(corrupt_data_schema, partition_key::from_exploded(*corrupt_data_schema, {serialized(user_table_schema.ks_name()), serialized(user_table_schema.cf_name())}));
auto& entry_row = entry_mutation.partition().clustered_row(*corrupt_data_schema, clustering_key::from_single_value(*corrupt_data_schema, serialized(timeuuid_native_type{id.uuid()})));
const auto timestamp = api::new_timestamp();
auto set_cell_raw = [this, &entry_row, &corrupt_data_schema, timestamp] (const char* cell_name, managed_bytes cell_value) {
auto cdef = corrupt_data_schema->get_column_definition(cell_name);
SCYLLA_ASSERT(cdef);
entry_row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, timestamp, cell_value, _entry_ttl));
};
auto set_cell = [this, &entry_row, &corrupt_data_schema, timestamp] (const char* cell_name, data_value cell_value) {
auto cdef = corrupt_data_schema->get_column_definition(cell_name);
SCYLLA_ASSERT(cdef);
entry_row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, timestamp, cell_value.serialize_nonnull(), _entry_ttl));
};
entry_row.apply(row_marker(timestamp, _entry_ttl, gc_clock::now() + _entry_ttl));
set_cell("partition_key", data_value(to_bytes(pk.representation())));
set_cell("clustering_key", data_value(to_bytes(ck.representation())));
set_cell("mutation_fragment_kind", fmt::to_string(kind));
// FIXME: Exposing knowledge here that bytes are serialized by just storing the raw value.
// Need to replace with a fragmented-buffer serialize API call, which we don't have yet.
set_cell_raw("frozen_mutation_fragment", std::move(fmf).representation().to_managed_bytes());
set_cell("origin", origin);
set_cell("sstable_name", sstable_name);
return sys_ks->apply_mutation(std::move(entry_mutation)).then([id] {
return id;
});
}
future<corrupt_data_handler::entry_id> system_table_corrupt_data_handler::do_record_corrupt_clustering_row(const schema& s, const partition_key& pk,
clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
auto sys_ks = _sys_ks.get_permit();
if (!sys_ks) {
co_return corrupt_data_handler::entry_id::create_null_id();
}
const auto ck = cr.key();
auto fmf = freeze(s, mutation_fragment_v2(s, make_fragment_permit(s), std::move(cr)));
co_return co_await do_record_corrupt_mutation_fragment(std::move(sys_ks), s, pk, ck, mutation_fragment_v2::kind::clustering_row, std::move(fmf),
std::move(origin), std::move(sstable_name));
}
void system_table_corrupt_data_handler::plug_system_keyspace(db::system_keyspace& sys_ks) noexcept {
_sys_ks.plug(sys_ks.shared_from_this());
_fragment_semaphore = std::make_unique<reader_concurrency_semaphore>(reader_concurrency_semaphore::no_limits{}, "system_table_corrupt_data_handler", reader_concurrency_semaphore::register_metrics::no);
}
future<> system_table_corrupt_data_handler::unplug_system_keyspace() noexcept {
co_await _sys_ks.unplug();
co_await _fragment_semaphore->stop();
}
future<corrupt_data_handler::entry_id> nop_corrupt_data_handler::do_record_corrupt_clustering_row(const schema& s, const partition_key& pk,
clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
return make_ready_future<entry_id>(entry_id::create_null_id());
}
} // namespace db

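The bookkeeping in `record_corrupt_clustering_row` above can be summarized in a few lines: every report bumps the "reported" counter, while "recorded" advances only when the backend hands back a real entry id (a null id, as returned by `nop_corrupt_data_handler` or when no system keyspace is plugged, means nothing was stored). An illustrative sketch, not the actual handler types:

```cpp
#include <cassert>
#include <optional>

struct handler_stats { int reported = 0; int recorded = 0; };

// "reported" counts every attempt; "recorded" only successful stores.
std::optional<int> report_corruption(handler_stats& st,
                                     std::optional<int> backend_id) {
    ++st.reported;
    if (backend_id) {
        ++st.recorded;
    }
    return backend_id;
}
```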
db/corrupt_data_handler.hh Normal file (110 lines)
View File

@@ -0,0 +1,110 @@
/*
* Copyright (C) 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include "db/system_keyspace.hh"
#include "utils/UUID.hh"
#include "utils/pluggable.hh"
class reader_concurrency_semaphore;
class reader_permit;
namespace db {
class corrupt_data_handler {
public:
// An ID identifying the corrupt data entry.
// To be interpreted in the context of the storage where it is recorded, see storage_name().
using entry_id = utils::tagged_uuid<struct corrupt_data_entry_tag>;
struct stats {
// Counters for the number of corrupt data entries reported.
uint64_t corrupt_data_reported = 0;
// Counters for the number of corrupt data entries recorded.
// Can be less than reported depending on the configuration or if entries failed to be recorded.
uint64_t corrupt_data_recorded = 0;
uint64_t corrupt_clustering_rows_reported = 0;
uint64_t corrupt_clustering_rows_recorded = 0;
};
private:
stats _stats;
seastar::metrics::metric_groups _metrics;
protected:
virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) = 0;
public:
using register_metrics = bool_class<struct corrupt_data_handler_register_metrics_tag>;
explicit corrupt_data_handler(register_metrics);
virtual ~corrupt_data_handler() = default;
const stats& get_stats() const noexcept {
return _stats;
}
// The name of the storage where corrupt data is recorded.
// The storage-name and the entry-id together should allow the user to unambiguously locate the entry.
virtual sstring storage_name() const noexcept = 0;
// Record a corrupt clustering row.
// If the returned id is null, the row was not recorded.
future<entry_id> record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name);
};
// Stores corrupt data entries in the system.corrupt_data table.
class system_table_corrupt_data_handler final : public corrupt_data_handler {
public:
using pluggable_system_keyspace = utils::pluggable<db::system_keyspace>;
struct config {
gc_clock::duration entry_ttl;
};
private:
gc_clock::duration _entry_ttl;
pluggable_system_keyspace _sys_ks;
std::unique_ptr<reader_concurrency_semaphore> _fragment_semaphore;
private:
reader_permit make_fragment_permit(const schema& s);
future<entry_id> do_record_corrupt_mutation_fragment(pluggable_system_keyspace::permit sys_ks, const schema& user_table_schema, const partition_key& pk, const clustering_key& ck,
mutation_fragment_v2::kind kind, frozen_mutation_fragment_v2 mf, sstring origin, std::optional<sstring> sstable_name);
virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) override;
public:
explicit system_table_corrupt_data_handler(config, register_metrics);
~system_table_corrupt_data_handler();
virtual sstring storage_name() const noexcept override {
return format("{}.{}", db::system_keyspace::NAME, db::system_keyspace::CORRUPT_DATA);
}
void plug_system_keyspace(db::system_keyspace& sys_ks) noexcept;
future<> unplug_system_keyspace() noexcept;
};
// A no-op corrupt data handler that does not record any data.
class nop_corrupt_data_handler final : public corrupt_data_handler {
virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) override;
public:
explicit nop_corrupt_data_handler(register_metrics rm)
: corrupt_data_handler(rm) {}
virtual sstring storage_name() const noexcept override {
return "/dev/null";
}
};
} // namespace db
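The classes above split recording into a public non-virtual entry point (`record_corrupt_clustering_row`) and a protected virtual hook (`do_record_corrupt_clustering_row`), with a no-op subclass that reports "not recorded" via a null id. A minimal, seastar-free sketch of that dispatch pattern follows; the stats field names and the synchronous signatures are simplified stand-ins, not the real Scylla types:

```cpp
#include <cassert>
#include <cstdint>
#include <optional>
#include <string>

// Simplified stand-in: the real code returns a seastar future<entry_id>
// and takes schema/partition_key/clustering_row arguments.
using entry_id = std::optional<std::uint64_t>; // nullopt == "not recorded"

class corrupt_data_handler {
public:
    struct stats { // hypothetical counters for illustration
        std::uint64_t corrupt_data_reported = 0;
        std::uint64_t corrupt_data_recorded = 0;
    };
    virtual ~corrupt_data_handler() = default;
    const stats& get_stats() const noexcept { return _stats; }
    virtual std::string storage_name() const noexcept = 0;
    // Non-virtual entry point: count the report, delegate to the subclass
    // hook, and count a successful recording.
    entry_id record_corrupt_clustering_row(const std::string& row, const std::string& origin) {
        ++_stats.corrupt_data_reported;
        auto id = do_record_corrupt_clustering_row(row, origin);
        if (id) {
            ++_stats.corrupt_data_recorded;
        }
        return id;
    }
protected:
    virtual entry_id do_record_corrupt_clustering_row(const std::string& row, const std::string& origin) = 0;
private:
    stats _stats;
};

// Analogue of nop_corrupt_data_handler: reports are counted, nothing is stored.
class nop_handler final : public corrupt_data_handler {
public:
    std::string storage_name() const noexcept override { return "/dev/null"; }
protected:
    entry_id do_record_corrupt_clustering_row(const std::string&, const std::string&) override {
        return std::nullopt; // null id: the row was not recorded
    }
};
```

The non-virtual wrapper keeps the bookkeeping (stats, metrics) in one place while subclasses only decide where, and whether, the entry lands.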


@@ -65,18 +65,18 @@ future<> hint_endpoint_manager::do_store_hint(schema_ptr s, lw_shared_ptr<const
const replay_position rp = rh.release();
if (_last_written_rp < rp) {
_last_written_rp = rp;
manager_logger.debug("[{}] Updated last written replay position to {}", end_point_key(), rp);
manager_logger.trace("hint_endpoint_manager[{}]:do_store_hint: Updated last written replay position to {}", end_point_key(), rp);
}
++shard_stats().written;
manager_logger.trace("Hint to {} was stored", end_point_key());
manager_logger.trace("hint_endpoint_manager[{}]:do_store_hint: Hint has been stored", end_point_key());
tracing::trace(tr_state, "Hint to {} was stored", end_point_key());
} catch (...) {
++shard_stats().errors;
const auto eptr = std::current_exception();
manager_logger.debug("store_hint(): got the exception when storing a hint to {}: {}", end_point_key(), eptr);
manager_logger.debug("hint_endpoint_manager[{}]:do_store_hint: Exception when storing a hint: {}", end_point_key(), eptr);
tracing::trace(tr_state, "Failed to store a hint to {}: {}", end_point_key(), eptr);
}
@@ -92,7 +92,7 @@ bool hint_endpoint_manager::store_hint(schema_ptr s, lw_shared_ptr<const frozen_
return do_store_hint(std::move(s), std::move(fm), tr_state);
});
} catch (...) {
manager_logger.trace("Failed to store a hint to {}: {}", end_point_key(), std::current_exception());
manager_logger.trace("hint_endpoint_manager[{}]:store_hint: Failed to store a hint: {}", end_point_key(), std::current_exception());
tracing::trace(tr_state, "Failed to store a hint to {}: {}", end_point_key(), std::current_exception());
++shard_stats().dropped;
@@ -109,16 +109,23 @@ future<> hint_endpoint_manager::populate_segments_to_replay() {
}
void hint_endpoint_manager::start() {
manager_logger.debug("hint_endpoint_manager[{}]:start: Starting", end_point_key());
clear_stopped();
allow_hints();
_sender.start();
manager_logger.debug("hint_endpoint_manager[{}]:start: Finished", end_point_key());
}
future<> hint_endpoint_manager::stop(drain should_drain) noexcept {
if(stopped()) {
if (stopped()) {
manager_logger.warn("hint_endpoint_manager[{}]:stop: Stop had already been called", end_point_key());
return make_exception_future<>(std::logic_error(format("ep_manager[{}]: stop() is called twice", _key).c_str()));
}
manager_logger.debug("hint_endpoint_manager[{}]:stop: Starting", end_point_key());
return seastar::async([this, should_drain] {
std::exception_ptr eptr;
@@ -139,10 +146,11 @@ future<> hint_endpoint_manager::stop(drain should_drain) noexcept {
}).handle_exception([&eptr] (auto e) { eptr = std::move(e); }).get();
if (eptr) {
manager_logger.error("ep_manager[{}]: exception: {}", _key, eptr);
manager_logger.error("hint_endpoint_manager[{}]:stop: Exception occurred: {}", _key, eptr);
}
set_stopped();
manager_logger.debug("hint_endpoint_manager[{}]:stop: Finished", end_point_key());
});
}
@@ -194,7 +202,7 @@ future<hints_store_ptr> hint_endpoint_manager::get_or_load() {
}
future<db::commitlog> hint_endpoint_manager::add_store() noexcept {
manager_logger.trace("Going to add a store to {}", _hints_dir.c_str());
manager_logger.debug("hint_endpoint_manager[{}]:add_store: Going to add a store: {}", end_point_key(), _hints_dir.native());
return futurize_invoke([this] {
return io_check([name = _hints_dir.c_str()] { return recursive_touch_directory(name); }).then([this] () {
@@ -289,6 +297,8 @@ future<db::commitlog> hint_endpoint_manager::add_store() noexcept {
_sender.add_segment(std::move(seg));
}
manager_logger.debug("hint_endpoint_manager[{}]:add_store: Finished", end_point_key());
co_return l;
});
});


@@ -56,8 +56,8 @@ future<> hint_sender::flush_maybe() noexcept {
if (current_time >= _next_flush_tp) {
return _ep_manager.flush_current_hints().then([this, current_time] {
_next_flush_tp = current_time + manager::hints_flush_period;
}).handle_exception([] (auto eptr) {
manager_logger.trace("flush_maybe() failed: {}", eptr);
}).handle_exception([this] (auto eptr) {
manager_logger.debug("hint_sender[{}]:flush_maybe: Failed with {}", _ep_key, eptr);
return make_ready_future<>();
});
}
@@ -115,7 +115,7 @@ const column_mapping& hint_sender::get_column_mapping(lw_shared_ptr<send_one_fil
throw no_column_mapping(fm.schema_version());
}
manager_logger.debug("new schema version {}", fm.schema_version());
manager_logger.trace("hint_sender[{}]:get_column_mapping: new schema version {}", _ep_key, fm.schema_version());
cm_it = ctx_ptr->schema_ver_to_column_mapping.emplace(fm.schema_version(), *hr.get_column_mapping()).first;
}
@@ -175,23 +175,22 @@ future<> hint_sender::stop(drain should_drain) noexcept {
//
// The next call for send_hints_maybe() will send the last hints to the current end point and when it is
// done there is going to be no more pending hints and the corresponding hints directory may be removed.
manager_logger.trace("Draining for {}: start", end_point_key());
manager_logger.trace("hint_sender[{}]:stop: Draining starts", end_point_key());
set_draining();
send_hints_maybe();
_ep_manager.flush_current_hints().handle_exception([] (auto e) {
manager_logger.error("Failed to flush pending hints: {}. Ignoring...", e);
_ep_manager.flush_current_hints().handle_exception([this] (auto e) {
manager_logger.error("hint_sender[{}]:stop: Failed to flush pending hints: {}. Ignoring", _ep_key, e);
}).get();
send_hints_maybe();
manager_logger.trace("Draining for {}: end", end_point_key());
manager_logger.trace("hint_sender[{}]:stop: Draining finished", end_point_key());
}
// TODO: Change this log to match the class name, but first make sure no test
// relies on the old one.
manager_logger.trace("ep_manager({})::sender: exiting", end_point_key());
manager_logger.debug("hint_sender[{}]:stop: Finished", end_point_key());
});
}
void hint_sender::cancel_draining() {
manager_logger.info("Draining of {} has been marked as canceled", _ep_key);
manager_logger.info("hint_sender[{}]:cancel_draining: Marking as canceled", _ep_key);
if (_state.contains(state::draining)) {
_state.remove(state::draining);
}
@@ -222,9 +221,8 @@ void hint_sender::start() {
attr.sched_group = _hints_cpu_sched_group;
_stopped = seastar::async(std::move(attr), [this] {
// TODO: Change this log to match the class name, but first make sure no test
// relies on the old one.
manager_logger.trace("ep_manager({})::sender: started", end_point_key());
manager_logger.debug("hint_sender[{}]:start: Starting", end_point_key());
while (!stopping()) {
try {
flush_maybe().get();
@@ -237,11 +235,11 @@ void hint_sender::start() {
break;
} catch (...) {
// log and keep on spinning
// TODO: Change this log to match the class name, but first make sure no test
// relies on the old one.
manager_logger.trace("sender: got the exception: {}", std::current_exception());
manager_logger.debug("hint_sender[{}]:start: Exception in the loop: {}", _ep_key, std::current_exception());
}
}
manager_logger.debug("hint_sender[{}]:start: Exited the loop", _ep_key);
});
}
@@ -257,14 +255,14 @@ future<> hint_sender::send_one_mutation(frozen_mutation_and_schema m) {
const auto dst = end_point_key();
if (std::ranges::contains(natural_endpoints, dst) && !tm.is_leaving(dst)) {
manager_logger.trace("Sending directly to {}", dst);
manager_logger.trace("hint_sender[{}]:send_one_mutation: Sending directly", dst);
return _proxy.send_hint_to_endpoint(std::move(m), std::move(ermp), dst);
} else {
if (manager_logger.is_enabled(log_level::trace)) {
if (tm.is_leaving(end_point_key())) {
manager_logger.trace("The original target endpoint {} is leaving. Mutating from scratch...", dst);
manager_logger.trace("hint_sender[{}]:send_one_mutation: Original target is leaving. Mutating from scratch", dst);
} else {
manager_logger.trace("Endpoints set has changed and {} is no longer a replica. Mutating from scratch...", dst);
manager_logger.trace("hint_sender[{}]:send_one_mutation: Endpoint set has changed and original target is no longer a replica. Mutating from scratch", dst);
}
}
return _proxy.send_hint_to_all_replicas(std::move(m));
@@ -288,9 +286,9 @@ future<> hint_sender::send_one_hint(lw_shared_ptr<send_one_file_ctx> ctx_ptr, fr
// Files are aggregated for at most manager::hints_timer_period therefore the oldest hint there is
// (last_modification - manager::hints_timer_period) old.
if (const auto now = gc_clock::now().time_since_epoch(); now - secs_since_file_mod > gc_grace_sec - manager::hints_flush_period) {
manager_logger.debug("send_hints(): the hint is too old, skipping it, "
manager_logger.trace("hint_sender[{}]:send_hints: Hint is too old, skipping it, "
"secs since file last modification {}, gc_grace_sec {}, hints_flush_period {}",
now - secs_since_file_mod, gc_grace_sec, manager::hints_flush_period);
_ep_key, now - secs_since_file_mod, gc_grace_sec, manager::hints_flush_period);
return make_ready_future<>();
}
@@ -299,24 +297,24 @@ future<> hint_sender::send_one_hint(lw_shared_ptr<send_one_file_ctx> ctx_ptr, fr
++this->shard_stats().sent_total;
this->shard_stats().sent_hints_bytes_total += mutation_size;
}).handle_exception([this, ctx_ptr] (auto eptr) {
manager_logger.trace("send_one_hint(): failed to send to {}: {}", end_point_key(), eptr);
manager_logger.trace("hint_sender[{}]:send_one_hint: Failed to send: {}", end_point_key(), eptr);
++this->shard_stats().send_errors;
return make_exception_future<>(std::move(eptr));
});
// ignore these errors and move on - probably this hint is too old and the KS/CF has been deleted...
} catch (replica::no_such_column_family& e) {
manager_logger.debug("send_hints(): no_such_column_family: {}", e.what());
manager_logger.debug("hint_sender[{}]:send_one_hint: no_such_column_family: {}", _ep_key, e.what());
++this->shard_stats().discarded;
} catch (replica::no_such_keyspace& e) {
manager_logger.debug("send_hints(): no_such_keyspace: {}", e.what());
manager_logger.debug("hint_sender[{}]:send_one_hint: no_such_keyspace: {}", _ep_key, e.what());
++this->shard_stats().discarded;
} catch (no_column_mapping& e) {
manager_logger.debug("send_hints(): {} at {}: {}", fname, rp, e.what());
manager_logger.debug("hint_sender[{}]:send_one_hint: no_column_mapping: {} at {}: {}", _ep_key, fname, rp, e.what());
++this->shard_stats().discarded;
} catch (...) {
auto eptr = std::current_exception();
manager_logger.debug("send_hints(): unexpected error in file {} at {}: {}", fname, rp, eptr);
manager_logger.debug("hint_sender[{}]:send_one_hint: Unexpected error in file {} at {}: {}", _ep_key, fname, rp, eptr);
++this->shard_stats().send_errors;
return make_exception_future<>(std::move(eptr));
}
@@ -338,21 +336,24 @@ future<> hint_sender::send_one_hint(lw_shared_ptr<send_one_file_ctx> ctx_ptr, fr
}
f.ignore_ready_future();
});
}).handle_exception([ctx_ptr, rp] (auto eptr) {
manager_logger.trace("send_one_file(): Hmmm. Something bad had happened: {}", eptr);
}).handle_exception([this, ctx_ptr, rp] (auto eptr) {
manager_logger.trace("hint_sender[{}]:send_one_hint: Exception occurred: {}", _ep_key, eptr);
ctx_ptr->on_hint_send_failure(rp);
});
}
void hint_sender::notify_replay_waiters() noexcept {
if (!_foreign_segments_to_replay.empty()) {
manager_logger.trace("[{}] notify_replay_waiters(): not notifying because there are still {} foreign segments to replay", end_point_key(), _foreign_segments_to_replay.size());
manager_logger.trace("hint_sender[{}]:notify_replay_waiters: Not notifying because there are still {} foreign segments to replay",
end_point_key(), _foreign_segments_to_replay.size());
return;
}
manager_logger.trace("[{}] notify_replay_waiters(): replay position upper bound was updated to {}", end_point_key(), _sent_upper_bound_rp);
manager_logger.trace("hint_sender[{}]:notify_replay_waiters: Replay position upper bound was updated to {}", end_point_key(), _sent_upper_bound_rp);
while (!_replay_waiters.empty() && _replay_waiters.begin()->first < _sent_upper_bound_rp) {
manager_logger.trace("[{}] notify_replay_waiters(): notifying one ({} < {})", end_point_key(), _replay_waiters.begin()->first, _sent_upper_bound_rp);
manager_logger.trace("hint_sender[{}]:notify_replay_waiters: Notifying one ({} < {})",
end_point_key(), _replay_waiters.begin()->first, _sent_upper_bound_rp);
auto ptr = _replay_waiters.begin()->second;
(**ptr).set_value();
(*ptr) = std::nullopt; // Prevent it from being resolved by abort source subscription
@@ -362,7 +363,7 @@ void hint_sender::notify_replay_waiters() noexcept {
void hint_sender::dismiss_replay_waiters() noexcept {
for (auto& p : _replay_waiters) {
manager_logger.debug("[{}] dismiss_replay_waiters(): dismissing one", end_point_key());
manager_logger.debug("hint_sender[{}]:dismiss_replay_waiters: Dismissing one", end_point_key());
auto ptr = p.second;
(**ptr).set_exception(std::runtime_error(format("Hints manager for {} is stopping", end_point_key())));
(*ptr) = std::nullopt; // Prevent it from being resolved by abort source subscription
@@ -371,14 +372,15 @@ void hint_sender::dismiss_replay_waiters() noexcept {
}
future<> hint_sender::wait_until_hints_are_replayed_up_to(abort_source& as, db::replay_position up_to_rp) {
manager_logger.debug("[{}] wait_until_hints_are_replayed_up_to(): entering with target {}", end_point_key(), up_to_rp);
manager_logger.debug("hint_sender[{}]:wait_until_hints_are_replayed_up_to: Entering with target {}", end_point_key(), up_to_rp);
if (_foreign_segments_to_replay.empty() && up_to_rp < _sent_upper_bound_rp) {
manager_logger.debug("[{}] wait_until_hints_are_replayed_up_to(): hints were already replayed above the point ({} < {})", end_point_key(), up_to_rp, _sent_upper_bound_rp);
manager_logger.debug("hint_sender[{}]:wait_until_hints_are_replayed_up_to: Hints were already replayed above the point ({} < {})",
end_point_key(), up_to_rp, _sent_upper_bound_rp);
return make_ready_future<>();
}
if (as.abort_requested()) {
manager_logger.debug("[{}] wait_until_hints_are_replayed_up_to(): already aborted - stopping", end_point_key());
manager_logger.debug("hint_sender[{}]:wait_until_hints_are_replayed_up_to: Already aborted - stopping", end_point_key());
return make_exception_future<>(abort_requested_exception());
}
@@ -389,7 +391,7 @@ future<> hint_sender::wait_until_hints_are_replayed_up_to(abort_source& as, db::
// The promise already was resolved by `notify_replay_waiters` and removed from the map
return;
}
manager_logger.debug("[{}] wait_until_hints_are_replayed_up_to(): abort requested - stopping", end_point_key());
manager_logger.debug("hint_sender[{}]:wait_until_hints_are_replayed_up_to: Abort requested - stopping", end_point_key());
_replay_waiters.erase(it);
(**ptr).set_exception(abort_requested_exception());
});
@@ -398,7 +400,7 @@ future<> hint_sender::wait_until_hints_are_replayed_up_to(abort_source& as, db::
// therefore we cannot capture `this`
auto ep = end_point_key();
return (**ptr).get_future().finally([sub = std::move(sub), ep] {
manager_logger.debug("[{}] wait_until_hints_are_replayed_up_to(): returning after the future was satisfied", ep);
manager_logger.debug("hint_sender[{}]:wait_until_hints_are_replayed_up_to: Returning after the future was satisfied", ep);
});
}
@@ -470,7 +472,7 @@ bool hint_sender::send_one_file(const sstring& fname) {
}
if (canceled_draining()) {
manager_logger.debug("[{}] Exiting reading from commitlog because of canceled draining", _ep_key);
manager_logger.debug("hint_sender[{}]:send_one_file: Exiting reading from commitlog because of canceled draining", _ep_key);
// We need to throw an exception here to cancel reading the segment.
throw canceled_draining_exception{};
}
@@ -502,13 +504,15 @@ bool hint_sender::send_one_file(const sstring& fname) {
};
}, _last_not_complete_rp.pos, &_db.extensions()).get();
} catch (db::commitlog::segment_error& ex) {
manager_logger.error("{}: {}. Dropping...", fname, ex.what());
manager_logger.error("hint_sender[{}]:send_one_file: Segment error in {}: {}. Last not complete position={}",
_ep_key, fname, ex.what(), _last_not_complete_rp);
ctx_ptr->segment_replay_failed = false;
++this->shard_stats().corrupted_files;
} catch (const canceled_draining_exception&) {
manager_logger.debug("[{}] Loop in send_one_file finishes due to canceled draining", _ep_key);
manager_logger.debug("hint_sender[{}]:send_one_file: Loop in send_one_file finishes due to canceled draining", _ep_key);
} catch (...) {
manager_logger.trace("sending of {} failed: {}", fname, std::current_exception());
manager_logger.debug("hint_sender[{}]:send_one_file: Sending of {} failed: {}. Last not complete position={}",
_ep_key, fname, std::current_exception(), _last_not_complete_rp);
ctx_ptr->segment_replay_failed = true;
}
@@ -523,7 +527,7 @@ bool hint_sender::send_one_file(const sstring& fname) {
// If we are draining ignore failures and drop the segment even if we failed to send it.
if (draining() && ctx_ptr->segment_replay_failed) {
manager_logger.trace("send_one_file(): we are draining so we are going to delete the segment anyway");
manager_logger.debug("hint_sender[{}]:send_one_file: We are draining, so we are going to delete the segment anyway", _ep_key);
ctx_ptr->segment_replay_failed = false;
}
@@ -533,7 +537,7 @@ bool hint_sender::send_one_file(const sstring& fname) {
// If there was an error thrown by read_log_file function itself, we will retry sending from
// the last hint that was successfully sent (last_succeeded_rp).
_last_not_complete_rp = ctx_ptr->first_failed_rp.value_or(ctx_ptr->last_succeeded_rp.value_or(_last_not_complete_rp));
manager_logger.trace("send_one_file(): error while sending hints from {}, last RP is {}", fname, _last_not_complete_rp);
manager_logger.debug("hint_sender[{}]:send_one_file: Error while sending hints from {}, last RP is {}", _ep_key, fname, _last_not_complete_rp);
return false;
}
@@ -546,7 +550,7 @@ bool hint_sender::send_one_file(const sstring& fname) {
// clear the replay position - we are going to send the next segment...
_last_not_complete_rp = replay_position();
_last_schema_ver_to_column_mapping.clear();
manager_logger.trace("send_one_file(): segment {} was sent in full and deleted", fname);
manager_logger.debug("hint_sender[{}]:send_one_file: Segment {} has been sent in full and deleted", _ep_key, fname);
return true;
}
@@ -572,14 +576,15 @@ void hint_sender::pop_current_segment() {
// Runs in the seastar::async context
void hint_sender::send_hints_maybe() noexcept {
using namespace std::literals::chrono_literals;
manager_logger.trace("send_hints(): going to send hints to {}, we have {} segments to replay", end_point_key(), _segments_to_replay.size() + _foreign_segments_to_replay.size());
manager_logger.trace("hint_sender[{}]:send_hints_maybe: Going to send hints. We have {} segments to replay",
end_point_key(), _segments_to_replay.size() + _foreign_segments_to_replay.size());
int replayed_segments_count = 0;
try {
while (true) {
if (canceled_draining()) {
manager_logger.debug("[{}] Exiting loop in send_hints_maybe because of canceled draining", _ep_key);
manager_logger.debug("hint_sender[{}]:send_hints_maybe: Exiting loop in send_hints_maybe because of canceled draining", _ep_key);
break;
}
const sstring* seg_name = name_of_current_segment();
@@ -598,7 +603,7 @@ void hint_sender::send_hints_maybe() noexcept {
// Ignore exceptions, we will retry sending this file from where we left off the next time.
// Exceptions are not expected here during the regular operation, so just log them.
} catch (...) {
manager_logger.trace("send_hints(): got the exception: {}", std::current_exception());
manager_logger.debug("hint_sender[{}]:send_hints_maybe: Exception occurred while sending: {}", _ep_key, std::current_exception());
}
if (have_segments()) {
@@ -609,7 +614,7 @@ void hint_sender::send_hints_maybe() noexcept {
_next_send_retry_tp = _next_flush_tp;
}
manager_logger.trace("send_hints(): we handled {} segments", replayed_segments_count);
manager_logger.debug("hint_sender[{}]:send_hints_maybe: We handled {} segments", _ep_key, replayed_segments_count);
}
hint_stats& hint_sender::shard_stats() {


@@ -505,20 +505,20 @@ bool manager::can_hint_for(endpoint_id ep) const noexcept {
// hints where N is the total number of nodes in the cluster.
const auto hipf = hints_in_progress_for(ep);
if (_stats.size_of_hints_in_progress > max_size_of_hints_in_progress() && hipf > 0) {
manager_logger.trace("size_of_hints_in_progress {} hints_in_progress_for({}) {}",
manager_logger.trace("can_hint_for: size_of_hints_in_progress {} hints_in_progress_for({}) {}",
_stats.size_of_hints_in_progress, ep, hipf);
return false;
}
// Check that the destination DC is "hintable".
if (!check_dc_for(ep)) {
manager_logger.trace("{}'s DC is not hintable", ep);
manager_logger.trace("can_hint_for: {}'s DC is not hintable", ep);
return false;
}
const bool node_is_alive = local_gossiper().get_endpoint_downtime(ep) <= _max_hint_window_us;
if (!node_is_alive) {
manager_logger.trace("{} has been down for too long, not hinting", ep);
manager_logger.trace("can_hint_for: {} has been down for too long, not hinting", ep);
return false;
}


@@ -148,7 +148,7 @@ cql_table_large_data_handler::cql_table_large_data_handler(gms::feature_service&
template <typename... Args>
future<> cql_table_large_data_handler::try_record(std::string_view large_table, const sstables::sstable& sst, const sstables::key& partition_key, int64_t size,
std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const {
std::string_view size_desc, std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const {
auto sys_ks = _sys_ks.get_permit();
if (!sys_ks) {
co_return;
@@ -168,7 +168,7 @@ future<> cql_table_large_data_handler::try_record(std::string_view large_table,
const auto sstable_name = large_data_handler::sst_filename(sst);
std::string pk_str = key_to_str(partition_key.to_partition_key(s), s);
auto timestamp = db_clock::now();
large_data_logger.warn("Writing large {} {}/{}: {} ({} bytes) to {}", desc, ks_name, cf_name, extra_path, size, sstable_name);
large_data_logger.warn("Writing large {} {}/{}: {} ({}) to {}", desc, ks_name, cf_name, extra_path, size_desc, sstable_name);
co_await sys_ks->execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
.discard_result()
.handle_exception([ks_name, cf_name, large_table, sstable_name] (std::exception_ptr ep) {
@@ -184,12 +184,14 @@ future<> cql_table_large_data_handler::record_large_partitions(const sstables::s
future<> cql_table_large_data_handler::internal_record_large_partitions(const sstables::sstable& sst, const sstables::key& key,
uint64_t partition_size, uint64_t rows) const {
return try_record("partition", sst, key, int64_t(partition_size), "partition", "", {"rows"}, data_value((int64_t)rows));
const sstring size_desc = seastar::format("{} bytes/{} rows", partition_size, rows);
return try_record("partition", sst, key, int64_t(partition_size), size_desc, "partition", "", {"rows"}, data_value((int64_t)rows));
}
future<> cql_table_large_data_handler::internal_record_large_partitions_all_data(const sstables::sstable& sst, const sstables::key& key,
uint64_t partition_size, uint64_t rows, uint64_t range_tombstones, uint64_t dead_rows) const {
return try_record("partition", sst, key, int64_t(partition_size), "partition", "", {"rows", "range_tombstones", "dead_rows"},
const sstring size_desc = seastar::format("{} bytes/{} rows", partition_size, rows);
return try_record("partition", sst, key, int64_t(partition_size), size_desc, "partition", "", {"rows", "range_tombstones", "dead_rows"},
data_value((int64_t)rows), data_value((int64_t)range_tombstones), data_value((int64_t)dead_rows));
}
@@ -203,13 +205,14 @@ future<> cql_table_large_data_handler::internal_record_large_cells(const sstable
auto column_name = cdef.name_as_text();
std::string_view cell_type = cdef.is_atomic() ? "cell" : "collection";
static const std::vector<sstring> extra_fields{"clustering_key", "column_name"};
const sstring size_desc = seastar::format("{} bytes", cell_size);
if (clustering_key) {
const schema &s = *sst.get_schema();
auto ck_str = key_to_str(*clustering_key, s);
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, ck_str, column_name);
return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, cell_type, column_name, extra_fields, ck_str, column_name);
} else {
auto desc = seastar::format("static {}", cell_type);
return try_record("cell", sst, partition_key, int64_t(cell_size), desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
}
}
@@ -217,26 +220,28 @@ future<> cql_table_large_data_handler::internal_record_large_cells_and_collectio
const clustering_key_prefix* clustering_key, const column_definition& cdef, uint64_t cell_size, uint64_t collection_elements) const {
auto column_name = cdef.name_as_text();
std::string_view cell_type = cdef.is_atomic() ? "cell" : "collection";
const sstring size_desc = seastar::format("{} bytes", cell_size);
static const std::vector<sstring> extra_fields{"clustering_key", "column_name", "collection_elements"};
if (clustering_key) {
const schema &s = *sst.get_schema();
auto ck_str = key_to_str(*clustering_key, s);
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, ck_str, column_name, data_value((int64_t)collection_elements));
return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, cell_type, column_name, extra_fields, ck_str, column_name, data_value((int64_t)collection_elements));
} else {
auto desc = seastar::format("static {}", cell_type);
return try_record("cell", sst, partition_key, int64_t(cell_size), desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name, data_value((int64_t)collection_elements));
return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name, data_value((int64_t)collection_elements));
}
}
future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable& sst, const sstables::key& partition_key,
const clustering_key_prefix* clustering_key, uint64_t row_size) const {
static const std::vector<sstring> extra_fields{"clustering_key"};
const sstring size_desc = seastar::format("{} bytes", row_size);
if (clustering_key) {
const schema &s = *sst.get_schema();
std::string ck_str = key_to_str(*clustering_key, s);
return try_record("row", sst, partition_key, int64_t(row_size), "row", "", extra_fields, ck_str);
return try_record("row", sst, partition_key, int64_t(row_size), size_desc, "row", "", extra_fields, ck_str);
} else {
return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
return try_record("row", sst, partition_key, int64_t(row_size), size_desc, "static row", "", extra_fields, data_value::make_null(utf8_type));
}
}


@@ -188,7 +188,7 @@ private:
private:
template <typename... Args>
future<> try_record(std::string_view large_table, const sstables::sstable& sst, const sstables::key& partition_key, int64_t size,
std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const;
std::string_view size_desc, std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const;
};
class nop_large_data_handler : public large_data_handler {


@@ -11,9 +11,11 @@
#include <boost/functional/hash.hpp>
#include <boost/icl/interval_map.hpp>
#include <fmt/ranges.h>
#include <ranges>
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/parallel_for_each.hh>
#include <seastar/core/loop.hh>
#include <seastar/core/on_internal_error.hh>
#include "system_keyspace.hh"
#include "cql3/untyped_result_set.hh"
@@ -36,6 +38,7 @@
#include "db/schema_tables.hh"
#include "gms/generation-number.hh"
#include "service/storage_service.hh"
#include "service/storage_proxy.hh"
#include "service/paxos/paxos_state.hh"
#include "query-result-set.hh"
#include "idl/frozen_mutation.dist.hh"
@@ -763,6 +766,35 @@ schema_ptr system_keyspace::large_cells() {
return large_cells;
}
schema_ptr system_keyspace::corrupt_data() {
static thread_local auto corrupt_data = [] {
auto id = generate_legacy_id(NAME, CORRUPT_DATA);
return schema_builder(NAME, CORRUPT_DATA, id)
// partition key
.with_column("keyspace_name", utf8_type, column_kind::partition_key)
.with_column("table_name", utf8_type, column_kind::partition_key)
// clustering key
.with_column("id", timeuuid_type, column_kind::clustering_key)
// regular rows
// Storing keys as bytes: having a corrupt key might be the reason
// to record the row as corrupt, so we just dump what we have and
// leave interpreting to the lucky person investigating the disaster.
.with_column("partition_key", bytes_type)
.with_column("clustering_key", bytes_type)
// Note: mutation-fragment v2
.with_column("mutation_fragment_kind", utf8_type)
.with_column("frozen_mutation_fragment", bytes_type)
.with_column("origin", utf8_type)
.with_column("sstable_name", utf8_type)
// options
.set_comment("mutation-fragments found to be corrupted")
.set_gc_grace_seconds(0)
.with_hash_version()
.build();
}();
return corrupt_data;
}
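The `schema_builder` calls above correspond roughly to the following CQL definition — a sketch derived only from the columns and options listed; the exact DDL Scylla would render may differ in option spelling:

```cql
CREATE TABLE system.corrupt_data (
    keyspace_name text,
    table_name text,
    id timeuuid,
    partition_key blob,
    clustering_key blob,
    mutation_fragment_kind text,
    frozen_mutation_fragment blob,
    origin text,
    sstable_name text,
    PRIMARY KEY ((keyspace_name, table_name), id)
) WITH comment = 'mutation-fragments found to be corrupted'
   AND gc_grace_seconds = 0;
```

Keys are kept as `blob` on purpose: a corrupt key may be exactly why the row is being recorded, so the raw bytes are dumped without interpretation.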
static constexpr auto schema_gc_grace = std::chrono::duration_cast<std::chrono::seconds>(days(7)).count();
/*static*/ schema_ptr system_keyspace::scylla_local() {
@@ -1649,6 +1681,12 @@ future<> system_keyspace::peers_table_read_fixup() {
continue;
}
const auto host_id = row.get_as<utils::UUID>("host_id");
if (!host_id) {
slogger.error("Peer {} has null host_id in system.{}, the record is broken, removing it",
peer, system_keyspace::PEERS);
co_await remove_endpoint(gms::inet_address{peer});
continue;
}
const auto ts = row.get_as<int64_t>("ts");
const auto it = map.find(host_id);
if (it == map.end()) {
@@ -1712,8 +1750,15 @@ future<> system_keyspace::drop_truncation_rp_records() {
auto rs = co_await execute_cql(req);
bool any = false;
co_await coroutine::parallel_for_each(*rs, [&] (const cql3::untyped_result_set_row& row) -> future<> {
std::unordered_set<table_id> to_delete;
auto db = _qp.db();
auto max_concurrency = std::min(1024u, smp::count * 8);
co_await seastar::max_concurrent_for_each(*rs, max_concurrency, [&] (const cql3::untyped_result_set_row& row) -> future<> {
auto table_uuid = table_id(row.get_as<utils::UUID>("table_uuid"));
if (!db.try_find_table(table_uuid)) {
to_delete.emplace(table_uuid);
co_return;
}
auto shard = row.get_as<int32_t>("shard");
auto segment_id = row.get_as<int64_t>("segment_id");
@@ -1723,11 +1768,26 @@ future<> system_keyspace::drop_truncation_rp_records() {
co_await execute_cql(req);
}
});
if (!to_delete.empty()) {
// IN has a limit on how many values we can put into it.
for (auto&& chunk : to_delete | std::views::transform(&table_id::to_sstring) | std::views::chunk(100)) {
auto str = std::ranges::to<std::string>(chunk | std::views::join_with(','));
auto req = fmt::format("DELETE FROM system.{} WHERE table_uuid IN ({})", TRUNCATED, str);
co_await execute_cql(req);
}
any = true;
}
if (any) {
co_await force_blocking_flush(TRUNCATED);
}
}
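The chunked IN-clause deletion above can be sketched in Python (a hedged illustration, not the Scylla API; the table name and ids are stand-ins, while the 100-value chunk size follows the patch):

```python
# Batch keys into IN-clause DELETE statements, since IN has a limit
# on how many values it can take (illustrative sketch only).
def chunked_in_deletes(table, ids, chunk_size=100):
    """Yield DELETE statements, each naming at most chunk_size keys."""
    for i in range(0, len(ids), chunk_size):
        chunk = ids[i:i + chunk_size]
        yield f"DELETE FROM {table} WHERE table_uuid IN ({','.join(chunk)})"
```

With 250 ids and a chunk size of 100, this yields three statements of 100, 100, and 50 keys, mirroring the `std::views::chunk(100)` pipeline in the patch.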
future<> system_keyspace::remove_truncation_records(table_id id) {
auto req = format("DELETE FROM system.{} WHERE table_uuid = {}", TRUNCATED, id);
co_await execute_cql(req);
co_await force_blocking_flush(TRUNCATED);
}
future<> system_keyspace::save_truncation_record(const replica::column_family& cf, db_clock::time_point truncated_at, db::replay_position rp) {
sstring req = format("INSERT INTO system.{} (table_uuid, shard, position, segment_id, truncated_at) VALUES(?,?,?,?,?)", TRUNCATED);
co_await _qp.execute_internal(req, {cf.schema()->id().uuid(), int32_t(rp.shard_id()), int32_t(rp.pos), int64_t(rp.base_id()), truncated_at}, cql3::query_processor::cache_internal::yes);
@@ -2110,7 +2170,59 @@ future<> system_keyspace::update_peer_info(gms::inet_address ep, locator::host_i
slogger.debug("{}: values={}", query, values);
co_await _qp.execute_internal(query, db::consistency_level::ONE, values, cql3::query_processor::cache_internal::yes);
const auto guard = co_await get_units(_peers_cache_lock, 1);
try {
co_await _qp.execute_internal(query, db::consistency_level::ONE, values, cql3::query_processor::cache_internal::yes);
if (auto* cache = get_peers_cache()) {
cache->host_id_to_inet_ip[hid] = ep;
cache->inet_ip_to_host_id[ep] = hid;
}
} catch (...) {
_peers_cache = nullptr;
throw;
}
}
system_keyspace::peers_cache* system_keyspace::get_peers_cache() {
auto* cache = _peers_cache.get();
if (cache && (lowres_clock::now() > cache->expiration_time)) {
_peers_cache = nullptr;
return nullptr;
}
return cache;
}
future<lw_shared_ptr<const system_keyspace::peers_cache>> system_keyspace::get_or_load_peers_cache() {
const auto guard = co_await get_units(_peers_cache_lock, 1);
if (auto* cache = get_peers_cache()) {
co_return cache->shared_from_this();
}
auto cache = make_lw_shared<peers_cache>();
cache->inet_ip_to_host_id = co_await load_host_ids();
cache->host_id_to_inet_ip.reserve(cache->inet_ip_to_host_id.size());
for (const auto [ip, id]: cache->inet_ip_to_host_id) {
const auto [it, inserted] = cache->host_id_to_inet_ip.insert({id, ip});
if (!inserted) {
on_internal_error(slogger, ::format("duplicate IP for host_id {}, first IP {}, second IP {}",
id, it->second, ip));
}
}
cache->expiration_time = lowres_clock::now() + std::chrono::milliseconds(200);
_peers_cache = cache;
co_return std::move(cache);
}
future<std::optional<gms::inet_address>> system_keyspace::get_ip_from_peers_table(locator::host_id id) {
const auto cache = co_await get_or_load_peers_cache();
if (const auto it = cache->host_id_to_inet_ip.find(id); it != cache->host_id_to_inet_ip.end()) {
co_return it->second;
}
co_return std::nullopt;
}
future<system_keyspace::host_id_to_ip_map_t> system_keyspace::get_host_id_to_ip_map() {
const auto cache = co_await get_or_load_peers_cache();
co_return cache->host_id_to_inet_ip;
}
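The expiring bidirectional peers cache above can be summarized in a small Python sketch (the 200 ms TTL and the duplicate-IP check come from the patch; the loader callable and class shape are stand-ins, not the C++ interface):

```python
import time

# Sketch of an expiring bidirectional host-id/IP cache: one load fills
# both maps, reads within the TTL are served from memory.
class PeersCache:
    def __init__(self, load_host_ids, ttl=0.2):
        self._load = load_host_ids   # callable returning {ip: host_id}
        self._ttl = ttl
        self._cache = None           # (expiry, ip_to_id, id_to_ip)

    def get(self):
        now = time.monotonic()
        if self._cache and now <= self._cache[0]:
            return self._cache[1], self._cache[2]
        ip_to_id = dict(self._load())
        id_to_ip = {}
        for ip, hid in ip_to_id.items():
            if hid in id_to_ip:
                # mirrors the on_internal_error() on duplicate IPs
                raise RuntimeError(f"duplicate IP for host_id {hid}")
            id_to_ip[hid] = ip
        self._cache = (now + self._ttl, ip_to_id, id_to_ip)
        return ip_to_id, id_to_ip
```

As in the patch, a reverse lookup (`get_ip_from_peers_table`) is then just an index into the second map, and any write path invalidates the cache.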
template <typename T>
@@ -2160,7 +2272,22 @@ future<> system_keyspace::update_schema_version(table_schema_version version) {
future<> system_keyspace::remove_endpoint(gms::inet_address ep) {
const sstring req = format("DELETE FROM system.{} WHERE peer = ?", PEERS);
slogger.debug("DELETE FROM system.{} WHERE peer = {}", PEERS, ep);
co_await execute_cql(req, ep.addr()).discard_result();
const auto guard = co_await get_units(_peers_cache_lock, 1);
try {
co_await execute_cql(req, ep.addr()).discard_result();
if (auto* cache = get_peers_cache()) {
const auto it = cache->inet_ip_to_host_id.find(ep);
if (it != cache->inet_ip_to_host_id.end()) {
const auto id = it->second;
cache->inet_ip_to_host_id.erase(it);
cache->host_id_to_inet_ip.erase(id);
}
}
} catch (...) {
_peers_cache = nullptr;
throw;
}
}
future<> system_keyspace::update_tokens(const std::unordered_set<dht::token>& tokens) {
@@ -2312,6 +2439,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
peers(), peer_events(), range_xfers(),
compactions_in_progress(), compaction_history(),
sstable_activity(), size_estimates(), large_partitions(), large_rows(), large_cells(),
corrupt_data(),
scylla_local(), db::schema_tables::scylla_table_schema_history(),
repair_history(),
v3::views_builds_in_progress(), v3::built_views(),
@@ -3573,4 +3701,12 @@ future<::shared_ptr<cql3::untyped_result_set>> system_keyspace::execute_cql(cons
return _qp.execute_internal(query_string, values, cql3::query_processor::cache_internal::yes);
}
future<> system_keyspace::apply_mutation(mutation m) {
if (m.schema()->ks_name() != NAME) {
on_internal_error(slogger, fmt::format("system_keyspace::apply_mutation(): attempted to apply mutation belonging to table {}.{}", m.schema()->ks_name(), m.schema()->cf_name()));
}
return _qp.proxy().mutate_locally(m, {}, db::commitlog::force_sync(m.schema()->static_props().wait_for_sync_to_commitlog), db::no_timeout);
}
} // namespace db


@@ -142,6 +142,7 @@ class system_keyspace : public seastar::peering_sharded_service<system_keyspace>
static schema_ptr large_partitions();
static schema_ptr large_rows();
static schema_ptr large_cells();
static schema_ptr corrupt_data();
static schema_ptr scylla_local();
future<> force_blocking_flush(sstring cfname);
// This function is called when the system.peers table is read,
@@ -153,6 +154,17 @@ class system_keyspace : public seastar::peering_sharded_service<system_keyspace>
// and this node crashes after adding a new IP but before removing the old one. The
// record with older timestamp is removed, the warning is written to the log.
future<> peers_table_read_fixup();
struct peers_cache: public enable_lw_shared_from_this<peers_cache> {
std::unordered_map<gms::inet_address, locator::host_id> inet_ip_to_host_id;
std::unordered_map<locator::host_id, gms::inet_address> host_id_to_inet_ip;
lowres_clock::time_point expiration_time;
};
lw_shared_ptr<peers_cache> _peers_cache;
semaphore _peers_cache_lock{1};
peers_cache* get_peers_cache();
future<lw_shared_ptr<const peers_cache>> get_or_load_peers_cache();
public:
static schema_ptr size_estimates();
public:
@@ -174,6 +186,7 @@ public:
static constexpr auto LARGE_PARTITIONS = "large_partitions";
static constexpr auto LARGE_ROWS = "large_rows";
static constexpr auto LARGE_CELLS = "large_cells";
static constexpr auto CORRUPT_DATA = "corrupt_data";
static constexpr auto SCYLLA_LOCAL = "scylla_local";
static constexpr auto RAFT = "raft";
static constexpr auto RAFT_SNAPSHOTS = "raft_snapshots";
@@ -317,6 +330,12 @@ public:
future<> update_peer_info(gms::inet_address ep, locator::host_id hid, const peer_info& info);
// Return ip of the peers table entry with given host id
future<std::optional<gms::inet_address>> get_ip_from_peers_table(locator::host_id id);
using host_id_to_ip_map_t = std::unordered_map<locator::host_id, gms::inet_address>;
future<host_id_to_ip_map_t> get_host_id_to_ip_map();
future<> remove_endpoint(gms::inet_address ep);
// Saves the key-value pair into system.scylla_local table.
@@ -424,6 +443,7 @@ public:
future<> save_truncation_record(const replica::column_family&, db_clock::time_point truncated_at, db::replay_position);
future<replay_positions> get_truncated_positions(table_id);
future<> drop_truncation_rp_records();
future<> remove_truncation_records(table_id);
// Converts a `dht::token_range` object to the left-open integer range (x,y] form.
//
@@ -692,6 +712,10 @@ public:
return execute_cql(req, { data_value(std::forward<Args>(args))... });
}
// Apply write as mutation to the system keyspace.
// The mutation has to belong to a table in the system keyspace.
future<> apply_mutation(mutation m);
friend future<column_mapping> db::schema_tables::get_column_mapping(db::system_keyspace& sys_ks, ::table_id table_id, table_schema_version version);
friend future<bool> db::schema_tables::column_mapping_exists(db::system_keyspace& sys_ks, table_id table_id, table_schema_version version);
friend future<> db::schema_tables::drop_column_mapping(db::system_keyspace& sys_ks, table_id table_id, table_schema_version version);


@@ -165,7 +165,7 @@ public:
});
}
future<> emit_ring(result_collector& result, const dht::decorated_key& dk, const sstring& table_name, std::vector<dht::token_range_endpoints> ranges) {
future<> emit_ring(result_collector& result, const dht::decorated_key& dk, const sstring& table_name, utils::chunked_vector<dht::token_range_endpoints> ranges) {
co_await result.emit_partition_start(dk);
std::ranges::sort(ranges, std::ranges::less(), std::mem_fn(&dht::token_range_endpoints::_start_token));
@@ -219,11 +219,11 @@ public:
co_return;
}
const auto& table_name = table->schema()->cf_name();
std::vector<dht::token_range_endpoints> ranges = co_await _ss.describe_ring_for_table(e.name, table_name);
utils::chunked_vector<dht::token_range_endpoints> ranges = co_await _ss.describe_ring_for_table(e.name, table_name);
co_await emit_ring(result, e.key, table_name, std::move(ranges));
});
} else {
std::vector<dht::token_range_endpoints> ranges = co_await _ss.describe_ring(e.name);
utils::chunked_vector<dht::token_range_endpoints> ranges = co_await _ss.describe_ring(e.name);
co_await emit_ring(result, e.key, "<ALL>", std::move(ranges));
}
}


@@ -10,6 +10,7 @@
#pragma once
#include <vector>
#include "interval.hh"
#include "utils/chunked_vector.hh"
namespace sstables {
@@ -29,7 +30,7 @@ using partition_range = interval<ring_position>;
using token_range = interval<token>;
using partition_range_vector = std::vector<partition_range>;
using token_range_vector = std::vector<token_range>;
using token_range_vector = utils::chunked_vector<token_range>;
class decorated_key;


@@ -86,9 +86,9 @@ if __name__ == '__main__':
ethpciid = ''
if network_mode == 'dpdk':
dpdk_status = out('/opt/scylladb/scripts/dpdk-devbind.py --status')
match = re.search('if={} drv=(\S+)'.format(ifname), dpdk_status, flags=re.MULTILINE)
match = re.search(r'if={} drv=(\S+)'.format(ifname), dpdk_status, flags=re.MULTILINE)
ethdrv = match.group(1)
match = re.search('^(\\S+:\\S+:\\S+\.\\S+) [^\n]+ if={} '.format(ifname), dpdk_status, flags=re.MULTILINE)
match = re.search(r'^(\S+:\S+:\S+\.\S+) [^\n]+ if={} '.format(ifname), dpdk_status, flags=re.MULTILINE)
ethpciid = match.group(1)
if args.mode:
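The raw-string change above matters because `'\S'` in a plain Python string is an invalid escape (a warning in recent Python versions, and slated to become an error). A small check, using a made-up dpdk-devbind-style status line as the input:

```python
import re

# Raw strings keep regex escapes like \S intact; the sample line below
# is an assumption for illustration, not real dpdk-devbind output.
line = "0000:00:04.0 'Virtio network device' if=eth0 drv=virtio-pci"
m = re.search(r'if=(\S+) drv=(\S+)', line)
ifname, drv = m.group(1), m.group(2)
```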


@@ -18,7 +18,7 @@ Breaks: scylla-enterprise-conf (<< 2025.1.0~)
Package: %{product}-server
Architecture: any
Depends: ${misc:Depends}, %{product}-conf (= ${binary:Version}), %{product}-python3 (= ${binary:Version})
Depends: ${misc:Depends}, %{product}-conf (= ${binary:Version}), %{product}-python3 (= ${binary:Version}), procps
Replaces: %{product}-tools (<<5.5), scylla-enterprise-tools (<< 2024.2.0~), scylla-enterprise-server (<< 2025.1.0~)
Breaks: %{product}-tools (<<5.5), scylla-enterprise-tools (<< 2024.2.0~), scylla-enterprise-server (<< 2025.1.0~)
Description: Scylla database server binaries


@@ -14,6 +14,15 @@ product="$(<build/SCYLLA-PRODUCT-FILE)"
version="$(sed 's/-/~/' <build/SCYLLA-VERSION-FILE)"
release="$(<build/SCYLLA-RELEASE-FILE)"
original_version="$(<build/SCYLLA-VERSION-FILE)"
if [[ "$original_version" == *"-dev"* ]]; then
repo_file_url="https://downloads.scylladb.com/unstable/scylla/master/rpm/centos/latest/scylla.repo"
else
# Remove the last dot-separated component
repo_version="${original_version%.*}"
repo_file_url="https://downloads.scylladb.com/rpm/centos/scylla-$repo_version.repo"
fi
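The repo-URL selection above (`${original_version%.*}` drops the last dot-separated component) can be mirrored in Python; the URLs are the ones from the script, everything else is a sketch:

```python
# Mirror of the shell logic: '-dev' builds use the unstable master repo,
# released builds drop the last version component to pick a branch repo.
def repo_url(version):
    if "-dev" in version:
        return ("https://downloads.scylladb.com/unstable/scylla/"
                "master/rpm/centos/latest/scylla.repo")
    repo_version = version.rsplit(".", 1)[0]  # like ${version%.*}
    return f"https://downloads.scylladb.com/rpm/centos/scylla-{repo_version}.repo"
```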
mode="release"
arch="$(uname -m)"
@@ -88,8 +97,8 @@ bcp LICENSE-ScyllaDB-Source-Available.md /licenses/
run microdnf clean all
run microdnf --setopt=tsflags=nodocs -y update
run microdnf --setopt=tsflags=nodocs -y install hostname python3 python3-pip kmod
run microdnf clean all
run microdnf --setopt=tsflags=nodocs -y install hostname kmod procps-ng python3 python3-pip
run curl -L --output /etc/yum.repos.d/scylla.repo ${repo_file_url}
run pip3 install --no-cache-dir --prefix /usr supervisor
run bash -ec "echo LANG=C.UTF-8 > /etc/locale.conf"
run bash -ec "rpm -ivh packages/*.rpm"


@@ -71,6 +71,7 @@ Group: Applications/Databases
Summary: The Scylla database server
Requires: %{product}-conf = %{version}-%{release}
Requires: %{product}-python3 = %{version}-%{release}
Requires: procps-ng
AutoReqProv: no
Provides: %{product}-tools:%{_bindir}/nodetool
Provides: %{product}-tools:%{_sysconfigdir}/bash_completion.d/nodetool-completion


@@ -22,6 +22,8 @@ def readable_desc_rst(description):
cleaned_line = line.replace('\\n', '\n')
cleaned_line = cleaned_line.replace('\\t', '\n' + indent * 2)
if line.endswith('"'):
cleaned_line = cleaned_line[:-1] + ' '
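The cleanup step above can be gathered into one function; this is a sketch of the behavior shown in the hunk, with the two-space indent width being an assumption:

```python
# Expand literal backslash escapes in a one-line description:
# '\n' becomes a newline, '\t' a newline plus doubled indent,
# and a trailing quote is swapped for a space.
def clean_line(line, indent="  "):
    line = line.replace("\\n", "\n")
    line = line.replace("\\t", "\n" + indent * 2)
    if line.endswith('"'):
        line = line[:-1] + " "
    return line
```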


@@ -1,15 +1,24 @@
{
"Linux Distributions": {
"Ubuntu": ["20.04", "22.04", "24.04"],
"Ubuntu": ["20.04 (deprecated)", "22.04", "24.04"],
"Debian": ["11"],
"Rocky / CentOS / RHEL": ["8", "9"],
"Amazon Linux": ["2023"]
},
"ScyllaDB Versions": [
{
"version": "Enterprise 2025.1",
"version": "ScyllaDB 2025.2",
"supported_OS": {
"Ubuntu": ["20.04", "22.04", "24.04"],
"Ubuntu": ["20.04 (deprecated)", "22.04", "24.04"],
"Debian": ["11"],
"Rocky / CentOS / RHEL": ["8", "9"],
"Amazon Linux": ["2023"]
}
},
{
"version": "ScyllaDB 2025.1",
"supported_OS": {
"Ubuntu": ["20.04 (deprecated)", "22.04", "24.04"],
"Debian": ["11"],
"Rocky / CentOS / RHEL": ["8", "9"],
"Amazon Linux": ["2023"]
@@ -18,7 +27,7 @@
{
"version": "Enterprise 2024.2",
"supported_OS": {
"Ubuntu": ["20.04", "22.04", "24.04"],
"Ubuntu": ["20.04 (deprecated)", "22.04", "24.04"],
"Debian": ["11"],
"Rocky / CentOS / RHEL": ["8", "9"],
"Amazon Linux": ["2023"]
@@ -27,20 +36,11 @@
{
"version": "Enterprise 2024.1",
"supported_OS": {
"Ubuntu": ["20.04", "22.04", "24.04*"],
"Ubuntu": ["20.04 (deprecated)", "22.04", "24.04*"],
"Debian": ["11"],
"Rocky / CentOS / RHEL": ["8", "9"],
"Amazon Linux": []
}
},
{
"version": "Open Source 6.2",
"supported_OS": {
"Ubuntu": ["20.04", "22.04", "24.04"],
"Debian": ["11"],
"Rocky / CentOS / RHEL": ["8", "9"],
"Amazon Linux": ["2023"]
}
}
]
}


@@ -2,6 +2,11 @@
#old path: new path
# Remove redundant pages
/stable/getting-started/tutorials: https://docs.scylladb.com/stable/get-started/develop-with-scylladb/tutorials-example-projects.html
/stable/contribute: https://github.com/scylladb/scylladb/blob/master/CONTRIBUTING.md
# Remove an outdated article
/stable/troubleshooting/nodetool-memory-read-timeout.html: /stable/troubleshooting/index.html
@@ -157,3 +162,5 @@
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.5-to-4.6/upgrade-guide-from-4.5-to-4.6-debian-10.html: /stable/upgrade/index.html
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.5-to-4.6/metric-update-4.5-to-4.6.html: /stable/upgrade/index.html
# Fixed typo in the file name
/stable/operating-scylla/nodetool-commands/enbleautocompaction.html: /stable/operating-scylla/nodetool-commands/enableautocompaction.html


@@ -58,112 +58,12 @@ of nodes in the cluster is available. The following examples illustrate how Raft
In summary, Raft makes schema changes safe, but it requires that a quorum of nodes in the cluster is available.
.. _verify-raft-procedure:
Verifying that the Raft upgrade procedure finished successfully
========================================================================
You may need to perform the following procedure as part of
the :ref:`manual recovery procedure <recovery-procedure>`.
The Raft upgrade procedure requires **full cluster availability** to correctly set up the Raft algorithm; after the setup finishes, Raft can proceed with only a majority of nodes, but this initial setup is an exception.
An unlucky event, such as a hardware failure, may cause one of your nodes to fail. If this happens before the Raft upgrade procedure finishes, the procedure will get stuck and your intervention will be required.
To verify that the procedure has finished, look at the log of every ScyllaDB node (using ``journalctl _COMM=scylla``). Search for the following patterns:
* ``Starting internal upgrade-to-raft procedure`` denotes the start of the procedure,
* ``Raft upgrade finished`` denotes the end.
The following is an example of a log from a node which went through the procedure correctly. Some parts were truncated for brevity:
.. code-block:: console
features - Feature SUPPORTS_RAFT_CLUSTER_MANAGEMENT is enabled
raft_group0 - finish_setup_after_join: SUPPORTS_RAFT feature enabled. Starting internal upgrade-to-raft procedure.
raft_group0_upgrade - starting in `use_pre_raft_procedures` state.
raft_group0_upgrade - Waiting until everyone is ready to start upgrade...
raft_group0_upgrade - Joining group 0...
raft_group0 - server 624fa080-8c0e-4e3d-acf6-10af473639ca joined group 0 with group id 8f8a1870-5c4e-11ed-bb13-fe59693a23c9
raft_group0_upgrade - Waiting until every peer has joined Raft group 0...
raft_group0_upgrade - Every peer is a member of Raft group 0.
raft_group0_upgrade - Waiting for schema to synchronize across all nodes in group 0...
raft_group0_upgrade - synchronize_schema: my version: a37a3b1e-5251-3632-b6b4-a9468a279834
raft_group0_upgrade - synchronize_schema: schema mismatches: {}. 3 nodes had a matching version.
raft_group0_upgrade - synchronize_schema: finished.
raft_group0_upgrade - Entering synchronize state.
raft_group0_upgrade - Schema changes are disabled in synchronize state. If a failure makes us unable to proceed, manual recovery will be required.
raft_group0_upgrade - Waiting for all peers to enter synchronize state...
raft_group0_upgrade - All peers in synchronize state. Waiting for schema to synchronize...
raft_group0_upgrade - synchronize_schema: collecting schema versions from group 0 members...
raft_group0_upgrade - synchronize_schema: collected remote schema versions.
raft_group0_upgrade - synchronize_schema: my version: a37a3b1e-5251-3632-b6b4-a9468a279834
raft_group0_upgrade - synchronize_schema: schema mismatches: {}. 3 nodes had a matching version.
raft_group0_upgrade - synchronize_schema: finished.
raft_group0_upgrade - Schema synchronized.
raft_group0_upgrade - Raft upgrade finished.
In a functioning cluster with good network connectivity, the procedure should take no more than a few seconds.
Network issues may cause the procedure to take longer, but if all nodes are alive and the network is eventually functional (each pair of nodes is eventually connected), the procedure will eventually finish.
Note the following message, which appears in the log presented above:
.. code-block:: console
Schema changes are disabled in synchronize state. If a failure makes us unable to proceed, manual recovery will be required.
During the procedure, there is a brief window during which schema changes are disabled. This is when the schema change mechanism switches from the older, unsafe algorithm to the safe Raft-based algorithm. If everything runs smoothly, this window will be unnoticeable; the procedure is designed to minimize its length. However, if the procedure gets stuck, e.g. due to a network connectivity problem, ScyllaDB will return the following error when trying to perform a schema change during this window:
.. code-block:: console
Cannot perform schema or topology changes during this time; the cluster is currently upgrading to use Raft for schema operations.
If this error keeps happening, check the logs of your nodes to learn the state of the upgrade. The upgrade procedure may get stuck
if there was a node failure.
In the next example, one of the nodes had a power outage before the procedure could finish. The following shows a part of another node's logs:
.. code-block:: console
raft_group0_upgrade - Entering synchronize state.
raft_group0_upgrade - Schema changes are disabled in synchronize state. If a failure makes us unable to proceed, manual recovery will be required.
raft_group0_upgrade - Waiting for all peers to enter synchronize state...
raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: node 127.90.69.3 not in synchronize state yet...
raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: node 127.90.69.1 not in synchronize state yet...
raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: retrying in a while...
raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: node 127.90.69.1 not in synchronize state yet...
raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: retrying in a while...
...
raft_group0_upgrade - Raft upgrade procedure taking longer than expected. Please check if all nodes are live and the network is healthy. If the upgrade procedure does not progress even though the cluster is healthy, try performing a rolling restart of the cluster. If that doesn't help or some nodes are dead and irrecoverable, manual recovery may be required. Consult the relevant documentation.
raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: node 127.90.69.1 not in synchronize state yet...
raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: retrying in a while...
.. TODO: the 'Consult the relevant documentation' message must be updated to point to this doc.
Note the following message:
.. code-block:: console
raft_group0_upgrade - Raft upgrade procedure taking longer than expected. Please check if all nodes are live and the network is healthy. If the upgrade procedure does not progress even though the cluster is healthy, try performing a rolling restart of the cluster. If that doesn't help or some nodes are dead and irrecoverable, manual recovery may be required. Consult the relevant documentation.
If the Raft upgrade procedure is stuck, this message will appear periodically in each node's logs.
The message suggests the initial course of action:
* Check if all nodes are alive.
* If a node is down but can be restarted, restart it.
* If all nodes are alive, ensure that the network is healthy: that every node is reachable from every other node.
* If all nodes are alive and the network is healthy, perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of the cluster.
One of the reasons why the procedure may get stuck is a pre-existing problem in schema definitions that prevents the schema from synchronizing across the cluster. The procedure cannot proceed unless it ensures that the schema is synchronized.
If **all nodes are alive and the network is healthy**, you performed a rolling restart, but the issue still persists, contact `ScyllaDB support <https://www.scylladb.com/product/support/>`_ for assistance.
If some nodes are **dead and irrecoverable**, you'll need to perform a manual recovery procedure. Consult :ref:`the section about Raft recovery <recovery-procedure>`.
.. _raft-topology-changes:
Consistent Topology with Raft
-----------------------------------------------------------------
ScyllaDB can use Raft to manage cluster topology. With Raft-managed topology
ScyllaDB uses Raft to manage cluster topology. With Raft-managed topology
enabled, all topology operations are internally sequenced in a consistent
way. A centralized coordination process ensures that topology metadata is
synchronized across the nodes on each step of a topology change procedure.
@@ -173,42 +73,18 @@ will safely drive all of them to completion. For example, multiple nodes can
be bootstrapped concurrently, which couldn't be done with the old
gossip-based topology.
The feature is automatically enabled in new clusters.
.. note::
Verifying that Raft is Enabled
----------------------------------
Enabling consistent topology changes is mandatory in versions 2025.2 and later. If consistent topology changes are
disabled in your cluster, you need to follow the instructions in
`Enable Consistent Topology Updates <https://docs.scylladb.com/manual/branch-2025.1/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/enable-consistent-topology.html>`_.
.. _schema-on-raft-enabled:
**Schema on Raft**
You can verify that Raft is enabled on your cluster by performing the following query on each node:
.. code-block:: sql
cqlsh> SELECT * FROM system.scylla_local WHERE key = 'group0_upgrade_state';
The query should return:
.. code-block:: console
key | value
----------------------+--------------------------
group0_upgrade_state | use_post_raft_procedures
(1 rows)
on every node.
If the query returns 0 rows, or ``value`` is ``synchronize`` or ``use_pre_raft_procedures``, it means that the cluster is in the middle of the Raft upgrade procedure; consult the :ref:`relevant section <verify-raft-procedure>`.
If ``value`` is ``recovery``, it means that the cluster is in the middle of the manual recovery procedure. The procedure must be finished. Consult :ref:`the section about Raft recovery <recovery-procedure>`.
If ``value`` is anything else, it might indicate data corruption or a mistake made while performing the manual recovery procedure. The value will be treated as if it were equal to ``recovery`` when the node is restarted.
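The interpretation of ``group0_upgrade_state`` above can be condensed into a small helper; the state strings are the documented ones, while the verdict wording is our own summary:

```python
# Summarize what a group0_upgrade_state value means for the operator.
def interpret_group0_state(value):
    if value == "use_post_raft_procedures":
        return "raft enabled"
    if value is None or value in ("synchronize", "use_pre_raft_procedures"):
        return "upgrade in progress"
    if value == "recovery":
        return "manual recovery in progress"
    return "unexpected state; treated as recovery on restart"
```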
If you are uncertain whether consistent topology changes are enabled, refer to the guide below.
.. _verifying-consistent-topology-changes-enabled:
**Consistent topology changes**
Verifying that consistent topology changes are enabled
-----------------------------------------------------------------
You can verify that consistent topology management is enabled on your cluster in two ways:


@@ -147,24 +147,19 @@ Limitations and Unsupported Features
performance problems, or other issues.
The following ScyllaDB features are not supported if a keyspace has tablets
enabled:
enabled. If you plan to use any of the features listed below, CREATE your keyspace
:ref:`with tablets disabled <tablets-enable-tablets>`.
* Counters
* Change Data Capture (CDC)
* Lightweight Transactions (LWT)
* Alternator (as it uses LWT)
* Materialized Views (MV) ``*``
* Secondary indexes (SI, as it depends on MV) ``*``
If you plan to use any of the above features, CREATE your keyspace
:ref:`with tablets disabled <tablets-enable-tablets>`.
The following ScyllaDB features are disabled by default when used with a keyspace
that has tablets enabled:
* Materialized Views (MV)
* Secondary indexes (SI, as it depends on MV)
To enable MV and SI for tablet keyspaces, use the ``--experimental-features=views-with-tablets``
configuration option. See :ref:`Views with tablets <admin-views-with-tablets>` for details.
``*`` You can enable experimental support for MV and SI using
the ``--experimental-features=views-with-tablets`` configuration option.
See :ref:`Views with tablets <admin-views-with-tablets>` for details.
Resharding in keyspaces with tablets enabled has the following limitations:


@@ -1,31 +0,0 @@
Contribute to ScyllaDB
=======================
Thank you for your interest in making ScyllaDB better!
We appreciate your help and look forward to welcoming you to the ScyllaDB Community.
There are two ways you can contribute:
* Send a patch to the ScyllaDB source code
* Write documentation for ScyllaDB Docs
Contribute to ScyllaDB's Source Code
------------------------------------
ScyllaDB developers use patches and email to share and discuss changes.
Setting up can take a little time, but once you have done it the first time, it's easy.
The basic steps are:
* Join the ScyllaDB community
* Create a Git branch to work on
* Commit your work with clear commit messages and sign-offs.
* Send a PR or use ``git format-patch`` and ``git send-email`` to send to the list
The entire process is `documented here <https://github.com/scylladb/scylla/blob/master/CONTRIBUTING.md>`_.
Contribute to ScyllaDB Docs
---------------------------
Each ScyllaDB project has accompanying documentation. For information about contributing documentation to a specific ScyllaDB project, refer to the README file for the individual project.
For general information or to contribute to the ScyllaDB Sphinx theme, read the `Contributor's Guide <https://sphinx-theme.scylladb.com/stable/contribute/>`_.


@@ -60,11 +60,11 @@ Keyspace and table names are defined by the following grammar:
keyspace_name: `name`
table_name: [ `keyspace_name` '.' ] `name`
name: `unquoted_name` | `quoted_name`
unquoted_name: re('[a-zA-Z_0-9]{1, 48}')
unquoted_name: re('[a-zA-Z_0-9]{1, 192}')
quoted_name: '"' `unquoted_name` '"'
Both keyspace and table names consist of only alphanumeric characters, cannot be empty, and are limited in
size to 48 characters (that limit exists mostly to prevent filenames, which may include the keyspace and table name, from going
size to 192 characters (that limit exists mostly to prevent filenames, which may include the keyspace and table name, from going
over the limits of certain file systems). By default, keyspace and table names are case insensitive (``myTable`` is
equivalent to ``mytable``), but case sensitivity can be forced by using double-quotes (``"myTable"`` is different from
``mytable``).
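The grammar and case-sensitivity rules above can be checked with a short sketch (our own illustration of the documented rules, not ScyllaDB's parser):

```python
import re

# Unquoted names: 1-192 alphanumeric/underscore chars, folded to
# lowercase; double quotes force case sensitivity.
NAME_RE = re.compile(r'[a-zA-Z_0-9]{1,192}$')

def normalize_name(name):
    """Return the effective (case-normalized) keyspace/table name."""
    if len(name) > 1 and name.startswith('"') and name.endswith('"'):
        inner = name[1:-1]          # quoted: keep case
    else:
        inner = name.lower()        # unquoted: case-insensitive
    if not NAME_RE.match(inner):
        raise ValueError(f"invalid name: {name!r}")
    return inner
```

So ``myTable`` and ``mytable`` normalize to the same name, while ``"myTable"`` stays distinct.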


@@ -481,7 +481,8 @@ Creating a new user-defined type is done using a ``CREATE TYPE`` statement defin
field_definition: `identifier` `cql_type`
A UDT has a name (``udt_name``), which is used to declare columns of that type, and is a set of named and typed fields. Each field can be of any
type, including collections or other UDTs. UDTs and collections inside collections must always be frozen (no matter which version of ScyllaDB you are using).
type, including collections or other UDTs.
Similar to collections, a UDT can be frozen or non-frozen. A frozen UDT is immutable and can only be updated as a whole. Nested UDTs or UDTs used in keys must always be frozen.
For example::
@@ -506,26 +507,15 @@ For example::
CREATE TABLE superheroes (
name frozen<full_name> PRIMARY KEY,
home frozen<address>
home address
);
.. note::
- Attempting to create an already existing type will result in an error unless the ``IF NOT EXISTS`` option is used. If it is used, the statement will be a no-op if the type already exists.
- A type is intrinsically bound to the keyspace in which it is created and can only be used in that keyspace. At creation, if the type name is prefixed by a keyspace name, it is created in that keyspace. Otherwise, it is created in the current keyspace.
- As of ScyllaDB Open Source 3.2, UDTs not inside collections do not have to be frozen, but in all versions prior to ScyllaDB Open Source 3.2, and in all ScyllaDB Enterprise versions, UDTs **must** be frozen.
A non-frozen UDT example with ScyllaDB Open Source 3.2 and higher::
CREATE TYPE ut (a int, b int);
CREATE TABLE cf (a int primary key, b ut);
Same UDT in versions prior::
CREATE TYPE ut (a int, b int);
CREATE TABLE cf (a int primary key, b frozen<ut>);
UDT literals
~~~~~~~~~~~~


@@ -121,6 +121,29 @@ SELECT * FROM system.large_cells;
SELECT * FROM system.large_cells WHERE keyspace_name = 'ks1' and table_name = 'standard1';
~~~
## system.corrupt\_data
Stores data found to be corrupt during internal operations. This data cannot be written to sstables, because it would then be spread around by repair and compaction and could cause failures in sstable parsing.
At the same time, the data should be kept around so that it can be inspected and possibly restored by the database operator.
This table is used to store such data. Data is saved at the mutation-fragment level.
Schema:
```cql
CREATE TABLE system.corrupt_data (
keyspace_name text, -- keyspace name of the source table
table_name text, -- table name of the source table
id timeuuid, -- id of the corrupt mutation fragment, assigned by the database when the corrupt data entry is created
partition_key blob, -- partition key of the partition in the source table, can be incomplete or null due to corruption
clustering_key text, -- clustering key of the mutation fragment in the source table, can be null for some mutation-fragment kinds, can be incomplete or null due to corruption
mutation_fragment_kind text, -- kind of the mutation fragment, one of 'partition start', 'partition end', 'static row', 'clustering row', 'range tombstone change'; only the latter two can have clustering_key set
frozen_mutation_fragment blob, -- the serialized mutation fragment itself
origin text, -- the name of the process that found the corruption, e.g. 'sstable-writer'
sstable_name text, -- the name of the sstable that contains the corrupt data, if known; the sstable is not kept around, it could be compacted or deleted
PRIMARY KEY ((keyspace_name, table_name), id)
) WITH CLUSTERING ORDER BY (id ASC)
AND gc_grace_seconds = 0;
```
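For example, to list the corrupt fragments recorded for one source table (the keyspace and table names below are placeholders):

```cql
SELECT id, mutation_fragment_kind, origin, sstable_name
FROM system.corrupt_data
WHERE keyspace_name = 'ks1' AND table_name = 'standard1';
```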
## system.raft
Holds information about Raft


@@ -67,9 +67,6 @@ You can enable CDC when creating or altering a table using the ``cdc`` option, f
CREATE TABLE ks.t (pk int, ck int, v int, PRIMARY KEY (pk, ck, v)) WITH cdc = {'enabled':true};
.. note::
If you enabled CDC and later decide to disable it, you need to **stop all writes** to the base table before issuing the ``ALTER TABLE ... WITH cdc = {'enabled':false};`` command.
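For example, after stopping all writes to the base table ``ks.t``:

.. code-block:: cql

   ALTER TABLE ks.t WITH cdc = {'enabled': false};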
.. include:: /features/cdc/_common/cdc-params.rst
Using CDC with Applications


@@ -6,9 +6,9 @@ Local Secondary Indexes is an enhancement to :doc:`Global Secondary Indexes <sec
which allows ScyllaDB to optimize workloads where the partition key of the base table and the index are the same key.
.. note::
As of ScyllaDB Open Source 4.0, updates for local secondary indexes are performed **synchronously**. When updates are synchronous, the client acknowledges the write
Updates for local secondary indexes are performed **synchronously**. When updates are synchronous, the client acknowledges the write
operation only **after both** the base table modification **and** the view update are written.
This is important to note because the process is no longer asynchronous and the modifications are immediately reflected in the index.
This is important to note because the process is no longer asynchronous, and the modifications are immediately reflected in the index.
In addition, if the view update fails, the client receives a write error.
Example:


@@ -113,7 +113,38 @@ Pick a zone where Haswell CPUs are found. Local SSD performance offers, accordin
Image with NVMe disk interface is recommended.
(`More info <https://cloud.google.com/compute/docs/disks/local-ssd>`_)
Recommended instances types are `n1-highmem <https://cloud.google.com/compute/docs/general-purpose-machines#n1_machines>`_ and `n2-highmem <https://cloud.google.com/compute/docs/general-purpose-machines#n2_machines>`_
Recommended instances types are `z3-highmem-highlssd <https://cloud.google.com/compute/docs/storage-optimized-machines#z3_machine_types>`_,
`n1-highmem <https://cloud.google.com/compute/docs/general-purpose-machines#n1_machines>`_, and `n2-highmem <https://cloud.google.com/compute/docs/general-purpose-machines#n2_machines>`_
.. list-table::
:widths: 30 20 20 30
:header-rows: 1
* - Model
- vCPU
- Mem (GB)
- Storage (GB)
* - z3-highmem-8-highlssd
- 8
- 64
- 3,000
* - z3-highmem-16-highlssd
- 16
- 128
- 6,000
* - z3-highmem-22-highlssd
- 22
- 176
- 9,000
* - z3-highmem-32-highlssd
- 32
- 256
- 12,000
* - z3-highmem-44-highlssd
- 44
- 352
- 18,000
.. list-table::
:widths: 30 20 20 30


@@ -11,7 +11,6 @@ Getting Started
requirements
Migrate to ScyllaDB </using-scylla/migrate-scylla>
Integration Solutions </using-scylla/integrations/index>
tutorials
.. panel-box::
:title: ScyllaDB Requirements


@@ -30,7 +30,7 @@ Launching ScyllaDB on GCP
.. code-block:: console
gcloud compute instances create <name of new instance> --image <ScyllaDB image name> --image-project < ScyllaDB project name> --local-ssd interface=nvme --zone <GCP zone - optional> --machine-type=<machine type>
gcloud compute instances create <name of new instance> --image <ScyllaDB image name> --image-project < ScyllaDB project name> --local-ssd interface=nvme --zone=<GCP zone - optional> --machine-type=<machine type>
For example:


@@ -4,6 +4,9 @@ OS Support by Linux Distributions and Version
The following matrix shows which Linux distributions, containers, and images
are :ref:`supported <os-support-definition>` with which versions of ScyllaDB.
Note that support for Ubuntu 20.04 is deprecated and will be removed in
a future release.
.. datatemplate:json:: /_static/data/os-support.json
:template: platforms.tmpl


@@ -1,21 +0,0 @@
============
Tutorials
============
The tutorials will show you how to use ScyllaDB as a data source for an application.
ScyllaDB Tutorial
===================
`Build an IoT App with sensor simulator and a REST API <https://iot.scylladb.com/stable/>`_
ScyllaDB Cloud Tutorial
=======================
`Implement CRUD operations with a TODO App <https://github.com/scylladb/scylla-cloud-getting-started/>`_
ScyllaDB Cloud Feature Store Tutorial
=====================================
`Build a machine learning (ML) feature store with ScyllaDB <https://feature-store.scylladb.com/stable/>`_


@@ -73,6 +73,5 @@ In addition, you can read our `blog <https://www.scylladb.com/blog/>`_ and atten
kb/index
reference/index
faq
Contribute to ScyllaDB <contribute>
2024.2 and earlier documentation <https://enterprise.docs.scylladb.com/branch-2024.2/>


@@ -83,7 +83,7 @@ Additional References
* `Jepsen and ScyllaDB: Putting Consistency to the Test blog post <https://www.scylladb.com/2020/12/23/jepsen-and-scylla-putting-consistency-to-the-test/>`_
* `Nauto: Achieving Consistency in an Eventually Consistent Environment blog post <https://www.scylladb.com/2020/02/20/nauto-achieving-consistency-in-an-eventually-consistent-environment/>`_
* `Consistency Levels documentation <https://docs.scylladb.com/stable/cql/consistency.html>`_
* `Consistency Levels documentation <https://docs.scylladb.com/manual/stable/cql/consistency.html>`_
* `High Availability lesson on ScyllaDB University <https://university.scylladb.com/courses/scylla-essentials-overview/lessons/high-availability/>`_
* `Lightweight Transactions lesson on ScyllaDB University <https://university.scylladb.com/courses/data-modeling/lessons/lightweight-transactions/>`_
* `Getting the Most out of Lightweight Transactions in ScyllaDB blog post <https://www.scylladb.com/2020/07/15/getting-the-most-out-of-lightweight-transactions-in-scylla/>`_


@@ -2,40 +2,65 @@
How to Safely Increase the Replication Factor
=======================================================
A replication factor (RF) is configured per keyspace. You can change the RF
using the :ref:`ALTER KEYSPACE <alter-keyspace-statement>` command.
**Topic: What can happen when you increase RF**
To increase the RF safely, ensure you follow the guidelines below.
The guidelines differ depending on whether your keyspace is tablets-based
(the default) or has tablets disabled. See :doc:`Data Distribution with Tablets </architecture/tablets>`
for more information about tablets.
Increasing the RF in Tablets-based Keyspaces
-------------------------------------------------
**Audience: ScyllaDB administrators**
If a keyspace has tablets enabled (the default), changing the RF does not
impact data consistency in the cluster.
However, due to limitations in the current protocol used to pass tablet data
to drivers, drivers will not pick up new replicas after the RF is increased.
As a result, drivers will not route requests to new replicas, causing imbalance.
Issues
------
To avoid this issue, restart the client applications after the ALTER statement
that changes the RF completes successfully.
When a Replication Factor (RF) is increased, using the :ref:`ALTER KEYSPACE <alter-keyspace-statement>` command, the data consistency is effectively dropped
by the difference of the RF_new value and the RF_old value for all pre-existing data.
Increasing the RF in Keyspaces with Tablets Disabled
----------------------------------------------------------
If you :ref:`opted out of tablets when creating a keyspace <tablets-enable-tablets>`,
so your keyspace is vnodes-based, increasing the RF will impact data consistency.
Data consistency in your cluster is effectively dropped by the difference
between the RF_new value and the RF_old value for all pre-existing data.
Consistency will only be restored after running a repair.
Another issue occurs in keyspaces with tablets enabled and is driver-related. Due to limitations in the current protocol used to pass tablet data to drivers, drivers will not pick
up new replicas after replication factor is increased. This will cause them to avoid routing requests to those replicas, causing imbalance.
Resolution
----------
========================
When one increases an RF, one should consider that the pre-existing data will **not be streamed** to new replicas (a common misconception).
When you increase the RF, you should be aware that the pre-existing data will
**not be streamed** to new replicas (a common misconception).
As a result, in order to make sure that you can keep on reading the old data with the same level of consistency, increase the read Consistency Level (CL) according to the following formula:
As a result, in order to make sure that you can keep on reading the old data
with the same level of consistency:
``CL_new = CL_old + RF_new - RF_old``
#. Increase the read Consistency Level (CL) according to the following formula:
After you run a repair, you can decrease the CL. If RF has only been changed in a particular Data Center (DC) only the nodes in that DC have to be repaired.
.. code::
CL_new = CL_old + RF_new - RF_old
#. Run repair.
#. Decrease the CL.
If RF has only been changed in a particular Datacenter (DC), only the nodes in
that DC have to be repaired.
To resolve the driver-related issue, restart the client applications after the ALTER statement that changes the RF completes successfully.
Example
=======
In this example your five node cluster RF is 3 and your CL is TWO. You want to increase your RF from 3 to 5.
In this example, your five-node cluster RF is 3 and your CL is TWO. You want to increase your RF from 3 to 5.
#. Increase the read CL by a RF_new - RF_old value.
Following the example, RF_new is 5 and RF_old is 3, so 5 - 3 = 2. You need to increase the CL by 2.
@@ -45,9 +70,9 @@ In this example your five node cluster RF is 3 and your CL is TWO. You want to i
#. Restore the reads CL to the originally intended value. For this example, QUORUM.
If you do not follow the procedure above you may start reading stale or null data after increasing the RF.
If you do not follow the procedure above, you may start reading stale or null data after increasing the RF.
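As a minimal sketch of the procedure above (the keyspace name ``ks`` and the use of ``NetworkTopologyStrategy`` are illustrative assumptions, not from this page):

.. code-block:: cql

   -- five-node cluster, RF 3 -> 5; 'ks' is a hypothetical keyspace
   ALTER KEYSPACE ks WITH replication =
       {'class': 'NetworkTopologyStrategy', 'replication_factor': 5};
   -- until repair completes, reads need CL_old + (RF_new - RF_old)
   -- = 2 + (5 - 3) = 4 replicas to keep the original consistency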
More Information
References
----------------
* :doc:`Fault Tolerance </architecture/architecture-fault-tolerance/>`


@@ -5,4 +5,3 @@ The cassandra-stress tool is used for benchmarking and load testing both ScyllaD
Cassandra Stress is not part of ScyllaDB and is no longer distributed alongside it. It has its own separate repository and release cycle. More information about it can be found on `GitHub <https://github.com/scylladb/cassandra-stress>`_ or on `DockerHub <https://hub.docker.com/r/scylladb/cassandra-stress>`_.
.. include:: /rst_include/apache-copyrights.rst


@@ -18,13 +18,14 @@ Syntax
[--snapshot <snapshot>]
--endpoint <endpoint> --bucket <bucket> --prefix <prefix>
[--nowait]
[--move-files]
Example
-------
.. code-block:: console
nodetool backup --endpoint s3.us-east-2.amazonaws.com --bucket bucket-foo --prefix foo/bar/baz --keyspace ks --table table --snapshot ss
nodetool backup --endpoint s3.us-east-2.amazonaws.com --bucket bucket-foo --prefix foo/bar/baz --keyspace ks --table table --snapshot ss --move-files
Options
-------
@@ -38,6 +39,7 @@ Options
* ``--bucket`` - Name of the bucket to backup SSTables to
* ``--prefix`` - Prefix to backup SSTables to
* ``--nowait`` - Don't wait on the backup process
* ``--move-files`` - Move files instead of copying them. This will delete the files from the local disk after they are uploaded to the object storage.
See also


@@ -1,3 +1,5 @@
.. _nodetool-cleanup-cmd:
Nodetool cleanup
================
**cleanup** ``[<keyspace> <tablename ...>]`` - triggers the immediate removal of data from node(s) that "lose" part of their token range due to a range movement operation (node addition or node replacement).


@@ -29,7 +29,7 @@ Load and Stream
.. code::
nodetool refresh <my_keyspace> <my_table> [--load-and-stream | -las]
nodetool refresh <my_keyspace> <my_table> [--load-and-stream | -las] [--scope <scope>]
The Load and Stream feature extends nodetool refresh. The new ``-las`` option loads arbitrary sstables that do not belong to a node into the cluster. It loads the sstables from disk, calculates the data's owning nodes, and streams them automatically.
For example, say the old cluster has 6 nodes and the new cluster has 3 nodes. We can copy the sstables from the old cluster to any of the new nodes and trigger the load and stream process.
@@ -39,5 +39,42 @@ Load and Stream make restores and migrations much easier:
* You can place sstables from any node on any node
* No need to run nodetool cleanup to remove unused data
Scope
-----
The `scope` parameter describes the subset of cluster nodes where you want to load data:
* `node` - On the local node.
* `rack` - On the local rack.
* `dc` - In the datacenter (DC) where the local node lives.
* `all` (default) - Everywhere across the cluster.
Scope supports a variety of options for filtering the destination nodes.
At one extreme, with the scope ``all``, a single node is given all the SSTables; at the other,
with the scope ``node``, each node loads only its own SSTables. In between, you can choose
a subset of nodes that load only the SSTables belonging to their rack or DC.
This option is only valid when using the ``--load-and-stream`` option.
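For example, to load SSTables and stream them only within the local node's datacenter (keyspace and table names are placeholders):

.. code-block:: console

   nodetool refresh my_keyspace my_table --load-and-stream --scope dc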
Skip cleanup
---------------
.. code::
nodetool refresh <my_keyspace> <my_table> [--skip-cleanup]
When loading an SSTable, Scylla will clean it up, removing keys that the node is not responsible for. To skip this step, use the `--skip-cleanup` option.
See :ref:`nodetool cleanup <nodetool-cleanup-cmd>`.
Skip reshape
---------------
.. code::
nodetool refresh <my_keyspace> <my_table> [--skip-reshape]
When refreshing, the SSTables to load might be out of shape; if so, Scylla will attempt to reshape them. To skip this step, use the `--skip-reshape` option.
.. include:: nodetool-index.rst


@@ -14,9 +14,9 @@ Nodetool
nodetool-commands/cleanup
nodetool-commands/clearsnapshot
nodetool-commands/cluster/index
nodetool-commands/compact
nodetool-commands/compactionhistory
nodetool-commands/compactionstats
nodetool-commands/compact
nodetool-commands/decommission
nodetool-commands/describecluster
nodetool-commands/describering
@@ -25,13 +25,15 @@ Nodetool
nodetool-commands/disablebinary
nodetool-commands/disablegossip
nodetool-commands/drain
nodetool-commands/enbleautocompaction
nodetool-commands/enableautocompaction
nodetool-commands/enablebackup
nodetool-commands/enablebinary
nodetool-commands/enablegossip
nodetool-commands/flush
nodetool-commands/getcompactionthroughput
nodetool-commands/getendpoints
nodetool-commands/getsstables
nodetool-commands/getstreamthroughput
nodetool-commands/gettraceprobability
nodetool-commands/gossipinfo
nodetool-commands/help
@@ -46,25 +48,23 @@ Nodetool
nodetool-commands/restore
nodetool-commands/ring
nodetool-commands/scrub
nodetool-commands/settraceprobability
nodetool-commands/setcompactionthroughput
nodetool-commands/setlogginglevel
nodetool-commands/setstreamthroughput
nodetool-commands/settraceprobability
nodetool-commands/snapshot
nodetool-commands/sstableinfo
nodetool-commands/status
nodetool-commands/statusbackup
nodetool-commands/statusbinary
nodetool-commands/statusgossip
nodetool-commands/status
Nodetool stop compaction <nodetool-commands/stop>
nodetool-commands/tablestats
nodetool-commands/tasks/index
nodetool-commands/toppartitions
nodetool-commands/upgradesstables
nodetool-commands/viewbuildstatus
nodetool-commands/version
nodetool-commands/getcompactionthroughput
nodetool-commands/setcompactionthroughput
nodetool-commands/getstreamthroughput
nodetool-commands/setstreamthroughput
nodetool-commands/viewbuildstatus
The ``nodetool`` utility provides a simple command-line interface to the following exposed operations and attributes.
@@ -87,9 +87,9 @@ Operations that are not listed below are currently not available.
* :doc:`cleanup </operating-scylla/nodetool-commands/cleanup/>` - Triggers the immediate cleanup of keys no longer belonging to a node.
* :doc:`clearsnapshot </operating-scylla/nodetool-commands/clearsnapshot/>` - This command removes snapshots.
* :doc:`cluster <nodetool-commands/cluster/index>` - Run a cluster operation.
* :doc:`compact </operating-scylla/nodetool-commands/compact/>`- Force a (major) compaction on one or more column families.
* :doc:`compactionhistory </operating-scylla/nodetool-commands/compactionhistory/>` - Provides the history of compactions.
* :doc:`compactionstats </operating-scylla/nodetool-commands/compactionstats/>`- Print statistics on compactions.
* :doc:`compact </operating-scylla/nodetool-commands/compact/>`- Force a (major) compaction on one or more column families.
* :doc:`decommission </operating-scylla/nodetool-commands/decommission/>` - Decommission the node.
* :doc:`describecluster </operating-scylla/nodetool-commands/describecluster/>` - Print the name, snitch, partitioner and schema version of a cluster.
* :doc:`describering </operating-scylla/nodetool-commands/describering/>` - :code:`<keyspace>`- Shows the partition ranges of a given keyspace.
@@ -98,14 +98,16 @@ Operations that are not listed below are currently not available.
* :doc:`disablebinary </operating-scylla/nodetool-commands/disablebinary/>` - Disable native transport (binary protocol).
* :doc:`disablegossip </operating-scylla/nodetool-commands/disablegossip/>` - Disable gossip (effectively marking the node down).
* :doc:`drain </operating-scylla/nodetool-commands/drain/>` - Drain the node (stop accepting writes and flush all column families).
* :doc:`enableautocompaction </operating-scylla/nodetool-commands/enbleautocompaction/>` - Enable automatic compaction of a keyspace or table.
* :doc:`enableautocompaction </operating-scylla/nodetool-commands/enableautocompaction/>` - Enable automatic compaction of a keyspace or table.
* :doc:`enablebackup </operating-scylla/nodetool-commands/enablebackup/>` - Enable incremental backup.
* :doc:`enablebinary </operating-scylla/nodetool-commands/enablebinary/>` - Re-enable native transport (binary protocol).
* :doc:`enablegossip </operating-scylla/nodetool-commands/enablegossip/>` - Re-enable gossip.
* :doc:`flush </operating-scylla/nodetool-commands/flush/>` - Flush one or more column families.
* :doc:`getcompactionthroughput </operating-scylla/nodetool-commands/getcompactionthroughput>` - Print the throughput cap for compaction in the system
* :doc:`getendpoints <nodetool-commands/getendpoints/>` :code:`<keyspace>` :code:`<table>` :code:`<key>` - Print the endpoints that own the key.
* **getlogginglevels** - Get the runtime logging levels.
* :doc:`getsstables </operating-scylla/nodetool-commands/getsstables>` - Print the sstable filenames that own the key.
* :doc:`getstreamthroughput </operating-scylla/nodetool-commands/getstreamthroughput>` - Print the throughput cap for SSTables streaming in the system
* :doc:`gettraceprobability </operating-scylla/nodetool-commands/gettraceprobability>` - Displays the current trace probability value. 0 is disabled, 1 is enabled.
* :doc:`gossipinfo </operating-scylla/nodetool-commands/gossipinfo/>` - Shows the gossip information for the cluster.
* :doc:`help </operating-scylla/nodetool-commands/help/>` - Display list of available nodetool commands.
@@ -118,28 +120,26 @@ Operations that are not listed below are currently not available.
* :doc:`refresh </operating-scylla/nodetool-commands/refresh/>`- Load newly placed SSTables to the system without restart
* :doc:`removenode </operating-scylla/nodetool-commands/removenode/>`- Remove node with the provided ID
* :doc:`repair <nodetool-commands/repair/>` :code:`<keyspace>` :code:`<table>` - Repair one or more vnode tables.
* :doc:`restore </operating-scylla/nodetool-commands/restore/>` - Load SSTables from a designated bucket in object store into a specified keyspace or table
* :doc:`resetlocalschema </operating-scylla/nodetool-commands/resetlocalschema/>` - Reset the node's local schema.
* :doc:`restore </operating-scylla/nodetool-commands/restore/>` - Load SSTables from a designated bucket in object store into a specified keyspace or table
* :doc:`ring <nodetool-commands/ring/>` - Displays the token ring information.
* :doc:`scrub </operating-scylla/nodetool-commands/scrub>` :code:`[-m mode] [--no-snapshot] <keyspace> [<table>...]` - Scrub the SSTable files in the specified keyspace or table(s)
* :doc:`setcompactionthroughput </operating-scylla/nodetool-commands/setcompactionthroughput>` - Set the throughput cap for compaction in the system
* :doc:`setlogginglevel </operating-scylla/nodetool-commands/setlogginglevel>` - Sets the logging level threshold for ScyllaDB classes
* :doc:`setstreamthroughput </operating-scylla/nodetool-commands/setstreamthroughput>` - Set the throughput cap for SSTables streaming in the system
* :doc:`settraceprobability </operating-scylla/nodetool-commands/settraceprobability/>` ``<value>`` - Sets the probability for tracing a request.
* :doc:`snapshot </operating-scylla/nodetool-commands/snapshot>` :code:`[-t tag] [-cf column_family] <keyspace>` - Take a snapshot of specified keyspaces or a snapshot of the specified table.
* :doc:`sstableinfo </operating-scylla/nodetool-commands/sstableinfo>` - Get information about sstables per keyspace/table.
* :doc:`status </operating-scylla/nodetool-commands/status/>` - Print cluster information.
* :doc:`statusbackup </operating-scylla/nodetool-commands/statusbackup/>` - Status of incremental backup.
* :doc:`statusbinary </operating-scylla/nodetool-commands/statusbinary/>` - Status of native transport (binary protocol).
* :doc:`statusgossip </operating-scylla/nodetool-commands/statusgossip/>` - Status of gossip.
* :doc:`status </operating-scylla/nodetool-commands/status/>` - Print cluster information.
* :doc:`stop </operating-scylla/nodetool-commands/stop/>` - Stop compaction operation.
* **tablehistograms** see :doc:`cfhistograms <nodetool-commands/cfhistograms/>`
* :doc:`tablestats </operating-scylla/nodetool-commands/tablestats/>` - Provides in-depth diagnostics regarding tables.
* :doc:`tasks </operating-scylla/nodetool-commands/tasks/index>` - Manage tasks manager tasks.
* :doc:`toppartitions </operating-scylla/nodetool-commands/toppartitions/>` - Samples cluster writes and reads and reports the most active partitions in a specified table and time frame.
* :doc:`upgradesstables </operating-scylla/nodetool-commands/upgradesstables>` - Upgrades each table that is not running the latest ScyllaDB version, by rewriting SSTables.
* :doc:`viewbuildstatus </operating-scylla/nodetool-commands/viewbuildstatus/>` - Shows the progress of a materialized view build.
* :doc:`version </operating-scylla/nodetool-commands/version>` - Print the DB version.
* :doc:`getcompactionthroughput </operating-scylla/nodetool-commands/getcompactionthroughput>` - Print the throughput cap for compaction in the system
* :doc:`setcompactionthroughput </operating-scylla/nodetool-commands/setcompactionthroughput>` - Set the throughput cap for compaction in the system
* :doc:`getstreamthroughput </operating-scylla/nodetool-commands/getstreamthroughput>` - Print the throughput cap for SSTables streaming in the system
* :doc:`setstreamthroughput </operating-scylla/nodetool-commands/setstreamthroughput>` - Set the throughput cap for SSTables streaming in the system
* :doc:`viewbuildstatus </operating-scylla/nodetool-commands/viewbuildstatus/>` - Shows the progress of a materialized view build.


@@ -1,10 +0,0 @@
.. note::
This page only applies to clusters where consistent topology updates are not enabled.
Consistent topology updates are mandatory, so **this page serves troubleshooting purposes**.
The page does NOT apply if you:
* Created a cluster with ScyllaDB 6.0 or later (consistent topology updates are automatically enabled).
* `Manually enabled consistent topology updates <https://opensource.docs.scylladb.com/branch-6.0/upgrade/upgrade-opensource/upgrade-guide-from-5.4-to-6.0/enable-consistent-topology.html>`_
after upgrading to 6.0 or before upgrading to 6.1 (required).


@@ -1,166 +0,0 @@
Handling Cluster Membership Change Failures
*******************************************
.. scylladb_include_flag:: membership-change-failures-note.rst
A failure may happen in the middle of a cluster membership change (that is bootstrap, decommission, removenode, or replace), such as loss of power. If that happens, you should ensure that the cluster is brought back to a consistent state as soon as possible. Further membership changes might be impossible until you do so.
For example, a node that crashed in the middle of decommission might leave the cluster in a state where it considers the node to still be a member, but the node itself will refuse to restart and communicate with the cluster. This particular case is very unlikely - it requires a specifically timed crash to happen, after the data streaming phase of decommission finishes but before the node commits that it left. But if it happens, you won't be able to bootstrap other nodes (they will try to contact the partially-decommissioned node and fail) until you remove the remains of the node that crashed.
---------------------------
Handling a Failed Bootstrap
---------------------------
If a failure happens when trying to bootstrap a new node to the cluster, you can try bootstrapping the node again by restarting it.
If the failure persists or you decided that you don't want to bootstrap the node anymore, follow the instructions in the :ref:`cleaning up after a failed membership change <cleaning-up-after-change>` section to remove the remains of the bootstrapping node. You can then clear the node's data directories and attempt to bootstrap it again.
------------------------------
Handling a Failed Decommission
------------------------------
There are two cases.
Most likely the failure happened during the data repair/streaming phase - before the node tried to leave the token ring. Look for a log message containing "leaving token ring" in the logs of the node that you tried to decommission. For example:
.. code-block:: console
INFO 2023-03-14 13:08:38,323 [shard 0] storage_service - decommission[5b2e752e-964d-4f36-871f-254491f4e8cc]: leaving token ring
If the message is **not** present, the failure happened before the node tried to leave the token ring. In that case you can simply restart the node and attempt to decommission it again.
If the message is present, the node attempted to leave the token ring, but it might have left the cluster only partially before the failure. **Do not try to restart the node**. Instead, you must make sure that the node is dead and remove any leftovers using the :doc:`removenode operation </operating-scylla/nodetool-commands/removenode/>`. See :ref:`cleaning up after a failed membership change <cleaning-up-after-change>`. Trying to restart the node after such failure results in unpredictable behavior - it may restart normally, it may refuse to restart, or it may even try to rebootstrap.
If you don't have access to the node's logs anymore, assume the second case (the node might have attempted to leave the token ring), **do not try to restart the node**, instead follow the :ref:`cleaning up after a failed membership change <cleaning-up-after-change>` section.
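A minimal way to search for that message, assuming ScyllaDB logs go to the systemd journal (adjust to where your logs are kept):

.. code-block:: console

   journalctl -u scylla-server | grep "leaving token ring"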
----------------------------
Handling a Failed Removenode
----------------------------
Simply retry the removenode operation.
If you somehow lost the host ID of the node that you tried to remove, follow the instructions in :ref:`cleaning up after a failed membership change <cleaning-up-after-change>`.
--------------------------
Handling a Failed Replace
--------------------------
Replace is a special case of bootstrap, but the bootstrapping node tries to take the place of another dead node. You can retry a failed replace operation by restarting the replacing node.
If the failure persists or you decided that you don't want to perform the replace anymore, follow the instructions in :ref:`cleaning up after a failed membership change <cleaning-up-after-change>` section to remove the remains of the replacing node. You can then clear the node's data directories and attempt to replace again. Alternatively, you can remove the dead node which you initially tried to replace using :doc:`removenode </operating-scylla/nodetool-commands/removenode/>`, and perform a regular bootstrap.
.. _cleaning-up-after-change:
--------------------------------------------
Cleaning up after a Failed Membership Change
--------------------------------------------
After a failed membership change, the cluster may contain remains of a node that tried to leave or join - other nodes may consider the node a member, possibly in a transitioning state. It is important to remove any such "ghost" members. Their presence may reduce the cluster's availability, performance, or prevent further membership changes.
You need to determine the host IDs of any potential ghost members, then remove them using the :doc:`removenode operation </operating-scylla/nodetool-commands/removenode/>`. Note that after a failed replace, there may be two different host IDs that you'll want to find and run ``removenode`` on: the new replacing node and the old node that you tried to replace. (Or you can remove the new node only, then try to replace the old node again.)
Step One: Determining Host IDs of Ghost Members
===============================================
* After a failed bootstrap, you need to determine the host ID of the node that tried to bootstrap, if it managed to generate a host ID (it might not have chosen the host ID yet if it failed very early in the procedure, in which case there's nothing to remove). Look for a message containing ``system_keyspace - Setting local host id to`` in the node's logs, which will contain the node's host ID. For example: ``system_keyspace - Setting local host id to f180b78b-6094-434d-8432-7327f4d4b38d``. If you don't have access to the node's logs, read the generic method below.
* After a failed decommission, you need to determine the host ID of the node that tried to decommission. You can search the node's logs as in the failed bootstrap case (see above), or you can use the generic method below.
* After a failed removenode, you need to determine the host ID of the node that you tried to remove. You should already have it, since executing a removenode requires the host ID in the first place. But if you lost it somehow, read the generic method below.
* After a failed replace, you need to determine the host ID of the replacing node. Search the node's logs as in the failed bootstrap case (see above), or you can use the generic method below. You may also want to determine the host ID of the replaced node - either to attempt replacing it again after removing the remains of the previous replacing node, or to remove it using :doc:`nodetool removenode </operating-scylla/nodetool-commands/removenode/>`. You should already have the host ID of the replaced node if you used the ``replace_node_first_boot`` option to perform the replace.
If you cannot determine the ghost members' host ID using the suggestions above, use the method described below.
#. Make sure there are no ongoing membership changes.
#. Execute the following CQL query on one of your nodes to retrieve the Raft group 0 ID:
.. code-block:: cql
select value from system.scylla_local where key = 'raft_group0_id'
For example:
.. code-block:: cql
cqlsh> select value from system.scylla_local where key = 'raft_group0_id';
value
--------------------------------------
607fef80-c276-11ed-a6f6-3075f294cc65
#. Use the obtained Raft group 0 ID to query the set of all cluster members' host IDs (which includes the ghost members), by executing the following query:
.. code-block:: cql
select server_id from system.raft_state where group_id = <group0_id>
replace ``<group0_id>`` with the group 0 ID that you obtained. For example:
.. code-block:: cql
cqlsh> select server_id from system.raft_state where group_id = 607fef80-c276-11ed-a6f6-3075f294cc65;
server_id
--------------------------------------
26a9badc-6e96-4b86-a8df-5173e5ab47fe
7991e7f5-692e-45a0-8ae5-438be5bc7c4f
aff11c6d-fbe7-4395-b7ca-3912d7dba2c6
#. Execute the following CQL query to obtain the host IDs of all token ring members:
.. code-block:: cql
select peer, host_id, up from system.cluster_status;
For example:
.. code-block:: cql
cqlsh> select peer, host_id, up from system.cluster_status;
peer | host_id | up
-----------+--------------------------------------+-------
127.0.0.3 | null | False
127.0.0.1 | 26a9badc-6e96-4b86-a8df-5173e5ab47fe | True
127.0.0.2 | 7991e7f5-692e-45a0-8ae5-438be5bc7c4f | True
The output of this query is similar to the output of ``nodetool status``.
We included the ``up`` column to see which nodes are down and the ``peer`` column to see their IP addresses.
In this example, one of the nodes tried to decommission and crashed as soon as it left the token ring but before it left the Raft group. Its entry will show up in ``system.cluster_status`` queries with ``host_id = null``, like above, until the cluster is restarted.
#. A host ID belongs to a ghost member if:
* It appears in the ``system.raft_state`` query but not in the ``system.cluster_status`` query,
* Or it appears in the ``system.cluster_status`` query but does not correspond to any remaining node in your cluster.
In our example, the ghost member's host ID was ``aff11c6d-fbe7-4395-b7ca-3912d7dba2c6`` because it appeared in the ``system.raft_state`` query but not in the ``system.cluster_status`` query.
If you're unsure whether a given row in the ``system.cluster_status`` query corresponds to a node in your cluster, you can connect to each node in the cluster and execute ``select host_id from system.local`` (or search the node's logs) to obtain that node's host ID, collecting the host IDs of all nodes in your cluster. Then check if each host ID from the ``system.cluster_status`` query appears in your collected set; if not, it's a ghost member.
A good rule of thumb is to look at the members marked as down (``up = False`` in ``system.cluster_status``) - ghost members are eventually marked as down by the remaining members of the cluster. But remember that a real member might also be marked as down if it was shut down or partitioned away from the rest of the cluster. If in doubt, connect to each node and collect their host IDs, as described in the previous paragraph.
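The cross-check described above can be sketched with standard shell tools: ghost-member candidates are the set difference between the group 0 member list (the ``system.raft_state`` query) and the host IDs collected from every reachable node. The file names and host IDs below reuse the sample values from this section for illustration; in a real cluster you would populate the two files from the query results.

```shell
# Host IDs from the system.raft_state query (sample values from this section).
cat > raft_members.txt <<'EOF'
26a9badc-6e96-4b86-a8df-5173e5ab47fe
7991e7f5-692e-45a0-8ae5-438be5bc7c4f
aff11c6d-fbe7-4395-b7ca-3912d7dba2c6
EOF

# Host IDs collected via `select host_id from system.local` on each live node.
cat > known_members.txt <<'EOF'
26a9badc-6e96-4b86-a8df-5173e5ab47fe
7991e7f5-692e-45a0-8ae5-438be5bc7c4f
EOF

# `comm -23` prints lines that appear only in the first (sorted) input,
# i.e. members of group 0 with no matching live node: the ghost candidates.
sort -u raft_members.txt > raft_sorted.txt
sort -u known_members.txt > known_sorted.txt
comm -23 raft_sorted.txt known_sorted.txt > ghosts.txt
cat ghosts.txt
```

In this example the only line printed is ``aff11c6d-fbe7-4395-b7ca-3912d7dba2c6``, matching the ghost member identified above.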
In some cases, even after a failed topology change, there may be no ghost members left - for example, if a bootstrapping node crashed very early in the procedure or a decommissioning node crashed after it committed the membership change but before it finalized its own shutdown steps.
If any ghost members are present, proceed to the next step.
Step Two: Removing the Ghost Members
====================================
Given the host IDs of ghost members, you can remove them using ``removenode``; follow the :doc:`documentation for the removenode operation </operating-scylla/nodetool-commands/removenode/>`.
If you're executing ``removenode`` too quickly after a failed membership change, an error similar to the following might pop up:
.. code-block:: console
nodetool: ScyllaDB API server HTTP POST to URL '/storage_service/remove_node' failed: seastar::rpc::remote_verb_error (node_ops_cmd_check: Node 127.0.0.2 rejected node_ops_cmd=removenode_abort from node=127.0.0.1 with ops_uuid=0ba0a5ab-efbd-4801-a31c-034b5f55487c, pending_node_ops={b47523f2-de6a-4c38-8490-39127dba6b6a}, pending node ops is in progress)
In that case, simply wait 2 minutes before trying ``removenode`` again.
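The wait-and-retry advice above can be wrapped in a small shell helper. This is only a sketch: the ``retry`` function, its attempt count, and the delay are illustrative choices, not ScyllaDB defaults, and the host ID in the usage comment is a placeholder.

```shell
# Retry a command a bounded number of times, sleeping between attempts.
retry() {
    attempts=$1; delay=$2; shift 2
    i=1
    while ! "$@"; do
        [ "$i" -ge "$attempts" ] && return 1
        echo "attempt $i failed; retrying in ${delay}s..."
        sleep "$delay"
        i=$((i + 1))
    done
}

# Real usage might look like (host ID is a placeholder):
# retry 5 120 nodetool removenode aff11c6d-fbe7-4395-b7ca-3912d7dba2c6
```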
If ``removenode`` returns an error like:
.. code-block:: console
nodetool: ScyllaDB API server HTTP POST to URL '/storage_service/remove_node' failed: std::runtime_error (removenode[12e7e05b-d1ae-4978-b6a6-de0066aa80d8]: Host ID 42405b3b-487e-4759-8590-ddb9bdcebdc5 not found in the cluster)
and you're sure that you're providing the correct Host ID, it means that the member was already removed and you don't have to clean up after it.


@@ -24,7 +24,6 @@ Cluster Management Procedures
Update Topology Strategy From Simple to Network <update-topology-strategy-from-simple-to-network>
Safely Shutdown Your Cluster <safe-shutdown>
Safely Restart Your Cluster <safe-start>
Handling Membership Change Failures <handling-membership-change-failures>
repair-based-node-operation
Prevent Quorum Loss in Symmetrical Multi-DC Clusters <arbiter-dc>
@@ -80,8 +79,6 @@ Cluster Management Procedures
* :doc:`Add a Decommissioned Node Back to a ScyllaDB Cluster </operating-scylla/procedures/cluster-management/revoke-decommission/>`
* :doc:`Handling Membership Change Failures </operating-scylla/procedures/cluster-management/handling-membership-change-failures>`
* :ref:`Add Bigger Nodes to a Cluster <add-bigger-nodes-to-a-cluster>`
* :doc:`Repair Based Node Operations (RBNO) </operating-scylla/procedures/cluster-management/repair-based-node-operation>`


@@ -54,10 +54,8 @@ CQL Limits
Hundreds of kilobytes (good latency) or megabytes (mediocre latency)
* - Key length
- 65533
* - Table / CF name length
- 48 characters
* - Keyspace name length
- 48 characters
* - Keyspace / Table / View / Index name length
- 192 characters
* - Query parameters in a query
- 65535 (2^16-1)
* - Statements in a batch


@@ -1 +0,0 @@
Perform `the procedure for enabling consistent topology changes <https://opensource.docs.scylladb.com/branch-6.0/upgrade/upgrade-opensource/upgrade-guide-from-5.4-to-6.0/enable-consistent-topology.html>`_.


@@ -1,3 +0,0 @@
:ref:`The Raft upgrade procedure <verify-raft-procedure>`
or `the procedure for enabling consistent topology changes <https://opensource.docs.scylladb.com/branch-6.0/upgrade/upgrade-opensource/upgrade-guide-from-5.4-to-6.0/enable-consistent-topology.html>`_
got stuck because one of the nodes failed in the middle of the procedure and is irrecoverable.


@@ -67,29 +67,21 @@ Examples
Manual Recovery Procedure
===========================
You can follow the manual recovery procedure when:
.. note::
* The majority of nodes (for example, 2 out of 3) failed and are irrecoverable.
* .. scylladb_include_flag:: enabling-consistent-topology-failure.rst
This recovery procedure assumes that consistent topology changes are enabled for your cluster, which is mandatory in
versions 2025.2 and later. If you failed to enable consistent topology changes during the upgrade to 2025.2, you need
to follow the `previous recovery procedure <https://docs.scylladb.com/manual/branch-2025.1/troubleshooting/handling-node-failures.html#manual-recovery-procedure>`_.
.. warning::
See :ref:`Verifying that consistent topology changes are enabled <verifying-consistent-topology-changes-enabled>`.
Perform the manual recovery procedure **only** if you're dealing with
**irrecoverable** nodes. If possible, restart your nodes, and use the manual
recovery procedure as a last resort.
You can follow the manual recovery procedure when the majority of nodes (for example, 2 out of 3) failed and are irrecoverable.
.. warning::
The manual recovery procedure is not supported :doc:`if tablets are enabled on any of your keyspaces </architecture/tablets/>`.
In such a case, you need to :doc:`restore from backup </operating-scylla/procedures/backup-restore/restore>`.
During the manual recovery procedure you'll enter a special ``RECOVERY`` mode, remove
all faulty nodes (using the standard :doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>`),
delete the internal Raft data, and restart the cluster. This will cause the cluster to
perform the Raft upgrade procedure again, initializing the Raft algorithm from scratch.
The manual recovery procedure is applicable both to clusters that were not running Raft
in the past and then had Raft enabled, and to clusters that were bootstrapped using Raft.
During the manual recovery procedure you'll restart live nodes in a special recovery mode, which will cause the
cluster to initialize the Raft algorithm from scratch. However, this time, faulty nodes will not participate in the
algorithm. Then, you will replace all faulty nodes (using the standard
:doc:`node replacement procedure </operating-scylla/procedures/cluster-management/replace-dead-node/>`). Finally, you
will leave the recovery mode and remove the obsolete internal Raft data.
**Prerequisites**
@@ -102,53 +94,95 @@ in the past and then had Raft enabled, and to clusters that were bootstrapped us
to life and communicate with the rest of the cluster, setup firewall rules or otherwise
isolate your alive nodes to reject any communication attempts from these dead nodes.
* Prepare your service for downtime before proceeding.
Entering ``RECOVERY`` mode requires a node restart. Restarting an additional node while
some nodes are already dead may lead to unavailability of data queries (assuming that
you haven't lost it already). For example, if you're using the standard RF=3,
CL=QUORUM setup, and you're recovering from a stuck upgrade procedure because one
of your nodes is dead, restarting another node will cause temporary data query
unavailability (until the node finishes restarting).
* Ensure all live nodes are in the normal state using
:doc:`nodetool status </operating-scylla/nodetool-commands/status>`. If there is a node
that is joining or leaving, it cannot be recovered. You must permanently stop it. After
performing the recovery procedure, use
:doc:`nodetool status </operating-scylla/nodetool-commands/status>` on any other node.
If the stopped node appears in the output, it means that other nodes still consider it
a member of the cluster, and you should remove it with the
:doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>`.
* Check if the cluster lost data. If the number of dead nodes is equal to or larger than your
keyspaces' RF, then some of the data is lost, and you need to retrieve it from backup. After
completing the manual recovery procedure,
:doc:`restore the data from backup </operating-scylla/procedures/backup-restore/restore/>`.
* Decide whether to shut down your service for the manual recovery procedure. ScyllaDB
serves data queries during the procedure; however, you may not want to rely on it if:
* you lost some data, or
* restarting a single node could lead to unavailability of data queries (the procedure involves
a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart>`). For
example, if you are using the standard RF=3, CL=QUORUM setup, you have two datacenters, all
nodes in one of the datacenters are dead and one node in the other datacenter is dead,
restarting another node in the other datacenter will cause temporary data query
unavailability (until the node finishes restarting).
**Procedure**
#. Perform the following query on **every alive node** in the cluster, using e.g. ``cqlsh``:
#. Perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of your live nodes.
#. Find the group 0 ID by performing the following query on any live node, using e.g. ``cqlsh``:
.. code-block:: cql
cqlsh> UPDATE system.scylla_local SET value = 'recovery' WHERE key = 'group0_upgrade_state';
cqlsh> SELECT value FROM system.scylla_local WHERE key = 'raft_group0_id';
#. Perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of your alive nodes.
The group 0 ID is needed in the following steps.
#. Verify that all the nodes have entered ``RECOVERY`` mode when restarting; look for one of the following messages in their logs:
#. Find ``commit_idx`` of all live nodes by performing the following query on **every live node**:
.. code-block:: cql
cqlsh> SELECT commit_idx FROM system.raft WHERE group_id = <group 0 ID>;
Choose a node with the largest ``commit_idx``. If there are multiple such nodes, choose any of them.
The chosen node will be the *recovery leader*.
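The leader choice above can be sketched as a one-liner, assuming you have recorded one ``<address> <commit_idx>`` pair per live node from the query results; the addresses and index values below are sample data, not real output.

```shell
# Sample per-node commit_idx values (one "address commit_idx" pair per line).
cat > commit_idx.txt <<'EOF'
127.0.0.1 1042
127.0.0.2 1037
127.0.0.3 1042
EOF

# Sort numerically (descending) by the second column and take the top row.
# Ties for the maximum can be broken arbitrarily, per the step above.
leader=$(sort -k2,2nr commit_idx.txt | head -n1 | awk '{print $1}')
echo "recovery leader: $leader"
```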
#. Perform the following queries on **every live node**:
.. code-block:: cql
cqlsh> TRUNCATE TABLE system.discovery;
cqlsh> DELETE value FROM system.scylla_local WHERE key = 'raft_group0_id';
#. Perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of all live nodes,
but:
* **restart the recovery leader first**,
* before restarting each node, add the ``recovery_leader`` property to its ``scylla.yaml`` file and set it to the
host ID of the recovery leader,
* after restarting each node, make sure it participated in Raft recovery; look for one of the following messages
in its logs:
.. code-block:: console
group0_client - RECOVERY mode.
raft_group0 - setup_group0: Raft RECOVERY mode, skipping group 0 setup.
raft_group0_upgrade - RECOVERY mode. Not attempting upgrade.
storage_service - Performing Raft-based recovery procedure with recovery leader <host ID of the recovery leader>/<IP address of the recovery leader>
storage_service - Raft-based recovery procedure - found group 0 with ID <ID of the new group 0; different from the one used in other steps>
#. Remove all your dead nodes using the :doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>`.
After completing this step, Raft should be fully functional.
#. Remove existing Raft cluster data by performing the following queries on **every alive node** in the cluster, using e.g. ``cqlsh``:
#. Replace all dead nodes in the cluster using the
:doc:`node replacement procedure </operating-scylla/procedures/cluster-management/replace-dead-node/>`.
.. note::
Removing some of the dead nodes with the
:doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>` is also possible,
but it may require decreasing RF of your keyspaces. With tablets enabled, ``nodetool removenode`` is rejected
if there are not enough nodes to satisfy RF of any tablet keyspace in the node's datacenter.
#. Remove the ``recovery_leader`` property from the ``scylla.yaml`` file on all nodes. Send the ``SIGHUP`` signal to all
ScyllaDB processes to ensure the change is applied.
#. Perform the following queries on **every live node**:
.. code-block:: cql
cqlsh> TRUNCATE TABLE system.topology;
cqlsh> TRUNCATE TABLE system.discovery;
cqlsh> TRUNCATE TABLE system.group0_history;
cqlsh> DELETE value FROM system.scylla_local WHERE key = 'raft_group0_id';
#. Make sure that schema is synchronized in the cluster by executing :doc:`nodetool describecluster </operating-scylla/nodetool-commands/describecluster>` on each node and verifying that the schema version is the same on all nodes.
#. We can now leave ``RECOVERY`` mode. On **every alive node**, perform the following query:
.. code-block:: cql
cqlsh> DELETE FROM system.scylla_local WHERE key = 'group0_upgrade_state';
#. Perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of your alive nodes.
#. The Raft upgrade procedure will start anew. :ref:`Verify <verify-raft-procedure>` that it finishes successfully.
#. .. scylladb_include_flag:: enable-consistent-topology.rst
cqlsh> DELETE FROM system.raft WHERE group_id = <group 0 ID>;
cqlsh> DELETE FROM system.raft_snapshots WHERE group_id = <group 0 ID>;
cqlsh> DELETE FROM system.raft_snapshot_config WHERE group_id = <group 0 ID>;


@@ -3,9 +3,10 @@ Upgrade ScyllaDB
====================
.. toctree::
:maxdepth: 1
ScyllaDB Open Source 6.2 to ScyllaDB 2025.1 <upgrade-guide-from-6.2-to-2025.1/index>
ScyllaDB Enterprise 2024.x to ScyllaDB 2025.1 <upgrade-guide-from-2024.x-to-2025.1/index>
ScyllaDB 2025.1 to ScyllaDB 2025.2 <upgrade-guide-from-2025.1-to-2025.2/index>
ScyllaDB 2025.2 Patch Upgrades <upgrade-guide-from-2025.2.x.y-to-2025.2.x.z>
ScyllaDB Image <ami-upgrade>


@@ -1,129 +0,0 @@
=====================================
Enable Consistent Topology Updates
=====================================
.. note::
The following procedure only applies if:
* You're upgrading **from ScyllaDB Enterprise 2024.1** to ScyllaDB 2025.1.
* You previously upgraded from 2024.1 to 2024.2 without enabling consistent
topology updates (see the `2024.2 upgrade guide <https://enterprise.docs.scylladb.com/branch-2024.2/upgrade/upgrade-enterprise/upgrade-guide-from-2024.1-to-2024.2/enable-consistent-topology.html>`_
for reference).
Introduction
============
ScyllaDB 2025.1 has :ref:`consistent topology changes based on Raft <raft-topology-changes>`.
Clusters created with version 2025.1 use consistent topology changes right
from the start. However, consistent topology changes are *not* automatically
enabled in clusters upgraded from version 2024.1. In such clusters, you need to
enable consistent topology changes manually by following the procedure described in this article.
Before you start, you **must** check that the cluster meets the prerequisites
and ensure that some administrative procedures will not be run while
the procedure is in progress.
.. _enable-raft-topology-2025.1-prerequisites:
Prerequisites
=============
* Make sure that all nodes in the cluster are upgraded to ScyllaDB 2025.1.
* Verify that :ref:`schema on raft is enabled <schema-on-raft-enabled>`.
* Make sure that all nodes enabled ``SUPPORTS_CONSISTENT_TOPOLOGY_CHANGES`` cluster feature.
One way to verify it is to look for the following message in the log:
.. code-block:: none
features - Feature SUPPORTS_CONSISTENT_TOPOLOGY_CHANGES is enabled
Alternatively, it can be verified programmatically by checking whether the ``value``
column under the ``enabled_features`` key contains the name of the feature in
the ``system.scylla_local`` table. One way to do it is with the following bash script:
.. code-block:: bash
until cqlsh -e "select value from system.scylla_local where key = 'enabled_features'" | grep "SUPPORTS_CONSISTENT_TOPOLOGY_CHANGES"
do
echo "Upgrade didn't finish yet on the local node, waiting 10 seconds before checking again..."
sleep 10
done
echo "Upgrade completed on the local node"
* Make sure that all nodes are alive for the duration of the procedure.
.. _enable-raft-topology-2025.1-forbidden-operations:
Administrative operations that must not be running during the procedure
=========================================================================
Make sure that administrative operations will not be running while
the procedure is in progress. In particular, you must abstain from:
* :doc:`Cluster management procedures </operating-scylla/procedures/cluster-management/index>`
(adding, replacing, removing, decommissioning nodes, etc.).
* Running :doc:`nodetool repair </operating-scylla/nodetool-commands/repair>`.
* Running :doc:`nodetool checkAndRepairCdcStreams </operating-scylla/nodetool-commands/checkandrepaircdcstreams>`.
* Any modifications of :doc:`authentication </operating-scylla/security/authentication>` and :doc:`authorization </operating-scylla/security/enable-authorization>` settings.
* Any change of authorization via :doc:`CQL API </operating-scylla/security/authorization>`.
* Schema changes.
Running the procedure
=====================
.. warning::
Before proceeding, make sure that all the :ref:`prerequisites <enable-raft-topology-2025.1-prerequisites>` are met
and no :ref:`forbidden administrative operations <enable-raft-topology-2025.1-forbidden-operations>` will run
during the procedure. Failing to do so may put the cluster in an inconsistent state.
#. Issue a POST HTTP request to the ``/storage_service/raft_topology/upgrade``
endpoint to any of the nodes in the cluster.
For example, you can do it with ``curl``:
.. code-block:: bash
curl -X POST "http://127.0.0.1:10000/storage_service/raft_topology/upgrade"
#. Wait until all nodes report that the procedure is complete. You can check
whether a node finished the procedure in one of two ways:
* By sending a HTTP ``GET`` request on the ``/storage_service/raft_topology/upgrade``
endpoint. For example, you can do it with ``curl``:
.. code-block:: bash
curl -X GET "http://127.0.0.1:10000/storage_service/raft_topology/upgrade"
It will return a JSON string that will be equal to ``done`` after the procedure is complete on that node.
* By querying the ``upgrade_state`` column in the ``system.topology`` table.
You can use ``cqlsh`` to get the value of the column:
.. code-block:: bash
cqlsh -e "select upgrade_state from system.topology"
The ``upgrade_state`` column should be set to ``done`` after the procedure
is complete on that node.
After the procedure is complete on all nodes, wait at least one minute before
issuing any topology changes in order to avoid data loss from writes that were
started before the procedure.
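The completion check in step 2 can be polled from a script. This is a sketch: ``NODE_ADDRESSES`` is an assumed list of node addresses, the REST endpoint is the one shown above, and the 10-second interval is an arbitrary choice.

```shell
# Assumed node address list; replace with your cluster's addresses.
NODE_ADDRESSES="127.0.0.1 127.0.0.2 127.0.0.3"

# Succeeds only if every node reports the upgrade as done.
all_done() {
    for addr in $NODE_ADDRESSES; do
        state=$(curl -s -X GET "http://$addr:10000/storage_service/raft_topology/upgrade") || return 1
        # The endpoint returns a JSON string, i.e. `"done"` including quotes.
        echo "$state" | grep -q '^"done"$' || return 1
    done
}

wait_for_upgrade() {
    until all_done; do
        echo "upgrade still in progress; checking again in 10 seconds..."
        sleep 10
    done
    echo "upgrade complete on all nodes"
}

# Against a real cluster you would now run:
# wait_for_upgrade
```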
What if the procedure gets stuck?
===================================
If the procedure gets stuck at some point, first check the status of your cluster:
- If there are some nodes that are not alive, try to restart them.
- If all nodes are alive, ensure that the network is healthy and every node can reach all other nodes.
- If all nodes are alive and the network is healthy, perform
a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of the cluster.
If none of the above solves the issue, perform :ref:`the Raft recovery procedure <recovery-procedure>`.
During recovery, the cluster will switch back to the gossip-based topology management mechanism.
After exiting recovery, you should retry enabling consistent topology updates using
the procedure described in this document.


@@ -1,17 +0,0 @@
==========================================================
Upgrade - ScyllaDB Enterprise 2024.x to ScyllaDB 2025.1
==========================================================
.. toctree::
:maxdepth: 2
:hidden:
ScyllaDB <upgrade-guide-from-2024.x-to-2025.1>
Enable Consistent Topology Updates <enable-consistent-topology>
Metrics <metric-update-2024.x-to-2025.1>
* :doc:`Upgrade from ScyllaDB Enterprise 2024.x.y to ScyllaDB 2025.1.y <upgrade-guide-from-2024.x-to-2025.1>`
* :doc:`Enable Consistent Topology Updates <enable-consistent-topology>`
* :doc:`Metrics Update Between 2024.x and 2025.1 <metric-update-2024.x-to-2025.1>`


@@ -1,74 +0,0 @@
.. |SRC_VERSION| replace:: 2024.x
.. |NEW_VERSION| replace:: 2025.1
=======================================================================================
Metrics Update Between |SRC_VERSION| and |NEW_VERSION|
=======================================================================================
ScyllaDB Enterprise |NEW_VERSION| Dashboards are available as part of the latest |mon_root|.
New Metrics
------------
The following metrics are new in ScyllaDB |NEW_VERSION| compared to |SRC_VERSION|:
.. list-table::
:widths: 25 150
:header-rows: 1
* - Metric
- Description
* - scylla_alternator_batch_item_count
- The total number of items processed across all batches.
* - scylla_hints_for_views_manager_sent_bytes_total
- The total size of the sent hints (in bytes).
* - scylla_hints_manager_sent_bytes_total
- The total size of the sent hints (in bytes).
* - scylla_io_queue_activations
- The number of times the class was woken up from idle.
* - scylla_raft_apply_index
- The applied index.
* - scylla_raft_commit_index
- The commit index.
* - scylla_raft_log_last_index
- The index of the last log entry.
* - scylla_raft_log_last_term
- The term of the last log entry.
* - scylla_raft_snapshot_last_index
- The index of the snapshot.
* - scylla_raft_snapshot_last_term
- The term of the snapshot.
* - scylla_raft_state
- The current state: 0 - follower, 1 - candidate, 2 - leader
* - scylla_rpc_client_delay_samples
- The total number of delay samples.
* - scylla_rpc_client_delay_total
- The total delay in seconds.
* - scylla_storage_proxy_replica_received_hints_bytes_total
- The total size of hints and MV hints received by this node.
* - scylla_storage_proxy_replica_received_hints_total
- The number of hints and MV hints received by this node.
Renamed Metrics
------------------
The following metrics are renamed in ScyllaDB |NEW_VERSION| compared to |SRC_VERSION|:
.. list-table::
:widths: 25 150
:header-rows: 1
* - 2024.2
- 2025.1
* - scylla_hints_for_views_manager_sent
- scylla_hints_for_views_manager_sent_total
* - scylla_hints_manager_sent
- scylla_hints_manager_sent_total
* - scylla_forward_service_requests_dispatched_to_other_nodes
- scylla_mapreduce_service_requests_dispatched_to_other_nodes
* - scylla_forward_service_requests_dispatched_to_own_shards
- scylla_mapreduce_service_requests_dispatched_to_own_shards
* - scylla_forward_service_requests_executed
- scylla_mapreduce_service_requests_executed


@@ -1,395 +0,0 @@
.. |SCYLLA_NAME| replace:: ScyllaDB
.. |SRC_VERSION| replace:: 2024.x
.. |NEW_VERSION| replace:: 2025.1
.. |ROLLBACK| replace:: rollback
.. _ROLLBACK: ./#rollback-procedure
.. |SCYLLA_METRICS| replace:: ScyllaDB Metrics Update - ScyllaDB 2024.x to 2025.1
.. _SCYLLA_METRICS: ../metric-update-2024.x-to-2025.1
=======================================================================================
Upgrade from |SCYLLA_NAME| Enterprise |SRC_VERSION| to |SCYLLA_NAME| |NEW_VERSION|
=======================================================================================
This document is a step-by-step procedure for upgrading from |SCYLLA_NAME| |SRC_VERSION|
to |NEW_VERSION|, and rollback to version |SRC_VERSION| if required.
This guide covers upgrading ScyllaDB on Red Hat Enterprise Linux (RHEL), CentOS, Debian,
and Ubuntu. See :doc:`OS Support by Platform and Version </getting-started/os-support>`
for information about supported versions.
This guide also applies when you're upgrading ScyllaDB official image on EC2,
GCP, or Azure.
Before You Upgrade ScyllaDB
================================
**Upgrade Your Driver**
If you're using a :doc:`ScyllaDB driver </using-scylla/drivers/cql-drivers/index>`,
upgrade the driver before you upgrade ScyllaDB. The latest two versions of each driver
are supported.
**Upgrade ScyllaDB Monitoring Stack**
If you're using the ScyllaDB Monitoring Stack, verify that your Monitoring Stack
version supports the ScyllaDB version to which you want to upgrade. See
`ScyllaDB Monitoring Stack Support Matrix <https://monitoring.docs.scylladb.com/stable/reference/matrix.html>`_.
We recommend upgrading the Monitoring Stack to the latest version.
**Check Feature Updates**
See the ScyllaDB Release Notes for the latest updates. The Release Notes are published
at the `ScyllaDB Community Forum <https://forum.scylladb.com/>`_.
Upgrade Procedure
=================
A ScyllaDB upgrade is a rolling procedure that does **not** require full cluster shutdown.
For each of the nodes in the cluster, you will:
* Check that the cluster's schema is synchronized
* Drain the node and backup the data
* Backup the configuration file
* Stop ScyllaDB
* Download and install new ScyllaDB packages
* Start ScyllaDB
* Validate that the upgrade was successful
.. caution::
Apply the procedure **serially** on each node. Do not move to the next node before
validating that the node you upgraded is up and running the new version.
**During** the rolling upgrade, it is highly recommended:
* Not to use the new |NEW_VERSION| features.
* Not to run administration functions, like repairs, refresh, rebuild, or add or remove
nodes. See `sctool <https://manager.docs.scylladb.com/stable/sctool/>`_ for suspending
ScyllaDB Manager's scheduled or running repairs.
* Not to apply schema changes.
**After** the upgrade, you may need to enable consistent topology updates.
See :ref:`After Upgrading Every Node <upgrade-2024.x-2025.1-after-upgrading-nodes>` for details.
Upgrade Steps
=============
Check the cluster schema
-------------------------
Make sure that all nodes have the schema synchronized before upgrade. The upgrade
procedure will fail if there is a schema disagreement between nodes.
.. code:: sh
nodetool describecluster
Backup the data
-----------------------------------
Before any major procedure, like an upgrade, it is recommended to backup all the data
to an external device.
We recommend using `ScyllaDB Manager <https://manager.docs.scylladb.com/stable/backup/index.html>`_
to create backups.
Alternatively, you can use the ``nodetool snapshot`` command. For **each** node in the cluster, run
the following command:
.. code:: sh
nodetool drain
nodetool snapshot
Take note of the directory name that nodetool gives you, and copy all the directories
having that name under ``/var/lib/scylla`` to a backup device.
When the upgrade is completed on all nodes, remove the snapshot with the
``nodetool clearsnapshot -t <snapshot>`` command to prevent running out of space.
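The copy step above can be sketched as a small shell function, assuming the usual data-directory layout ``<data dir>/<keyspace>/<table>/snapshots/<tag>``; the paths and the snapshot tag in the usage comment are placeholders, and the tag is whatever ``nodetool snapshot`` printed.

```shell
# Copy every snapshot directory with the given tag to a backup location,
# preserving the keyspace/table part of each path so the backup stays
# restorable per table.
backup_snapshot() {
    tag=$1; data_dir=$2; backup_dir=$3
    find "$data_dir" -type d -path "*/snapshots/$tag" | while read -r snap; do
        rel=${snap#"$data_dir"/}
        mkdir -p "$backup_dir/$rel"
        cp -a "$snap/." "$backup_dir/$rel/"
    done
}

# Real usage, with the tag that nodetool printed (placeholder values):
# backup_snapshot 1694750000000 /var/lib/scylla/data /mnt/backup/scylla
```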
Backup the configuration file
------------------------------
Back up the ``scylla.yaml`` configuration file and the ScyllaDB packages
in case you need to rollback the upgrade.
.. tabs::
.. group-tab:: Debian/Ubuntu
.. code:: sh
sudo cp -a /etc/scylla/scylla.yaml /etc/scylla/scylla.yaml.backup
sudo cp /etc/apt/sources.list.d/scylla.list ~/scylla.list-backup
.. group-tab:: RHEL/CentOS
.. code:: sh
sudo cp -a /etc/scylla/scylla.yaml /etc/scylla/scylla.yaml.backup
sudo cp /etc/yum.repos.d/scylla.repo ~/scylla.repo-backup
Gracefully stop the node
------------------------
.. code:: sh
sudo service scylla-server stop
Download and install the new release
------------------------------------
Before upgrading, check what version you are running now using ``scylla --version``.
You will need to reinstall this exact version if you |ROLLBACK|_
the upgrade.
.. tabs::
.. group-tab:: Debian/Ubuntu
#. Update the ScyllaDB deb repo to |NEW_VERSION|.
.. code-block:: console
sudo wget -O /etc/apt/sources.list.d/scylla.list https://downloads.scylladb.com/deb/debian/scylla-2025.1.list
#. Install the new ScyllaDB version:
.. code-block:: console
sudo apt-get clean all
sudo apt-get update
sudo apt-get dist-upgrade scylla
Answer ``y`` to the first two questions.
.. group-tab:: RHEL/CentOS
#. Update the ScyllaDB rpm repo to |NEW_VERSION|.
.. code-block:: console
sudo curl -o /etc/yum.repos.d/scylla.repo -L https://downloads.scylladb.com/rpm/centos/scylla-2025.1.repo
#. Install the new ScyllaDB version:
.. code:: sh
sudo yum clean all
sudo yum update scylla\* -y
.. group-tab:: EC2/GCP/Azure Ubuntu Image
If you're using the ScyllaDB official image (recommended), see
the **Debian/Ubuntu** tab for upgrade instructions. If you're using your
own image and have installed ScyllaDB packages for Ubuntu or Debian,
you need to apply an extended upgrade procedure:
#. Update the ScyllaDB deb repo (see the **Debian/Ubuntu** tab).
#. Install the new ScyllaDB version with the additional
``scylla-machine-image`` package:
.. code::
sudo apt-get clean all
sudo apt-get update
sudo apt-get dist-upgrade scylla
sudo apt-get dist-upgrade scylla-machine-image
#. Run ``scylla_setup`` without running ``io_setup``.
#. Run ``sudo /opt/scylladb/scylla-machine-image/scylla_cloud_io_setup``.
If you need the JMX server, see
:doc:`Install scylla-jmx Package </getting-started/installation-common/install-jmx>`
and install the new version.
Start the node
--------------
.. code:: sh
sudo service scylla-server start
Validate
--------
#. Check cluster status with ``nodetool status`` and make sure **all** nodes, including
the one you just upgraded, are in ``UN`` status.
#. Use ``curl -X GET "http://localhost:10000/storage_service/scylla_release_version"``
to check the ScyllaDB version. Validate that the version matches the one you upgraded to.
#. Check scylla-server log (using ``journalctl _COMM=scylla``) and ``/var/log/syslog``
to validate there are no new errors in the log.
#. Check again after two minutes to validate that no new issues are introduced.
Once you are sure the node upgrade was successful, move to the next node in the cluster.
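The version check in step 2 can be scripted per node before moving on. This is a sketch; the node address and expected version string in the usage comment are placeholders.

```shell
# Compare a node's reported release version against the expected one.
check_version() {
    addr=$1; expected=$2
    # The endpoint returns the version as a JSON string; strip the quotes.
    actual=$(curl -s -X GET "http://$addr:10000/storage_service/scylla_release_version" | tr -d '"')
    if [ "$actual" = "$expected" ]; then
        echo "node $addr is running $actual"
    else
        echo "node $addr reports $actual, expected $expected" >&2
        return 1
    fi
}

# Real usage (placeholder values):
# check_version 127.0.0.1 2025.1.0
```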
.. _upgrade-2024.x-2025.1-after-upgrading-nodes:
After Upgrading Every Node
===============================
This step applies if:
* You're upgrading from ScyllaDB Enterprise **2024.1** to ScyllaDB 2025.1.
* You previously upgraded from 2024.1 to 2024.2 without enabling consistent
topology updates (see the `2024.2 upgrade guide <https://enterprise.docs.scylladb.com/branch-2024.2/upgrade/upgrade-enterprise/upgrade-guide-from-2024.1-to-2024.2/enable-consistent-topology.html>`_
for reference).
After you have upgraded every node, you must enable the Raft-based consistent
topology updates feature. See
:doc:`Enable Consistent Topology Updates </upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/enable-consistent-topology>`
for instructions.
Rollback Procedure
==================
.. warning::
The rollback procedure can only be applied if some nodes have **not** been upgraded
to |NEW_VERSION| yet. As soon as the last node in the rolling upgrade procedure is
started with |NEW_VERSION|, rollback becomes impossible. At that point, the only way
to restore a cluster to |SRC_VERSION| is by restoring it from backup.
The following procedure describes a rollback from |SCYLLA_NAME| |NEW_VERSION|.x to
|SRC_VERSION|.y. Apply this procedure if an upgrade from |SRC_VERSION| to |NEW_VERSION|
failed before completing on all nodes.
* Use this procedure only for nodes you upgraded to |NEW_VERSION|.
* Execute the commands one node at a time, moving to the next node
only after the rollback procedure is completed successfully.
ScyllaDB rollback is a rolling procedure that does **not** require a full cluster shutdown.
For each of the nodes you roll back to |SRC_VERSION|, you will:
* Drain the node and stop ScyllaDB
* Retrieve the old ScyllaDB packages
* Restore the configuration file
* Reload systemd configuration
* Restart ScyllaDB
* Validate the rollback success
Apply the procedure **serially** on each node. Do not move to the next node
before validating that the rollback was successful and the node is up and
running the old version.
Rollback Steps
==============
Drain and gracefully stop the node
----------------------------------
.. code:: sh
nodetool drain
sudo service scylla-server stop
Download and install the old release
------------------------------------
.. tabs::
.. group-tab:: Debian/Ubuntu
#. Remove the old repo file.
.. code:: sh
sudo rm -rf /etc/apt/sources.list.d/scylla.list
#. Restore the |SRC_VERSION| packages backed up during the upgrade.
.. code:: sh
sudo cp ~/scylla.list-backup /etc/apt/sources.list.d/scylla.list
sudo chown root:root /etc/apt/sources.list.d/scylla.list
sudo chmod 644 /etc/apt/sources.list.d/scylla.list
#. Install:
.. code-block::
sudo apt-get update
sudo apt-get remove scylla\* -y
sudo apt-get install scylla-enterprise
Answer y to the first two questions.
.. group-tab:: RHEL/CentOS
#. Remove the old repo file.
.. code:: sh
sudo rm -rf /etc/yum.repos.d/scylla.repo
#. Restore the |SRC_VERSION| packages backed up during the upgrade procedure.
.. code:: sh
sudo cp ~/scylla.repo-backup /etc/yum.repos.d/scylla.repo
sudo chown root:root /etc/yum.repos.d/scylla.repo
sudo chmod 644 /etc/yum.repos.d/scylla.repo
#. Install:
.. code:: console
sudo yum clean all
sudo yum remove scylla\*
sudo yum install scylla-enterprise
.. group-tab:: EC2/GCP/Azure Ubuntu Image
If you're using the ScyllaDB official image (recommended), see the **Debian/Ubuntu**
tab for rollback instructions.
If you're using your own image and installed ScyllaDB packages for Ubuntu or Debian,
you need to additionally restore the ``scylla-machine-image`` package.
#. Restore the |SRC_VERSION| packages backed up during the upgrade
(see the **Debian/Ubuntu** tab).
#. Install:
.. code-block::
sudo apt-get update
sudo apt-get remove scylla\* -y
sudo apt-get install scylla-enterprise
sudo apt-get install scylla-enterprise-machine-image
Answer y to the first two questions.
Restore the configuration file
------------------------------
.. code:: sh
sudo rm -rf /etc/scylla/scylla.yaml
sudo cp /etc/scylla/scylla.yaml-backup /etc/scylla/scylla.yaml
Reload systemd configuration
----------------------------
You must reload the unit file if the systemd unit file is changed.
.. code:: sh
sudo systemctl daemon-reload
Start the node
--------------
.. code:: sh
sudo service scylla-server start
Validate
--------
Check the upgrade instructions above for validation. Once you are sure the node rollback
is successful, move to the next node in the cluster.

@@ -0,0 +1,13 @@
==========================================================
Upgrade - ScyllaDB 2025.1 to ScyllaDB 2025.2
==========================================================
.. toctree::
:maxdepth: 2
:hidden:
Upgrade ScyllaDB <upgrade-guide-from-2025.1-to-2025.2>
Metrics Update <metric-update-2025.1-to-2025.2>
* :doc:`Upgrade from ScyllaDB 2025.1.x to ScyllaDB 2025.2.y <upgrade-guide-from-2025.1-to-2025.2>`
* :doc:`Metrics Update Between 2025.1 and 2025.2 <metric-update-2025.1-to-2025.2>`

@@ -0,0 +1,61 @@
.. |SRC_VERSION| replace:: 2025.1
.. |NEW_VERSION| replace:: 2025.2
Metrics Update Between |SRC_VERSION| and |NEW_VERSION|
================================================================
.. toctree::
:maxdepth: 2
:hidden:
ScyllaDB |NEW_VERSION| Dashboards are available as part of the latest |mon_root|.
New Metrics
------------
The following metrics are new in ScyllaDB |NEW_VERSION| compared to |SRC_VERSION|:
.. list-table::
:widths: 25 150
:header-rows: 1
* - Metric
- Description
* - scylla_alternator_batch_item_count_histogram
- A histogram of the number of items in a batch request.
* - scylla_database_total_view_updates_failed_pairing
- Total number of view updates for which we failed base/view pairing.
* - scylla_group_name_cross_rack_collocations
- The number of co-locating migrations that move replicas across racks.
* - scylla_network_bytes_received
- The number of bytes received from network sockets.
* - scylla_network_bytes_sent
- The number of bytes written to network sockets.
* - scylla_reactor_awake_time_ms_total
- Total reactor awake time (wall_clock).
* - scylla_reactor_cpu_used_time_ms
- Total reactor thread CPU time (from CLOCK_THREAD_CPUTIME).
* - scylla_reactor_sleep_time_ms_total
- Total reactor sleep time (wall clock).
* - scylla_sstable_compression_dicts_total_live_memory_bytes
- Total amount of memory consumed by SSTable compression dictionaries in RAM.
* - scylla_transport_connections_blocked
- Holds an incrementing counter with the CQL connections that were blocked
before being processed due to the threshold configured via
``uninitialized_connections_semaphore_cpu_concurrency``. Blocks are normal
when multiple connections are initialized at once. If connections are
timing out and this value is high, it indicates either a connection storm
or unusually slow processing.
* - scylla_transport_connections_shed
- Holds an incrementing counter with the CQL connections that were shed
due to a concurrency semaphore timeout (threshold configured via
``uninitialized_connections_semaphore_cpu_concurrency``). This typically
happens during a connection storm.

@@ -1,16 +1,16 @@
.. |SCYLLA_NAME| replace:: ScyllaDB
.. |SRC_VERSION| replace:: 6.2
.. |NEW_VERSION| replace:: 2025.1
.. |SRC_VERSION| replace:: 2025.1
.. |NEW_VERSION| replace:: 2025.2
.. |ROLLBACK| replace:: rollback
.. _ROLLBACK: ./#rollback-procedure
.. |SCYLLA_METRICS| replace:: ScyllaDB Metrics Update - ScyllaDB 6.2 to 2025.1
.. _SCYLLA_METRICS: ../metric-update-6.2-to-2025.1
.. |SCYLLA_METRICS| replace:: ScyllaDB Metrics Update - ScyllaDB 2025.1 to 2025.2
.. _SCYLLA_METRICS: ../metric-update-2025.1-to-2025.2
=======================================================================================
Upgrade from |SCYLLA_NAME| Open Source |SRC_VERSION| to |SCYLLA_NAME| |NEW_VERSION|
Upgrade from |SCYLLA_NAME| |SRC_VERSION| to |SCYLLA_NAME| |NEW_VERSION|
=======================================================================================
This document describes a step-by-step procedure for upgrading from |SCYLLA_NAME| |SRC_VERSION|
@@ -20,7 +20,7 @@ This guide covers upgrading ScyllaDB on Red Hat Enterprise Linux (RHEL), CentOS,
and Ubuntu. See :doc:`OS Support by Platform and Version </getting-started/os-support>`
for information about supported versions.
It also applies when using ScyllaDB official image on EC2, GCP, or Azure.
It also applies when using the ScyllaDB official image on EC2, GCP, or Azure.
Before You Upgrade ScyllaDB
==============================
@@ -28,7 +28,7 @@ Before You Upgrade ScyllaDB
**Upgrade Your Driver**
If you're using a :doc:`ScyllaDB driver </using-scylla/drivers/cql-drivers/index>`,
upgrade the driver before you upgrade ScyllaDB. The latest two versions of each driver
upgrade the driver before upgrading ScyllaDB. The latest two versions of each driver
are supported.
**Upgrade ScyllaDB Monitoring Stack**
@@ -44,6 +44,14 @@ We recommend upgrading the Monitoring Stack to the latest version.
See the ScyllaDB Release Notes for the latest updates. The Release Notes are published
at the `ScyllaDB Community Forum <https://forum.scylladb.com/>`_.
.. note::
If you previously upgraded from 2024.x to 2025.1 without enabling consistent
topology updates, ensure you enable the feature before you upgrade to 2025.2.
For instructions, see
`Enable Consistent Topology Updates <https://docs.scylladb.com/manual/branch-2025.1/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/enable-consistent-topology.html>`_
in the upgrade guide for version 2025.1.
Upgrade Procedure
=================
@@ -150,7 +158,7 @@ You should take note of the current version in case you want to |ROLLBACK|_ the
.. code-block:: console
sudo wget -O /etc/apt/sources.list.d/scylla.list https://downloads.scylladb.com/deb/debian/scylla-2025.1.list
sudo wget -O /etc/apt/sources.list.d/scylla.list https://downloads.scylladb.com/deb/debian/scylla-2025.2.list
#. Install the new ScyllaDB version:
@@ -168,7 +176,7 @@ You should take note of the current version in case you want to |ROLLBACK|_ the
.. code-block:: console
sudo curl -o /etc/yum.repos.d/scylla.repo -L https://downloads.scylladb.com/rpm/centos/scylla-2025.1.repo
sudo curl -o /etc/yum.repos.d/scylla.repo -L https://downloads.scylladb.com/rpm/centos/scylla-2025.2.repo
#. Install the new ScyllaDB version:

@@ -0,0 +1,270 @@
.. |SCYLLA_NAME| replace:: ScyllaDB
.. |SRC_VERSION| replace:: 2025.2.x.y
.. |NEW_VERSION| replace:: 2025.2.x.z
==========================================================================
Upgrade - |SCYLLA_NAME| |SRC_VERSION| to |NEW_VERSION| (Patch Upgrades)
==========================================================================
This document describes a step-by-step procedure for upgrading from
|SCYLLA_NAME| |SRC_VERSION| to |SCYLLA_NAME| |NEW_VERSION| (where "z" is
the latest available version), and rolling back to version |SRC_VERSION|
if necessary.
This guide covers upgrading ScyllaDB on Red Hat Enterprise Linux (RHEL),
CentOS, Debian, and Ubuntu.
See :doc:`OS Support by Platform and Version </getting-started/os-support>`
for information about supported versions.
It also applies to the ScyllaDB official image on EC2, GCP, or Azure.
Upgrade Procedure
=================
.. note::
Apply the following procedure **serially** on each node. Do not move to the next
node before validating that the node is up and running the new version.
A ScyllaDB upgrade is a rolling procedure that does **not** require a full cluster
shutdown. For each of the nodes in the cluster, you will:
#. Drain the node and back up the data.
#. Back up the configuration file.
#. Stop ScyllaDB.
#. Download and install new ScyllaDB packages.
#. Start ScyllaDB.
#. Validate that the upgrade was successful.
**Before** upgrading, check which version you are running now using
``scylla --version``. Note the current version in case you want to roll back
the upgrade.
**During** the rolling upgrade, it is highly recommended:
* Not to use new |NEW_VERSION| features.
* Not to run administration functions, such as repair, refresh, rebuild, or
adding or removing nodes. See
`sctool <https://manager.docs.scylladb.com/stable/sctool/>`_ for suspending
ScyllaDB Manager's scheduled or running repairs.
* Not to apply schema changes.
Upgrade Steps
=============
Back up the data
------------------------------
Back up all the data to an external device. We recommend using
`ScyllaDB Manager <https://manager.docs.scylladb.com/stable/backup/index.html>`_
to create backups.
Alternatively, you can use the ``nodetool snapshot`` command.
For **each** node in the cluster, run the following:
.. code:: sh
nodetool drain
nodetool snapshot
Take note of the directory name that nodetool gives you, and copy all
the directories with this name under ``/var/lib/scylla`` to a backup device.
When the upgrade is completed on all nodes, remove the snapshot with the
``nodetool clearsnapshot -t <snapshot>`` command to prevent running out of space.
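The copy step above can be sketched as follows. The ``backup_snapshots`` helper and the backup mount point are illustrative assumptions, not ScyllaDB tooling; snapshots live under ``<keyspace>/<table>/snapshots/<tag>`` inside the data directory:

```shell
# Illustrative sketch: copy every snapshot directory with the given tag out of
# the ScyllaDB data directory, preserving the keyspace/table layout so the
# backup can later be mapped back to its tables.
backup_snapshots() {
  data_dir=$1 tag=$2 backup_dir=$3
  find "$data_dir" -type d -path "*/snapshots/$tag" | while IFS= read -r d; do
    dest="$backup_dir/${d#"$data_dir"/}"   # keep the path relative to data_dir
    mkdir -p "$dest"
    cp -a "$d/." "$dest/"
  done
}

# Example invocation with the tag printed by `nodetool snapshot`:
# backup_snapshots /var/lib/scylla/data 1695200000000 /mnt/backup
```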
Back up the configuration file
------------------------------
Back up the ``scylla.yaml`` configuration file and the ScyllaDB packages
in case you need to roll back the upgrade.
.. tabs::
.. group-tab:: Debian/Ubuntu
.. code:: sh
sudo cp -a /etc/scylla/scylla.yaml /etc/scylla/scylla.yaml.backup
sudo cp /etc/apt/sources.list.d/scylla.list ~/scylla.list-backup
.. group-tab:: RHEL/CentOS
.. code:: sh
sudo cp -a /etc/scylla/scylla.yaml /etc/scylla/scylla.yaml.backup
sudo cp /etc/yum.repos.d/scylla.repo ~/scylla.repo-backup
Gracefully stop the node
------------------------
.. code:: sh
sudo service scylla-server stop
Download and install the new release
------------------------------------
You don't need to update the ScyllaDB DEB or RPM repo when you upgrade to
a patch release.
.. tabs::
.. group-tab:: Debian/Ubuntu
To install a patch version on Debian or Ubuntu, run:
.. code:: sh
sudo apt-get clean all
sudo apt-get update
sudo apt-get dist-upgrade scylla
Answer y to the first two questions.
.. group-tab:: RHEL/CentOS
To install a patch version on RHEL or CentOS, run:
.. code:: sh
sudo yum clean all
sudo yum update scylla\* -y
.. group-tab:: EC2/GCP/Azure Ubuntu Image
If you're using the ScyllaDB official image (recommended), see
the **Debian/Ubuntu** tab for upgrade instructions.
If you're using your own image and have installed ScyllaDB packages for
Ubuntu or Debian, you need to apply an extended upgrade procedure:
#. Install the new ScyllaDB version with the additional
``scylla-machine-image`` package:
.. code-block:: console
sudo apt-get clean all
sudo apt-get update
sudo apt-get dist-upgrade scylla
sudo apt-get dist-upgrade scylla-machine-image
#. Run ``scylla_setup`` without running ``io_setup``.
#. Run ``sudo /opt/scylladb/scylla-machine-image/scylla_cloud_io_setup``.
Start the node
--------------
.. code:: sh
sudo service scylla-server start
Validate
--------
#. Check cluster status with ``nodetool status`` and make sure **all** nodes,
including the one you just upgraded, are in ``UN`` status.
#. Use ``curl -X GET "http://localhost:10000/storage_service/scylla_release_version"``
to check the ScyllaDB version.
#. Use ``journalctl _COMM=scylla`` to check there are no new errors in the log.
#. Check again after 2 minutes to validate that no new issues are introduced.
Once you are sure the node upgrade is successful, move to the next node in
the cluster.
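The version check in step 2 can be scripted. The ``check_version`` helper below is a hypothetical sketch; it assumes the REST endpoint returns the release string wrapped in JSON quotes (for example ``"2025.2.1"``):

```shell
# Hypothetical sketch: compare the release string returned by
#   curl -s http://localhost:10000/storage_service/scylla_release_version
# against the expected version prefix. The surrounding JSON quotes are
# stripped before comparing.
check_version() {
  expected=$1 reported=$2
  reported=${reported%\"}; reported=${reported#\"}   # strip JSON quotes
  case $reported in
    "$expected"*) return 0 ;;
    *)            return 1 ;;
  esac
}

# Example:
# check_version 2025.2 "$(curl -s http://localhost:10000/storage_service/scylla_release_version)"
```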
Rollback Procedure
==================
The following procedure describes a rollback from ScyllaDB release
|NEW_VERSION| to |SRC_VERSION|. Apply this procedure if an upgrade from
|SRC_VERSION| to |NEW_VERSION| failed before completing on all nodes.
* Use this procedure only on nodes you upgraded to |NEW_VERSION|.
* Execute the following commands one node at a time, moving to the next node only
after the rollback procedure is completed successfully.
ScyllaDB rollback is a rolling procedure that does **not** require a full
cluster shutdown. For each of the nodes to roll back to |SRC_VERSION|, you will:
#. Drain the node and stop ScyllaDB.
#. Downgrade to the previous release.
#. Restore the configuration file.
#. Restart ScyllaDB.
#. Validate the rollback success.
Rollback Steps
==============
Gracefully shut down ScyllaDB
------------------------------
.. code:: sh
nodetool drain
sudo service scylla-server stop
Downgrade to the previous release
----------------------------------
.. tabs::
.. group-tab:: Debian/Ubuntu
To downgrade to |SRC_VERSION| on Debian or Ubuntu, run:
.. code-block:: console
:substitutions:
sudo apt-get install scylla=|SRC_VERSION|\* scylla-server=|SRC_VERSION|\* scylla-tools=|SRC_VERSION|\* scylla-tools-core=|SRC_VERSION|\* scylla-kernel-conf=|SRC_VERSION|\* scylla-conf=|SRC_VERSION|\*
Answer y to the first two questions.
.. group-tab:: RHEL/CentOS
To downgrade to |SRC_VERSION| on RHEL or CentOS, run:
.. code-block:: console
:substitutions:
sudo yum downgrade scylla\*-|SRC_VERSION|-\* -y
.. group-tab:: EC2/GCP/Azure Ubuntu Image
If you're using the ScyllaDB official image (recommended), see
the **Debian/Ubuntu** tab for downgrade instructions.
If you're using your own image and have installed ScyllaDB packages for
Ubuntu or Debian, you need to additionally downgrade
the ``scylla-machine-image`` package.
.. code-block:: console
:substitutions:
sudo apt-get install scylla=|SRC_VERSION|\* scylla-server=|SRC_VERSION|\* scylla-tools=|SRC_VERSION|\* scylla-tools-core=|SRC_VERSION|\* scylla-kernel-conf=|SRC_VERSION|\* scylla-conf=|SRC_VERSION|\*
sudo apt-get install scylla-machine-image=|SRC_VERSION|\*
Answer y to the first two questions.
Restore the configuration file
------------------------------
.. code:: sh
sudo rm -rf /etc/scylla/scylla.yaml
sudo cp -a /etc/scylla/scylla.yaml.backup /etc/scylla/scylla.yaml
Start the node
--------------
.. code:: sh
sudo service scylla-server start
Validate
--------
Check the upgrade instructions above for validation. Once you are sure the node
rollback is successful, move to the next node in the cluster.

@@ -1,13 +0,0 @@
==========================================================
Upgrade - ScyllaDB Open Source 6.2 to ScyllaDB 2025.1
==========================================================
.. toctree::
:maxdepth: 2
:hidden:
Upgrade ScyllaDB <upgrade-guide-from-6.2-to-2025.1>
Metrics Update <metric-update-6.2-to-2025.1>
* :doc:`Upgrade from ScyllaDB Open Source 6.2.x to ScyllaDB 2025.1.y <upgrade-guide-from-6.2-to-2025.1>`
* :doc:`Metrics Update Between 6.2 and 2025.1 <metric-update-6.2-to-2025.1>`
