test: test_zero_token_nodes_multidc: properly handle reads with CL=ONE

The test could fail with RF={DC1: 2, DC2: 0} and CL=ONE when:
- both writes succeeded with the same replica responding first,
- one of the subsequent reads succeeded with the other replica responding before it applied mutations from any of the writes.

We fix the test by not expecting reads with CL=ONE to return a row.

We also harden the test by inserting different rows for every pair (CL, coordinator), where one of the two coordinators is a normal node from DC1, and the other one is a zero-token node from DC2. This change makes sure that, for example, every write really inserts a row.

Fixes scylladb/scylladb#22967

The fix addresses CI flakiness and only changes the test, so it should be backported.

Closes scylladb/scylladb#23518

(cherry picked from commit 21edec1ace)

Closes scylladb/scylladb#24984
test/cluster/test_read_repair: write 100 rows in trace test
2025-07-15 15:50:21 +02:00 · 2025-07-15 13:27:31 +03:00 · 2025-07-15 13:26:39 +03:00 · 2025-07-15 13:25:38 +03:00 · 2025-07-15 13:24:49 +03:00 · 2025-07-15 13:23:12 +03:00
277 changed files with 5374 additions and 2373 deletions
--- a/.github/scripts/auto-backport.py
+++ b/.github/scripts/auto-backport.py
@@ -112,10 +112,15 @@ def backport(repo, pr, version, commits, backport_base_branch, is_collaborator):
                    is_draft = True
                    repo_local.git.add(A=True)
                    repo_local.git.cherry_pick('--continue')
-            repo_local.git.push(fork_repo, new_branch_name, force=True)
-            create_pull_request(repo, new_branch_name, backport_base_branch, pr, backport_pr_title, commits,
-                                is_draft, is_collaborator)
-
+            # Check if the branch already exists in the remote fork
+            remote_refs = repo_local.git.ls_remote('--heads', fork_repo, new_branch_name)
+            if not remote_refs:
+                # Branch does not exist, create it with a regular push
+                repo_local.git.push(fork_repo, new_branch_name)
+                create_pull_request(repo, new_branch_name, backport_base_branch, pr, backport_pr_title, commits,
+                                    is_draft, is_collaborator)
+            else:
+                logging.info(f"Remote branch {new_branch_name} already exists in fork. Skipping push.")
        except GitCommandError as e:
            logging.warning(f"GitCommandError: {e}")

--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "seastar"]
 	path = seastar
-	url = ../seastar
+	url = ../scylla-seastar
 	ignore = dirty
 [submodule "swagger-ui"]
 	path = swagger-ui
--- a/2
+++ b/2
@@ -78,7 +78,7 @@ fi

 # Default scylla product/version tags
 PRODUCT=scylla
-VERSION=2025.2.0-dev
+VERSION=2025.2.1

 if test -f version
 then
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -1006,6 +1006,17 @@ void rmw_operation::set_default_write_isolation(std::string_view value) {
    default_write_isolation = parse_write_isolation(value);
 }

+// Alternator uses tags whose keys start with the "system:" prefix for
+// internal purposes. Those should not be readable by ListTagsOfResource,
+// nor writable with TagResource or UntagResource (see #24098).
+// Only a few specific system tags, currently only system:write_isolation,
+// are deliberately intended to be set and read by the user, so are not
+// considered "internal".
+static bool tag_key_is_internal(std::string_view tag_key) {
+    return tag_key.starts_with("system:") &&
+        tag_key != rmw_operation::WRITE_ISOLATION_TAG_KEY;
+}
+
 enum class update_tags_action { add_tags, delete_tags };
 static void update_tags_map(const rjson::value& tags, std::map<sstring, sstring>& tags_map, update_tags_action action) {
    if (action == update_tags_action::add_tags) {
@@ -1030,6 +1041,9 @@ static void update_tags_map(const rjson::value& tags, std::map<sstring, sstring>
            if (!validate_legal_tag_chars(tag_key)) {
                throw api_error::validation("A tag Key can only contain letters, spaces, and [+-=._:/]");
            }
+            if (tag_key_is_internal(tag_key)) {
+                throw api_error::validation(fmt::format("Tag key '{}' is reserved for internal use", tag_key));
+            }
            // Note tag values are limited similarly to tag keys, but have a
            // longer length limit, and *can* be empty.
            if (tag_value.size() > 256) {
@@ -1042,7 +1056,11 @@ static void update_tags_map(const rjson::value& tags, std::map<sstring, sstring>
        }
    } else if (action == update_tags_action::delete_tags) {
        for (auto it = tags.Begin(); it != tags.End(); ++it) {
-            tags_map.erase(sstring(it->GetString(), it->GetStringLength()));
+            auto tag_key = rjson::to_string_view(*it);
+            if (tag_key_is_internal(tag_key)) {
+                throw api_error::validation(fmt::format("Tag key '{}' is reserved for internal use", tag_key));
+            }
+            tags_map.erase(sstring(tag_key));
        }
    }

@@ -1117,6 +1135,9 @@ future<executor::request_return_type> executor::list_tags_of_resource(client_sta

    rjson::value& tags = ret["Tags"];
    for (auto& tag_entry : tags_map) {
+        if (tag_key_is_internal(tag_entry.first)) {
+            continue;
+        }
        rjson::value new_entry = rjson::empty_object();
        rjson::add(new_entry, "Key", rjson::from_string(tag_entry.first));
        rjson::add(new_entry, "Value", rjson::from_string(tag_entry.second));
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -2144,6 +2144,31 @@
                     "allowMultiple":false,
                     "type":"string",
                     "paramType":"query"
+                  },
+                  {
+                     "name":"skip_cleanup",
+                     "description":"Don't cleanup keys from loaded sstables. Invalid if load_and_stream is true",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"skip_reshape",
+                     "description":"Don't reshape the loaded sstables. Invalid if load_and_stream is true",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"scope",
+                     "description":"Defines the set of nodes to which mutations can be streamed",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query",
+                     "enum": ["all", "dc", "rack", "node"]
                  }
               ]
            }
@@ -3136,6 +3161,22 @@
               ]
            }
         ]
+      },
+      {
+         "path":"/storage_service/raft_topology/cmd_rpc_status",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get information about currently running topology cmd rpc",
+               "type":"string",
+               "nickname":"raft_topology_get_cmd_status",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
      }
   ],
   "models":{
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -453,17 +453,26 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
        auto cf = req->get_query_param("cf");
        auto stream = req->get_query_param("load_and_stream");
        auto primary_replica = req->get_query_param("primary_replica_only");
+        auto skip_cleanup_p = req->get_query_param("skip_cleanup");
        boost::algorithm::to_lower(stream);
        boost::algorithm::to_lower(primary_replica);
        bool load_and_stream = stream == "true" || stream == "1";
        bool primary_replica_only = primary_replica == "true" || primary_replica == "1";
+        bool skip_cleanup = skip_cleanup_p == "true" || skip_cleanup_p == "1";
+        auto scope = parse_stream_scope(req->get_query_param("scope"));
+        auto skip_reshape_p = req->get_query_param("skip_reshape");
+        auto skip_reshape = skip_reshape_p == "true" || skip_reshape_p == "1";
+
+        if (scope != sstables_loader::stream_scope::all && !load_and_stream) {
+            throw httpd::bad_param_exception("scope takes no effect without load-and-stream");
+        }
        // No need to add the keyspace, since all we want is to avoid always sending this to the same
        // CPU. Even then I am being overzealous here. This is not something that happens all the time.
        auto coordinator = std::hash<sstring>()(cf) % smp::count;
        return sst_loader.invoke_on(coordinator,
                [ks = std::move(ks), cf = std::move(cf),
-                load_and_stream, primary_replica_only] (sstables_loader& loader) {
-            return loader.load_new_sstables(ks, cf, load_and_stream, primary_replica_only, sstables_loader::stream_scope::all);
+                load_and_stream, primary_replica_only, skip_cleanup, skip_reshape, scope] (sstables_loader& loader) {
+            return loader.load_new_sstables(ks, cf, load_and_stream, primary_replica_only, skip_cleanup, skip_reshape, scope);
        }).then_wrapped([] (auto&& f) {
            if (f.failed()) {
                auto msg = fmt::format("Failed to load new sstables: {}", f.get_exception());
@@ -1667,6 +1676,18 @@ rest_raft_topology_upgrade_status(sharded<service::storage_service>& ss, std::un
        co_return sstring(format("{}", ustate));
 }

+static
+future<json::json_return_type>
+rest_raft_topology_get_cmd_status(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
+        const auto status = co_await ss.invoke_on(0, [] (auto& ss) {
+            return ss.get_topology_cmd_status();
+        });
+        if (status.active_dst.empty()) {
+            co_return sstring("none");
+        }
+        co_return sstring(fmt::format("{}[{}]: {}", status.current, status.index, fmt::join(status.active_dst, ",")));
+}
+
 static
 future<json::json_return_type>
 rest_move_tablet(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
@@ -1898,6 +1919,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    ss::reload_raft_topology_state.set(r, rest_bind(rest_reload_raft_topology_state, ss, group0_client));
    ss::upgrade_to_raft_topology.set(r, rest_bind(rest_upgrade_to_raft_topology, ss));
    ss::raft_topology_upgrade_status.set(r, rest_bind(rest_raft_topology_upgrade_status, ss));
+    ss::raft_topology_get_cmd_status.set(r, rest_bind(rest_raft_topology_get_cmd_status, ss));
    ss::move_tablet.set(r, rest_bind(rest_move_tablet, ctx, ss));
    ss::add_tablet_replica.set(r, rest_bind(rest_add_tablet_replica, ctx, ss));
    ss::del_tablet_replica.set(r, rest_bind(rest_del_tablet_replica, ctx, ss));
@@ -1979,6 +2001,7 @@ void unset_storage_service(http_context& ctx, routes& r) {
    ss::reload_raft_topology_state.unset(r);
    ss::upgrade_to_raft_topology.unset(r);
    ss::raft_topology_upgrade_status.unset(r);
+    ss::raft_topology_get_cmd_status.unset(r);
    ss::move_tablet.unset(r);
    ss::add_tablet_replica.unset(r);
    ss::del_tablet_replica.unset(r);
--- a/api/token_metadata.cc
+++ b/api/token_metadata.cc
@@ -74,6 +74,9 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
    });

    ss::get_host_id_map.set(r, [&tm, &g](const_req req) {
+        if (!g.local().is_enabled()) {
+            throw std::runtime_error("The gossiper is not ready yet");
+        }
        std::vector<ss::mapper> res;
        auto map = tm.local().get()->get_host_ids() |
            std::views::transform([&g] (locator::host_id id) { return std::make_pair(g.local().get_address_map().get(id), id); }) |
--- a/auth/common.cc
+++ b/auth/common.cc
@@ -119,6 +119,11 @@ future<> create_legacy_metadata_table_if_missing(
    return qs;
 }

+::service::raft_timeout get_raft_timeout() noexcept {
+    auto dur = internal_distributed_query_state().get_client_state().get_timeout_config().other_timeout;
+    return ::service::raft_timeout{.value = lowres_clock::now() + dur};
+}
+
 static future<> announce_mutations_with_guard(
        ::service::raft_group0_client& group0_client,
        std::vector<canonical_mutation> muts,
--- a/auth/common.hh
+++ b/auth/common.hh
@@ -17,6 +17,7 @@

 #include "types/types.hh"
 #include "service/raft/raft_group0_client.hh"
+#include "timeout_config.hh"

 using namespace std::chrono_literals;

@@ -77,6 +78,8 @@ future<> create_legacy_metadata_table_if_missing(
 ///
 ::service::query_state& internal_distributed_query_state() noexcept;

+::service::raft_timeout get_raft_timeout() noexcept;
+
 // Execute update query via group0 mechanism, mutations will be applied on all nodes.
 // Use this function when need to perform read before write on a single guard or if
 // you have more than one mutation and potentially exceed single command size limit.
--- a/auth/ldap_role_manager.cc
+++ b/auth/ldap_role_manager.cc
@@ -338,8 +338,7 @@ future<std::vector<cql3::description>> ldap_role_manager::describe_role_grants()
 }

 future<> ldap_role_manager::ensure_superuser_is_created() {
-    // ldap is responsible for users
-    co_return;
+    return _std_mgr.ensure_superuser_is_created();
 }

 } // namespace auth
--- a/auth/password_authenticator.cc
+++ b/auth/password_authenticator.cc
@@ -117,7 +117,8 @@ future<> password_authenticator::migrate_legacy_metadata() const {
    });
 }

-future<> password_authenticator::create_default_if_missing() {
+future<> password_authenticator::legacy_create_default_if_missing() {
+    SCYLLA_ASSERT(legacy_mode(_qp));
    const auto exists = co_await default_role_row_satisfies(_qp, &has_salted_hash, _superuser);
    if (exists) {
        co_return;
@@ -127,18 +128,75 @@ future<> password_authenticator::create_default_if_missing() {
        salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt);
    }
    const auto query = update_row_query();
-    if (legacy_mode(_qp)) {
-        co_await _qp.execute_internal(
+    co_await _qp.execute_internal(
            query,
            db::consistency_level::QUORUM,
            internal_distributed_query_state(),
            {salted_pwd, _superuser},
            cql3::query_processor::cache_internal::no);
-        plogger.info("Created default superuser authentication record.");
-    } else {
-        co_await announce_mutations(_qp, _group0_client, query,
-            {salted_pwd, _superuser}, _as, ::service::raft_timeout{});
-        plogger.info("Created default superuser authentication record.");
+    plogger.info("Created default superuser authentication record.");
+}
+
+future<> password_authenticator::maybe_create_default_password() {
+    auto needs_password = [this] () -> future<bool> {
+        const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", get_auth_ks_name(_qp), meta::roles_table::name);
+        auto results = co_await _qp.execute_internal(query,
+                db::consistency_level::LOCAL_ONE,
+                internal_distributed_query_state(), cql3::query_processor::cache_internal::yes);
+        // Don't add default password if
+        // - there is no default superuser
+        // - there is a superuser with a password.
+        bool has_default = false;
+        bool has_superuser_with_password = false;
+        for (auto& result : *results) {
+            if (result.get_as<sstring>(meta::roles_table::role_col_name) == _superuser) {
+                has_default = true;
+            }
+            if (has_salted_hash(result)) {
+                has_superuser_with_password = true;
+            }
+        }
+        co_return has_default && !has_superuser_with_password;
+    };
+    if (!co_await needs_password()) {
+        co_return;
+    }
+    // We don't want to start operation earlier to avoid quorum requirement in
+    // a common case.
+    ::service::group0_batch batch(
+            co_await _group0_client.start_operation(_as, get_raft_timeout()));
+    // Check again as the state may have changed before we took the guard (batch).
+    if (!co_await needs_password()) {
+        co_return;
+    }
+    // Set default superuser's password.
+    std::string salted_pwd(get_config_value(_qp.db().get_config().auth_superuser_salted_password(), ""));
+    if (salted_pwd.empty()) {
+        salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt);
+    }
+    const auto update_query = update_row_query();
+    co_await collect_mutations(_qp, batch, update_query, {salted_pwd, _superuser});
+    co_await std::move(batch).commit(_group0_client, _as, get_raft_timeout());
+    plogger.info("Created default superuser authentication record.");
+}
+
+future<> password_authenticator::maybe_create_default_password_with_retries() {
+    size_t retries = _migration_manager.get_concurrent_ddl_retries();
+    while (true)  {
+        try {
+            co_return co_await maybe_create_default_password();
+        } catch (const ::service::group0_concurrent_modification& ex) {
+            plogger.warn("Failed to execute maybe_create_default_password due to guard conflict.{}.", retries ? " Retrying" : " Number of retries exceeded, giving up");
+            if (retries--) {
+                continue;
+            }
+            // Log error but don't crash the whole node startup sequence.
+            plogger.error("Failed to create default superuser password due to guard conflict.");
+            co_return;
+        } catch (const ::service::raft_operation_timeout_error& ex) {
+            plogger.error("Failed to create default superuser password due to exception: {}", ex.what());
+            co_return;
+        }
    }
 }

@@ -164,10 +222,11 @@ future<> password_authenticator::start() {
                        migrate_legacy_metadata().get();
                        return;
                    }
+                    legacy_create_default_if_missing().get();
                }
                utils::get_local_injector().inject("password_authenticator_start_pause", utils::wait_for_message(5min)).get();
-                create_default_if_missing().get();
                if (!legacy_mode(_qp)) {
+                    maybe_create_default_password_with_retries().get();
                    _superuser_created_promise.set_value();
                }
            });
--- a/auth/password_authenticator.hh
+++ b/auth/password_authenticator.hh
@@ -41,7 +41,7 @@ class password_authenticator : public authenticator {
    ::service::migration_manager& _migration_manager;
    future<> _stopped;
    abort_source _as;
-    std::string _superuser;
+    std::string _superuser; // default superuser name from the config (may or may not be present in roles table)
    shared_promise<> _superuser_created_promise;

 public:
@@ -89,7 +89,10 @@ private:

    future<> migrate_legacy_metadata() const;

-    future<> create_default_if_missing();
+    future<> legacy_create_default_if_missing();
+
+    future<> maybe_create_default_password();
+    future<> maybe_create_default_password_with_retries();

    sstring update_row_query() const;
 };
--- a/auth/service.cc
+++ b/auth/service.cc
@@ -240,6 +240,13 @@ future<> service::start(::service::migration_manager& mm, db::system_keyspace& s
        });
    }
    co_await _role_manager->start();
+    if (this_shard_id() == 0) {
+        // Role manager and password authenticator have this odd startup
+        // mechanism where they asynchronously create the superuser role
+        // in the background. Correct password creation depends on role
+        // creation therefore we need to wait here.
+        co_await _role_manager->ensure_superuser_is_created();
+    }
    co_await when_all_succeed(_authorizer->start(), _authenticator->start()).discard_result();
    _permissions_cache = std::make_unique<permissions_cache>(_loading_cache_config, *this, log);
    co_await once_among_shards([this] {
--- a/auth/standard_role_manager.cc
+++ b/auth/standard_role_manager.cc
@@ -178,7 +178,8 @@ future<> standard_role_manager::create_legacy_metadata_tables_if_missing() const
                    _migration_manager)).discard_result();
 }

-future<> standard_role_manager::create_default_role_if_missing() {
+future<> standard_role_manager::legacy_create_default_role_if_missing() {
+    SCYLLA_ASSERT(legacy_mode(_qp));
    try {
        const auto exists = co_await default_role_row_satisfies(_qp, &has_can_login, _superuser);
        if (exists) {
@@ -188,16 +189,12 @@ future<> standard_role_manager::create_default_role_if_missing() {
                get_auth_ks_name(_qp),
                meta::roles_table::name,
                meta::roles_table::role_col_name);
-        if (legacy_mode(_qp)) {
-            co_await _qp.execute_internal(
-                    query,
-                    db::consistency_level::QUORUM,
-                    internal_distributed_query_state(),
-                    {_superuser},
-                    cql3::query_processor::cache_internal::no).discard_result();
-        } else {
-            co_await announce_mutations(_qp, _group0_client, query, {_superuser}, _as, ::service::raft_timeout{});
-        }
+        co_await _qp.execute_internal(
+                query,
+                db::consistency_level::QUORUM,
+                internal_distributed_query_state(),
+                {_superuser},
+                cql3::query_processor::cache_internal::no).discard_result();
        log.info("Created default superuser role '{}'.", _superuser);
    } catch(const exceptions::unavailable_exception& e) {
        log.warn("Skipped default role setup: some nodes were not ready; will retry");
@@ -205,6 +202,60 @@ future<> standard_role_manager::create_default_role_if_missing() {
    }
 }

+future<> standard_role_manager::maybe_create_default_role() {
+    auto has_superuser = [this] () -> future<bool> {
+        const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", get_auth_ks_name(_qp), meta::roles_table::name);
+        auto results = co_await _qp.execute_internal(query, db::consistency_level::LOCAL_ONE,
+                internal_distributed_query_state(), cql3::query_processor::cache_internal::yes);
+        for (const auto& result : *results) {
+            if (has_can_login(result)) {
+                co_return true;
+            }
+        }
+        co_return false;
+    };
+    if (co_await has_superuser()) {
+        co_return;
+    }
+    // We don't want to start operation earlier to avoid quorum requirement in
+    // a common case.
+    ::service::group0_batch batch(
+            co_await _group0_client.start_operation(_as, get_raft_timeout()));
+    // Check again as the state may have changed before we took the guard (batch).
+    if (co_await has_superuser()) {
+        co_return;
+    }
+    // There is no superuser which has can_login field - create default role.
+    // Note that we don't check if can_login is set to true.
+    const sstring insert_query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, true, true)",
+            get_auth_ks_name(_qp),
+            meta::roles_table::name,
+            meta::roles_table::role_col_name);
+    co_await collect_mutations(_qp, batch, insert_query, {_superuser});
+    co_await std::move(batch).commit(_group0_client, _as, get_raft_timeout());
+    log.info("Created default superuser role '{}'.", _superuser);
+}
+
+future<> standard_role_manager::maybe_create_default_role_with_retries() {
+    size_t retries = _migration_manager.get_concurrent_ddl_retries();
+    while (true)  {
+        try {
+            co_return co_await maybe_create_default_role();
+        } catch (const ::service::group0_concurrent_modification& ex) {
+            log.warn("Failed to execute maybe_create_default_role due to guard conflict.{}.", retries ? " Retrying" : " Number of retries exceeded, giving up");
+            if (retries--) {
+                continue;
+            }
+            // Log error but don't crash the whole node startup sequence.
+            log.error("Failed to create default superuser role due to guard conflict.");
+            co_return;
+        } catch (const ::service::raft_operation_timeout_error& ex) {
+            log.error("Failed to create default superuser role due to exception: {}", ex.what());
+            co_return;
+        }
+    }
+}
+
 static const sstring legacy_table_name{"users"};

 bool standard_role_manager::legacy_metadata_exists() {
@@ -266,9 +317,10 @@ future<> standard_role_manager::start() {
                    co_await migrate_legacy_metadata();
                    co_return;
                }
+                co_await legacy_create_default_role_if_missing();
            }
-            co_await create_default_role_if_missing();
            if (!legacy) {
+                co_await maybe_create_default_role_with_retries();
                _superuser_created_promise.set_value();
            }
        };
--- a/auth/standard_role_manager.hh
+++ b/auth/standard_role_manager.hh
@@ -95,7 +95,10 @@ private:

    future<> migrate_legacy_metadata();

-    future<> create_default_role_if_missing();
+    future<> legacy_create_default_role_if_missing();
+
+    future<> maybe_create_default_role();
+    future<> maybe_create_default_role_with_retries();

    future<> create_or_replace(std::string_view role_name, const role_config&, ::service::group0_batch&);

--- a/compaction/compaction_manager.cc
+++ b/compaction/compaction_manager.cc
@@ -1126,8 +1126,11 @@ future<> compaction_manager::drain() {
        // Disable the state so that it can be enabled later if requested.
        _state = state::disabled;
    }
+    _compaction_submission_timer.cancel();
    // Stop ongoing compactions, if the request has not been sent already and wait for them to stop.
    co_await stop_ongoing_compactions("drain");
+    // Trigger a signal to properly exit from postponed_compactions_reevaluation() fiber
+    reevaluate_postponed_compactions();
    cmlog.info("Drained");
 }

--- a/compound.hh
+++ b/compound.hh
@@ -255,6 +255,9 @@ public:
    // Returns true iff given prefix has no missing components
    bool is_full(managed_bytes_view v) const {
        SCYLLA_ASSERT(AllowPrefixes == allow_prefixes::yes);
+        if (_types.size() == 0) {
+            return v.empty();
+        }
        return std::distance(begin(v), end(v)) == (ssize_t)_types.size();
    }
    bool is_empty(managed_bytes_view v) const {
--- a/compress.cc
+++ b/compress.cc
@@ -15,6 +15,8 @@
 #include <seastar/core/metrics.hh>
 #include <seastar/core/sharded.hh>
 #include <seastar/core/weak_ptr.hh>
+#include <seastar/core/thread.hh>
+#include <seastar/core/reactor.hh>
 #include "utils/reusable_buffer.hh"
 #include "sstables/compress.hh"
 #include "sstables/exceptions.hh"
@@ -27,7 +29,7 @@

 // SHA256
 using dict_id = std::array<std::byte, 32>;
-class sstable_compressor_factory_impl;
+class dictionary_holder;

 static seastar::logger compressor_factory_logger("sstable_compressor_factory");

@@ -41,11 +43,11 @@ template <> struct fmt::formatter<compression_parameters::algorithm> : fmt::form
 // raw dicts might be used (and kept alive) directly by compressors (in particular, lz4 decompressor)
 // or referenced by algorithm-specific dicts.
 class raw_dict : public enable_lw_shared_from_this<raw_dict> {
-    weak_ptr<sstable_compressor_factory_impl> _owner;
+    weak_ptr<dictionary_holder> _owner;
    dict_id _id;
    std::vector<std::byte> _dict;
 public:
-    raw_dict(sstable_compressor_factory_impl& owner, dict_id key, std::span<const std::byte> dict);
+    raw_dict(dictionary_holder& owner, dict_id key, std::span<const std::byte> dict);
    ~raw_dict();
    const std::span<const std::byte> raw() const { return _dict; }
    dict_id id() const { return _id; }
@@ -79,13 +81,13 @@ struct zstd_callback_allocator {
 // (which internally holds a pointer to the raw dictionary blob
 // and parsed entropy tables).
 class zstd_ddict : public enable_lw_shared_from_this<zstd_ddict> {
-    weak_ptr<sstable_compressor_factory_impl> _owner;
+    weak_ptr<dictionary_holder> _owner;
    lw_shared_ptr<const raw_dict> _raw;
    size_t _used_memory = 0;
    zstd_callback_allocator _alloc;
    std::unique_ptr<ZSTD_DDict, decltype(&ZSTD_freeDDict)> _dict;
 public:
-    zstd_ddict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw);
+    zstd_ddict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw);
    ~zstd_ddict();
    auto dict() const { return _dict.get(); }
    auto raw() const { return _raw->raw(); }
@@ -100,14 +102,14 @@ public:
 // so the level of compression is decided at the time of construction
 // of this dict.
 class zstd_cdict : public enable_lw_shared_from_this<zstd_cdict> {
-    weak_ptr<sstable_compressor_factory_impl> _owner;
+    weak_ptr<dictionary_holder> _owner;
    lw_shared_ptr<const raw_dict> _raw;
    int _level;
    size_t _used_memory = 0;
    zstd_callback_allocator _alloc;
    std::unique_ptr<ZSTD_CDict, decltype(&ZSTD_freeCDict)> _dict;
 public:
-    zstd_cdict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw, int level);
+    zstd_cdict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw, int level);
    ~zstd_cdict();
    auto dict() const { return _dict.get(); }
    auto raw() const { return _raw->raw(); }
@@ -119,11 +121,11 @@ public:
 // and a hash index over the substrings of the blob).
 //
 class lz4_cdict : public enable_lw_shared_from_this<lz4_cdict> {
-    weak_ptr<sstable_compressor_factory_impl> _owner;
+    weak_ptr<dictionary_holder> _owner;
    lw_shared_ptr<const raw_dict> _raw;
    std::unique_ptr<LZ4_stream_t, decltype(&LZ4_freeStream)> _dict;
 public:
-    lz4_cdict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw);
+    lz4_cdict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw);
    ~lz4_cdict();
    auto dict() const { return _dict.get(); }
    auto raw() const { return _raw->raw(); }
@@ -164,6 +166,7 @@ public:
    size_t compress_max_size(size_t input_len) const override;
    std::map<sstring, sstring> options() const override;
    algorithm get_algorithm() const override;
+    std::optional<unsigned> get_dict_owner_for_test() const override;
 };

 class snappy_processor: public compressor {
@@ -266,6 +269,7 @@ public:
    size_t compress_max_size(size_t input_len) const override;
    algorithm get_algorithm() const override;
    std::map<sstring, sstring> options() const override;
+    std::optional<unsigned> get_dict_owner_for_test() const override;
 };

 zstd_processor::zstd_processor(const compression_parameters& opts, cdict_ptr cdict, ddict_ptr ddict) {
@@ -323,6 +327,16 @@ auto zstd_processor::get_algorithm() const -> algorithm {
    return (_cdict || _ddict) ? algorithm::zstd_with_dicts : algorithm::zstd;
 }

+std::optional<unsigned> zstd_processor::get_dict_owner_for_test() const {
+    if (_cdict) {
+        return _cdict.get_owner_shard();
+    } else if (_ddict) {
+        return _ddict.get_owner_shard();
+    } else {
+        return std::nullopt;
+    }
+}
+
 const std::string_view DICTIONARY_OPTION = ".dictionary.";

 static std::map<sstring, sstring> dict_as_options(std::span<const std::byte> d) {
@@ -384,6 +398,10 @@ std::map<sstring, sstring> compressor::options() const {
    return {};
 }

+std::optional<unsigned> compressor::get_dict_owner_for_test() const {
+    return std::nullopt;
+}
+
 std::string compressor::name() const {
    return compression_parameters::algorithm_to_qualified_name(get_algorithm());
 }
@@ -434,7 +452,7 @@ std::string_view compression_parameters::algorithm_to_name(algorithm alg) {
        case algorithm::snappy: return "SnappyCompressor";
        case algorithm::zstd: return "ZstdCompressor";
        case algorithm::zstd_with_dicts: return "ZstdWithDictsCompressor";
-        case algorithm::none: on_internal_error(compressor_factory_logger, "algorithm_to_name(): called with algorithm::none");
+        case algorithm::none: return "none"; // Name used only for logging purposes, can't be chosen by the user.
    }
    abort();
 }
@@ -518,13 +536,17 @@ compression_parameters::compression_parameters(const std::map<sstring, sstring>&
    }
 }

-void compression_parameters::validate(const gms::feature_service& fs) {
-    if (!fs.sstable_compression_dicts) {
-        if (_algorithm == algorithm::zstd_with_dicts || _algorithm == algorithm::lz4_with_dicts) {
+void compression_parameters::validate(dicts_feature_enabled dicts_enabled, dicts_usage_allowed dicts_allowed) {
+    if (_algorithm == algorithm::zstd_with_dicts || _algorithm == algorithm::lz4_with_dicts) {
+        if (!dicts_enabled) {
            throw std::runtime_error(std::format("sstable_compression {} can't be used before "
                                                 "all nodes are upgraded to a versions which supports it",
                                                 algorithm_to_name(_algorithm)));
        }
+        if (!dicts_allowed) {
+            throw std::runtime_error(std::format("sstable_compression {} has been disabled by `sstable_compression_dictionaries_allow_in_ddl: false`",
+                                                 algorithm_to_name(_algorithm)));
+        }
    }
    if (_chunk_length) {
        auto chunk_length = _chunk_length.value();
@@ -660,6 +682,16 @@ std::map<sstring, sstring> lz4_processor::options() const {
    }
 }

+std::optional<unsigned> lz4_processor::get_dict_owner_for_test() const {
+    std::optional<unsigned> owner; // Stays empty when neither `_cdict` nor `_ddict` is set.
+    if (_cdict) {
+        owner = _cdict.get_owner_shard();
+    } else if (_ddict) {
+        owner = _ddict.get_owner_shard();
+    }
+    return owner;
+}
+
 compressor_ptr make_lz4_sstable_compressor_for_tests() {
    return std::make_unique<lz4_processor>();
 }
@@ -751,21 +783,12 @@ size_t snappy_processor::compress_max_size(size_t input_len) const {
    return snappy_max_compressed_length(input_len);
 }

-// Constructs compressors and decompressors for SSTables,
-// making sure that the expensive identical parts (dictionaries) are shared
-// across nodes.
-//
 // Holds weak pointers to all live dictionaries
 // (so that they can be cheaply shared with new SSTables if an identical dict is requested),
 // and shared (lifetime-extending) pointers to the current writer ("recommended")
 // dict for each table (so that they can be shared with new SSTables without consulting
 // `system.dicts`).
 //
-// To make coordination work without resorting to std::mutex and such, dicts have owner shards,
-// (and are borrowed by foreign shared pointers) and all requests for a given dict ID go through its owner.
-// (Note: this shouldn't pose a performance problem because a dict is only requested once per an opening of an SSTable).
-// (Note: at the moment of this writing, one shard owns all. Later we can spread the ownership. (E.g. shard it by dict hash)).
-//
 // Whenever a dictionary dies (because its refcount reaches 0), its weak pointer
 // is removed from the factory.
 //
@@ -774,10 +797,10 @@ size_t snappy_processor::compress_max_size(size_t input_len) const {
 // Has a configurable memory budget for live dicts. If the budget is exceeded,
 // will return null dicts to new writers (to avoid making the memory usage even worse)
 // and print warnings.
-class sstable_compressor_factory_impl : public sstable_compressor_factory, public weakly_referencable<sstable_compressor_factory_impl> {
+class dictionary_holder : public weakly_referencable<dictionary_holder> {
    mutable logger::rate_limit budget_warning_rate_limit{std::chrono::minutes(10)};
-    shard_id _owner_shard;
-    config _cfg;
+    using config = default_sstable_compressor_factory::config;
+    const config& _cfg;
    uint64_t _total_live_dict_memory = 0;
    metrics::metric_groups _metrics;
    struct zstd_cdict_id {
@@ -789,7 +812,7 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
    std::map<zstd_cdict_id, const zstd_cdict*> _zstd_cdicts;
    std::map<dict_id, const zstd_ddict*> _zstd_ddicts;
    std::map<dict_id, const lz4_cdict*> _lz4_cdicts;
-    std::map<table_id, lw_shared_ptr<const raw_dict>> _recommended;
+    std::map<table_id, lw_shared_ptr<foreign_ptr<lw_shared_ptr<const raw_dict>>>> _recommended;

    size_t memory_budget() const {
        return _cfg.memory_fraction_starting_at_which_we_stop_writing_dicts() * seastar::memory::stats().total_memory();
@@ -806,8 +829,11 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
            memory_budget()
        );
    }
+public:
    lw_shared_ptr<const raw_dict> get_canonical_ptr(std::span<const std::byte> dict) {
-        SCYLLA_ASSERT(this_shard_id() == _owner_shard);
+        if (dict.empty()) {
+            return nullptr;
+        }
        auto id = get_sha256(dict);
        if (auto it = _raw_dicts.find(id); it != _raw_dicts.end()) {
            return it->second->shared_from_this();
@@ -819,7 +845,9 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
    }
    using foreign_zstd_ddict = foreign_ptr<lw_shared_ptr<const zstd_ddict>>;
    foreign_zstd_ddict get_zstd_dict_for_reading(lw_shared_ptr<const raw_dict> raw, int level) {
-        SCYLLA_ASSERT(this_shard_id() == _owner_shard);
+        if (!raw) {
+            return nullptr;
+        }
        lw_shared_ptr<const zstd_ddict> ddict;
        // Fo reading, we must allocate a new dict, even if memory budget is exceeded. We have no other choice.
        // In any case, if the budget is exceeded after we print a rate-limited warning about it.
@@ -835,15 +863,11 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
        }
        return make_foreign(std::move(ddict));
    }
-    future<foreign_zstd_ddict> get_zstd_dict_for_reading(std::span<const std::byte> dict, int level) {
-        return smp::submit_to(_owner_shard, [this, dict, level] -> foreign_zstd_ddict {
-            auto raw = get_canonical_ptr(dict);
-            return get_zstd_dict_for_reading(raw, level);
-        });
-    }
    using foreign_zstd_cdict = foreign_ptr<lw_shared_ptr<const zstd_cdict>>;
    foreign_zstd_cdict get_zstd_dict_for_writing(lw_shared_ptr<const raw_dict> raw, int level) {
-        SCYLLA_ASSERT(this_shard_id() == _owner_shard);
+        if (!_cfg.enable_writing_dictionaries() || !raw) {
+            return nullptr;
+        }
        lw_shared_ptr<const zstd_cdict> cdict;
        // If we can share an already-allocated dict, we do that regardless of memory budget.
        // If we would have to allocate a new dict for writing, we only do that if we haven't exceeded
@@ -859,19 +883,6 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
        }
        return make_foreign(std::move(cdict));
    }
-    future<foreign_zstd_cdict> get_zstd_dict_for_writing(table_id t, int level) {
-        return smp::submit_to(_owner_shard, [this, t, level] -> foreign_zstd_cdict {
-            if (!_cfg.enable_writing_dictionaries()) {
-                return {};
-            }
-            auto rec_it = _recommended.find(t);
-            if (rec_it != _recommended.end()) {
-                return get_zstd_dict_for_writing(rec_it->second, level);
-            } else {
-                return {};
-            }
-        });
-    }
    using lz4_dicts = std::pair<
        foreign_ptr<lw_shared_ptr<const raw_dict>>,
        foreign_ptr<lw_shared_ptr<const lz4_cdict>>
@@ -879,18 +890,12 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
    using foreign_lz4_ddict = foreign_ptr<lw_shared_ptr<const raw_dict>>;
    using foreign_lz4_cdict = foreign_ptr<lw_shared_ptr<const lz4_cdict>>;
    foreign_lz4_ddict get_lz4_dict_for_reading(lw_shared_ptr<const raw_dict> raw) {
-        SCYLLA_ASSERT(this_shard_id() == _owner_shard);
-        lw_shared_ptr<const raw_dict> ddict;
        return make_foreign(std::move(raw));
    }
-    future<foreign_lz4_ddict> get_lz4_dicts_for_reading(std::span<const std::byte> dict) {
-        return smp::submit_to(_owner_shard, [this, dict] -> foreign_lz4_ddict {
-            auto raw = get_canonical_ptr(dict);
-            return get_lz4_dict_for_reading(raw);
-        });
-    }
    foreign_lz4_cdict get_lz4_dict_for_writing(lw_shared_ptr<const raw_dict> raw) {
-        SCYLLA_ASSERT(this_shard_id() == _owner_shard);
+        if (!_cfg.enable_writing_dictionaries() || !raw) {
+            return nullptr;
+        }
        lw_shared_ptr<const lz4_cdict> cdict;
        // If we can share an already-allocated dict, we do that regardless of memory budget.
        // If we would have to allocate a new dict for writing, we only do that if we haven't exceeded
@@ -905,24 +910,10 @@ class sstable_compressor_factory_impl : public sstable_compressor_factory, publi
        }
        return make_foreign(std::move(cdict));
    }
-    future<foreign_lz4_cdict> get_lz4_dicts_for_writing(table_id t) {
-        return smp::submit_to(_owner_shard, [this, t] -> foreign_lz4_cdict {
-            if (!_cfg.enable_writing_dictionaries()) {
-                return {};
-            }
-            auto rec_it = _recommended.find(t);
-            if (rec_it != _recommended.end()) {
-                return get_lz4_dict_for_writing(rec_it->second);
-            } else {
-                return {};
-            }
-        });
-    }

 public:
-    sstable_compressor_factory_impl(config cfg)
-        : _owner_shard(this_shard_id())
-        , _cfg(std::move(cfg))
+    dictionary_holder(const config& cfg)
+        : _cfg(cfg)
    {
        if (_cfg.register_metrics) {
            namespace sm = seastar::metrics;
@@ -931,8 +922,8 @@ public:
            });
        }
    }
-    sstable_compressor_factory_impl(sstable_compressor_factory_impl&&) = delete;
-    ~sstable_compressor_factory_impl() {
+    dictionary_holder(dictionary_holder&&) = delete;
+    ~dictionary_holder() {
        // Note: `_recommended` might be the only thing keeping some dicts alive,
        // so clearing it will destroy them.
        //
@@ -948,39 +939,39 @@ public:
        _recommended.clear();
    }
    void forget_raw_dict(dict_id id) {
-        SCYLLA_ASSERT(this_shard_id() == _owner_shard);
        _raw_dicts.erase(id);
    }
    void forget_zstd_cdict(dict_id id, int level) {
-        SCYLLA_ASSERT(this_shard_id() == _owner_shard);
        _zstd_cdicts.erase({id, level});
    }
    void forget_zstd_ddict(dict_id id) {
-        SCYLLA_ASSERT(this_shard_id() == _owner_shard);
        _zstd_ddicts.erase(id);
    }
    void forget_lz4_cdict(dict_id id) {
-        SCYLLA_ASSERT(this_shard_id() == _owner_shard);
        _lz4_cdicts.erase(id);
    }
-    future<> set_recommended_dict(table_id t, std::span<const std::byte> dict) override {
-        return smp::submit_to(_owner_shard, [this, t, dict] {
-            _recommended.erase(t);
-            if (dict.size()) {
-                auto canonical_ptr = get_canonical_ptr(dict);
-                _recommended.emplace(t, canonical_ptr);
-                compressor_factory_logger.debug("set_recommended_dict: table={} size={} id={}",
-                    t, dict.size(), fmt_hex(canonical_ptr->id()));
-            } else {
-                compressor_factory_logger.debug("set_recommended_dict: table={} size=0", t);
-            }
-        });
+    // Replaces the recommended dict of table `t`; a null `dict` only drops the existing entry.
+    void set_recommended_dict(table_id t, foreign_ptr<lw_shared_ptr<const raw_dict>> dict) {
+        _recommended.erase(t);
+        if (!dict) {
+            compressor_factory_logger.debug("set_recommended_dict: table={} size=0", t);
+            return;
+        }
+        compressor_factory_logger.debug("set_recommended_dict: table={} size={} id={}", t, dict->raw().size(), fmt_hex(dict->id()));
+        _recommended.emplace(t, make_lw_shared(std::move(dict)));
+    }
+    future<foreign_ptr<lw_shared_ptr<const raw_dict>>> get_recommended_dict(table_id t) {
+        const auto found = _recommended.find(t);
+        if (found == _recommended.end()) {
+            co_return nullptr;
+        }
+        // copy() is awaited, and the map entry may be erased or replaced in the meantime.
+        // Pin the outer shared pointer first so the foreign_ptr outlives the await.
+        auto pinned = found->second;
+        co_return co_await pinned->copy();
    }
-    future<compressor_ptr> make_compressor_for_writing(schema_ptr) override;
-    future<compressor_ptr> make_compressor_for_reading(sstables::compression&) override;

    void account_memory_delta(ssize_t n) {
-        SCYLLA_ASSERT(this_shard_id() == _owner_shard);
        if (static_cast<ssize_t>(_total_live_dict_memory) + n < 0) {
            compressor_factory_logger.error(
                "Error in dictionary memory accounting: delta {} brings live memory {} below 0",
@@ -990,19 +981,85 @@ public:
    }
 };

+default_sstable_compressor_factory::default_sstable_compressor_factory(config cfg)
+    : _cfg(std::move(cfg))
+    , _holder(std::make_unique<dictionary_holder>(_cfg))
+{
+    for (shard_id shard = 0; shard < smp::count; ++shard) { // Bucket each shard under its NUMA node.
+        const auto node = _cfg.numa_config[shard];
+        _numa_groups.resize(std::max<size_t>(_numa_groups.size(), node + 1)); // Ensure index `node` exists.
+        _numa_groups[node].push_back(shard);
+    }
+}

-future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_writing(schema_ptr s) {
-    const auto params = s->get_compressor_params();
+default_sstable_compressor_factory::~default_sstable_compressor_factory() { // NOTE(review): presumably out of line because dictionary_holder is only complete in this file — confirm.
+}
+
+std::vector<unsigned> default_sstable_compressor_factory_config::get_default_shard_to_numa_node_mapping() {
+    const auto mapping = local_engine->smp().shard_to_numa_node_mapping(); // Shard -> NUMA node table from `local_engine->smp()`.
+    return std::vector<unsigned>(mapping.begin(), mapping.end());
+}
+
+unsigned default_sstable_compressor_factory::local_numa_id() {
+    return _cfg.numa_config[this_shard_id()]; // NUMA node assigned to the calling shard in `_cfg.numa_config`.
+}
+
+// Picks one shard of NUMA group `numa_id` for a dict, keyed by a uint64_t read from the start of its SHA-256.
+shard_id default_sstable_compressor_factory::get_dict_owner(unsigned numa_id, const sha256_type& sha) {
+    const auto& shards = _numa_groups[numa_id];
+    if (shards.empty()) {
+        on_internal_error(compressor_factory_logger, "get_dict_owner called on an empty NUMA group");
+    }
+    return shards[read_unaligned<uint64_t>(sha.data()) % shards.size()];
+}
+
+future<> default_sstable_compressor_factory::set_recommended_dict_local(table_id t, std::span<const std::byte> dict) {
+    if (_leader_shard != this_shard_id()) { // Must only ever run on `_leader_shard`.
+        on_internal_error(compressor_factory_logger, fmt::format("set_recommended_dict_local called on wrong shard. Expected: {}, got {}", _leader_shard, this_shard_id()));
+    }
+    auto units = co_await get_units(_recommendation_setting_sem, 1); // Held until this coroutine finishes.
+    auto sha = get_sha256(dict);
+    for (unsigned numa_id = 0; numa_id < _numa_groups.size(); ++numa_id) { // One pass per NUMA group.
+        const auto& group = _numa_groups[numa_id];
+        if (group.empty()) {
+            continue; // No shard is assigned to this NUMA node.
+        }
+        auto r = get_dict_owner(numa_id, sha); // Shard of this group chosen to own the dict.
+        auto d = co_await container().invoke_on(r, [dict](self& local) {
+            return make_foreign(local._holder->get_canonical_ptr(dict)); // Null for an empty `dict`.
+        });
+        auto local_coordinator = group[0]; // The group's first shard stores its recommendations.
+        co_await container().invoke_on(local_coordinator, coroutine::lambda([t, d = std::move(d)](self& local) mutable {
+            local._holder->set_recommended_dict(t, std::move(d));
+        }));
+    }
+}
+
+future<> default_sstable_compressor_factory::set_recommended_dict(table_id t, std::span<const std::byte> dict) {
+    return container().invoke_on(_leader_shard, &self::set_recommended_dict_local, t, dict); // Runs set_recommended_dict_local() on `_leader_shard`.
+}
+
+future<foreign_ptr<lw_shared_ptr<const raw_dict>>> default_sstable_compressor_factory::get_recommended_dict(table_id t) {
+    const auto& local_group = _numa_groups[local_numa_id()]; // Shards sharing this shard's NUMA node; the first one is queried.
+    return container().invoke_on(local_group[0], [t](self& peer) {
+        return peer._holder->get_recommended_dict(t);
+    });
+}
+
+future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_writing_impl(const compression_parameters& params, table_id id) {
    using algorithm = compression_parameters::algorithm;
    const auto algo = params.get_algorithm();
-    compressor_factory_logger.debug("make_compressor_for_writing: table={} algo={}", s->id(), algo);
+    compressor_factory_logger.debug("make_compressor_for_writing: table={} algo={}", id, algo);
    switch (algo) {
    case algorithm::lz4:
        co_return std::make_unique<lz4_processor>(nullptr, nullptr);
    case algorithm::lz4_with_dicts: {
-        auto cdict = _cfg.enable_writing_dictionaries()
-            ? co_await get_lz4_dicts_for_writing(s->id())
-            : nullptr;
+        holder::foreign_lz4_cdict cdict;
+        if (auto recommended = co_await get_recommended_dict(id)) {
+            cdict = co_await container().invoke_on(recommended.get_owner_shard(), [recommended = std::move(recommended)] (self& local) mutable {
+                return local._holder->get_lz4_dict_for_writing(recommended.release());
+            });
+        }
        if (cdict) {
            compressor_factory_logger.debug("make_compressor_for_writing: using dict id={}", fmt_hex(cdict->id()));
        }
@@ -1015,9 +1072,13 @@ future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_writ
    case algorithm::zstd:
        co_return std::make_unique<zstd_processor>(params, nullptr, nullptr);
    case algorithm::zstd_with_dicts: {
-        auto cdict = _cfg.enable_writing_dictionaries()
-            ? co_await get_zstd_dict_for_writing(s->id(), params.zstd_compression_level().value_or(ZSTD_defaultCLevel()))
-            : nullptr;
+        holder::foreign_zstd_cdict cdict;
+        if (auto recommended = co_await get_recommended_dict(id)) {
+            auto level = params.zstd_compression_level().value_or(ZSTD_defaultCLevel());
+            cdict = co_await container().invoke_on(recommended.get_owner_shard(), [level, recommended = std::move(recommended)] (self& local) mutable {
+                return local._holder->get_zstd_dict_for_writing(recommended.release(), level);
+            });
+        }
        if (cdict) {
            compressor_factory_logger.debug("make_compressor_for_writing: using dict id={}", fmt_hex(cdict->id()));
        }
@@ -1029,17 +1090,28 @@ future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_writ
    abort();
 }

-future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_reading(sstables::compression& c) {
-    const auto params = compression_parameters(sstables::options_from_compression(c));
+future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_writing(schema_ptr s) {
+    return make_compressor_for_writing_impl(s->get_compressor_params(), s->id()); // Parameters and table id both come from the schema.
+}
+
+future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_writing_for_tests(const compression_parameters& params, table_id id) {
+    return make_compressor_for_writing_impl(params, id); // Same path as make_compressor_for_writing(), with explicit parameters instead of a schema.
+}
+
+future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_reading_impl(const compression_parameters& params, std::span<const std::byte> dict) {
    using algorithm = compression_parameters::algorithm;
    const auto algo = params.get_algorithm();
-    compressor_factory_logger.debug("make_compressor_for_reading: compression={} algo={}", fmt::ptr(&c), algo);
    switch (algo) {
    case algorithm::lz4:
        co_return std::make_unique<lz4_processor>(nullptr, nullptr);
    case algorithm::lz4_with_dicts: {
-        auto dict = dict_from_options(c);
-        auto ddict = co_await get_lz4_dicts_for_reading(std::as_bytes(std::span(*dict)));
+        auto dict_span = dict;
+        auto sha = get_sha256(dict_span);
+        auto dict_owner = get_dict_owner(local_numa_id(), sha);
+        auto ddict = co_await container().invoke_on(dict_owner, [dict_span] (self& local) mutable {
+            auto d = local._holder->get_canonical_ptr(dict_span);
+            return local._holder->get_lz4_dict_for_reading(std::move(d));
+        });
        if (ddict) {
            compressor_factory_logger.debug("make_compressor_for_reading: using dict id={}", fmt_hex(ddict->id()));
        }
@@ -1054,8 +1126,13 @@ future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_read
    }
    case algorithm::zstd_with_dicts: {
        auto level = params.zstd_compression_level().value_or(ZSTD_defaultCLevel());
-        auto dict = dict_from_options(c);
-        auto ddict = co_await get_zstd_dict_for_reading(std::as_bytes(std::span(*dict)), level);
+        auto dict_span = dict;
+        auto sha = get_sha256(dict_span);
+        auto dict_owner = get_dict_owner(local_numa_id(), sha);
+        auto ddict = co_await container().invoke_on(dict_owner, [level, dict_span] (self& local) mutable {
+            auto d = local._holder->get_canonical_ptr(dict_span);
+            return local._holder->get_zstd_dict_for_reading(std::move(d), level);
+        });
        if (ddict) {
            compressor_factory_logger.debug("make_compressor_for_reading: using dict id={}", fmt_hex(ddict->id()));
        }
@@ -1067,7 +1144,19 @@ future<compressor_ptr> sstable_compressor_factory_impl::make_compressor_for_read
    abort();
 }

-raw_dict::raw_dict(sstable_compressor_factory_impl& owner, dict_id key, std::span<const std::byte> dict)
+// Builds a decompressor for `c`. NOTE(review): the deref is guarded in case dict_from_options() yields an empty handle — confirm.
+future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_reading(sstables::compression& c) {
+    const auto params = compression_parameters(sstables::options_from_compression(c));
+    auto dict = dict_from_options(c);
+    compressor_factory_logger.debug("make_compressor_for_reading: compression={} algo={}", fmt::ptr(&c), params.get_algorithm());
+    co_return co_await make_compressor_for_reading_impl(params, dict ? std::as_bytes(std::span(*dict)) : std::span<const std::byte>());
+}
+
+future<compressor_ptr> default_sstable_compressor_factory::make_compressor_for_reading_for_tests(const compression_parameters& params, std::span<const std::byte> dict) {
+    return make_compressor_for_reading_impl(params, dict); // Same path as make_compressor_for_reading(), with the dictionary bytes passed in directly.
+}
+
+raw_dict::raw_dict(dictionary_holder& owner, dict_id key, std::span<const std::byte> dict)
    : _owner(owner.weak_from_this())
    , _id(key)
    , _dict(dict.begin(), dict.end())
@@ -1082,7 +1171,7 @@ raw_dict::~raw_dict() {
    }
 }

-zstd_cdict::zstd_cdict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw, int level)
+zstd_cdict::zstd_cdict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw, int level)
    : _owner(owner.weak_from_this())
    , _raw(raw)
    , _level(level)
@@ -1114,7 +1203,7 @@ zstd_cdict::~zstd_cdict() {
    }
 }

-zstd_ddict::zstd_ddict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw)
+zstd_ddict::zstd_ddict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw)
    : _owner(owner.weak_from_this())
    , _raw(raw)
    , _alloc([this] (ssize_t n) {
@@ -1143,7 +1232,7 @@ zstd_ddict::~zstd_ddict() {
    }
 }

-lz4_cdict::lz4_cdict(sstable_compressor_factory_impl& owner, lw_shared_ptr<const raw_dict> raw)
+lz4_cdict::lz4_cdict(dictionary_holder& owner, lw_shared_ptr<const raw_dict> raw)
    : _owner(owner.weak_from_this())
    , _raw(raw)
    , _dict(LZ4_createStream(), LZ4_freeStream)
@@ -1162,6 +1251,28 @@ lz4_cdict::~lz4_cdict() {
    }
 }

-std::unique_ptr<sstable_compressor_factory> make_sstable_compressor_factory(sstable_compressor_factory::config cfg) {
-    return std::make_unique<sstable_compressor_factory_impl>(std::move(cfg));
+// Test-only factory that owns a private sharded default_sstable_compressor_factory; asserts it runs in a seastar thread.
+std::unique_ptr<sstable_compressor_factory> make_sstable_compressor_factory_for_tests_in_thread() {
+    SCYLLA_ASSERT(thread::running_in_thread());
+    struct wrapper : sstable_compressor_factory {
+        sharded<default_sstable_compressor_factory> _impl;
+        future<compressor_ptr> make_compressor_for_writing(schema_ptr s) override {
+            return _impl.local().make_compressor_for_writing(s);
+        }
+        future<compressor_ptr> make_compressor_for_reading(sstables::compression& c) override {
+            return _impl.local().make_compressor_for_reading(c);
+        }
+        future<> set_recommended_dict(table_id t, std::span<const std::byte> d) override {
+            return _impl.local().set_recommended_dict(t, d);
+        }
+        wrapper(wrapper&&) = delete;
+        wrapper() {
+            _impl.start().get(); // Waits for the sharded service to start.
+        }
+        ~wrapper() {
+            _impl.stop().get(); // Waits for the sharded service to stop.
+        }
+    };
+    return std::make_unique<wrapper>();
 }
+
--- a/compress.hh
+++ b/compress.hh
@@ -13,12 +13,9 @@

 #include <seastar/core/future.hh>
 #include <seastar/core/sstring.hh>
+#include <seastar/util/bool_class.hh>
 #include "seastarx.hh"

-namespace gms {
-class feature_service;
-} // namespace gms
-
 class compression_parameters;

 class compressor {
@@ -64,6 +61,8 @@ public:

    virtual algorithm get_algorithm() const = 0;

+    virtual std::optional<unsigned> get_dict_owner_for_test() const;
+
    using ptr_type = std::unique_ptr<compressor>;
 };

@@ -106,7 +105,10 @@ public:
    algorithm get_algorithm() const { return _algorithm; }
    std::optional<int> zstd_compression_level() const { return _zstd_compression_level; }

-    void validate(const gms::feature_service&);
+    using dicts_feature_enabled = bool_class<struct dicts_feature_enabled_tag>;
+    using dicts_usage_allowed = bool_class<struct dicts_usage_allowed_tag>;
+    void validate(dicts_feature_enabled, dicts_usage_allowed);
+
    std::map<sstring, sstring> get_options() const;

    bool compression_enabled() const { 
--- a/configure.py
+++ b/configure.py
@@ -981,6 +981,7 @@ scylla_core = (['message/messaging_service.cc',
                'cql3/result_set.cc',
                'cql3/prepare_context.cc',
                'db/batchlog_manager.cc',
+                'db/corrupt_data_handler.cc',
                'db/commitlog/commitlog.cc',
                'db/commitlog/commitlog_entry.cc',
                'db/commitlog/commitlog_replayer.cc',
@@ -1338,6 +1339,7 @@ idls = ['idl/gossip_digest.idl.hh',
        'idl/replica_exception.idl.hh',
        'idl/per_partition_rate_limit_info.idl.hh',
        'idl/position_in_partition.idl.hh',
+        'idl/full_position.idl.hh',
        'idl/experimental/broadcast_tables_lang.idl.hh',
        'idl/storage_service.idl.hh',
        'idl/join_node.idl.hh',
@@ -1538,6 +1540,7 @@ deps['test/boost/combined_tests'] += [
    'test/boost/secondary_index_test.cc',
    'test/boost/sessions_test.cc',
    'test/boost/sstable_compaction_test.cc',
+    'test/boost/sstable_compressor_factory_test.cc',
    'test/boost/sstable_directory_test.cc',
    'test/boost/sstable_set_test.cc',
    'test/boost/statement_restrictions_test.cc',
--- a/cql3/statements/cf_prop_defs.cc
+++ b/cql3/statements/cf_prop_defs.cc
@@ -23,6 +23,7 @@
 #include "db/per_partition_rate_limit_options.hh"
 #include "db/tablet_options.hh"
 #include "utils/bloom_calculations.hh"
+#include "db/config.hh"

 #include <boost/algorithm/string/predicate.hpp>

@@ -135,7 +136,9 @@ void cf_prop_defs::validate(const data_dictionary::database db, sstring ks_name,
            throw exceptions::configuration_exception(sstring("Missing sub-option '") + compression_parameters::SSTABLE_COMPRESSION + "' for the '" + KW_COMPRESSION + "' option.");
        }
        compression_parameters cp(*compression_options);
-        cp.validate(db.features());
+        cp.validate(
+            compression_parameters::dicts_feature_enabled(bool(db.features().sstable_compression_dicts)),
+            compression_parameters::dicts_usage_allowed(db.get_config().sstable_compression_dictionaries_allow_in_ddl()));
    }

    auto per_partition_rate_limit_options = get_per_partition_rate_limit_options(schema_extensions);
--- a/cql3/statements/create_keyspace_statement.cc
+++ b/cql3/statements/create_keyspace_statement.cc
@@ -113,10 +113,9 @@ future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector
        if (rs->uses_tablets()) {
            warnings.push_back(
                "Tables in this keyspace will be replicated using Tablets "
-                "and will not support CDC, LWT and counters features. "
-                "To use CDC, LWT or counters, drop this keyspace and re-create it "
-                "without tablets by adding AND TABLETS = {'enabled': false} "
-                "to the CREATE KEYSPACE statement.");
+                "and will not support Materialized Views, Secondary Indexes, CDC, LWT and counters features. "
+                "To use Materialized Views, Secondary Indexes, CDC, LWT or counters, drop this keyspace and re-create it "
+                "without tablets by adding AND TABLETS = {'enabled': false} to the CREATE KEYSPACE statement.");
            if (ksm->initial_tablets().value()) {
                warnings.push_back("Keyspace `initial` tablets option is deprecated.  Use per-table tablet options instead.");
            }
--- a/db/CMakeLists.txt
+++ b/db/CMakeLists.txt
@@ -27,6 +27,7 @@ target_sources(db
    extensions.cc
    heat_load_balance.cc
    large_data_handler.cc
+    corrupt_data_handler.cc
    marshal/type_parser.cc
    batchlog_manager.cc
    tags/utils.cc
--- a/db/batchlog_manager.cc
+++ b/db/batchlog_manager.cc
@@ -36,7 +36,7 @@

 static logging::logger blogger("batchlog_manager");

-const uint32_t db::batchlog_manager::replay_interval;
+const std::chrono::seconds db::batchlog_manager::replay_interval;
 const uint32_t db::batchlog_manager::page_size;

 db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, batchlog_manager_config config)
@@ -116,7 +116,8 @@ future<> db::batchlog_manager::batchlog_replay_loop() {
        } catch (...) {
            blogger.error("Exception in batch replay: {}", std::current_exception());
        }
-        delay = std::chrono::milliseconds(replay_interval);
+        delay = utils::get_local_injector().is_enabled("short_batchlog_manager_replay_interval") ?
+                std::chrono::seconds(1) : replay_interval;
    }
 }

@@ -132,6 +133,8 @@ future<> db::batchlog_manager::drain() {
        _sem.broken();
    }

+    co_await _qp.proxy().abort_batch_writes();
+
    co_await std::move(_loop_done);
    blogger.info("Drained");
 }
@@ -173,6 +176,11 @@ future<> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cle
            return make_ready_future<stop_iteration>(stop_iteration::no);
        }

+        if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
+            blogger.debug("Skipping batch replay due to skip_batch_replay injection");
+            return make_ready_future<stop_iteration>(stop_iteration::no);
+        }
+
        // check version of serialization format
        if (!row.has("version")) {
            blogger.warn("Skipping logged batch because of unknown version");
@@ -242,7 +250,8 @@ future<> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cle
                // send to partially or wholly fail in actually sending stuff. Since we don't
                // have hints (yet), send with CL=ALL, and hope we can re-do this soon.
                // See below, we use retry on write failure.
-                return _qp.proxy().mutate(mutations, db::consistency_level::ALL, db::no_timeout, nullptr, empty_service_permit(), db::allow_per_partition_rate_limit::no);
+                auto timeout = db::timeout_clock::now() + write_timeout;
+                return _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
            });
        }).then_wrapped([this, id](future<> batch_result) {
            try {
--- a/db/batchlog_manager.hh
+++ b/db/batchlog_manager.hh
@@ -43,8 +43,9 @@ public:
    using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;

 private:
-    static constexpr uint32_t replay_interval = 60 * 1000; // milliseconds
+    static constexpr std::chrono::seconds replay_interval = std::chrono::seconds(60);
    static constexpr uint32_t page_size = 128; // same as HHOM, for now, w/out using any heuristics. TODO: set based on avg batch size.
+    static constexpr std::chrono::seconds write_timeout = std::chrono::seconds(300);

    using clock_type = lowres_clock;

--- a/db/config.cc
+++ b/db/config.cc
@@ -1230,7 +1230,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , sstable_summary_ratio(this, "sstable_summary_ratio", value_status::Used, 0.0005, "Enforces that 1 byte of summary is written for every N (2000 by default)"
        "bytes written to data file. Value must be between 0 and 1.")
    , components_memory_reclaim_threshold(this, "components_memory_reclaim_threshold", liveness::LiveUpdate, value_status::Used, .2, "Ratio of available memory for all in-memory components of SSTables in a shard beyond which the memory will be reclaimed from components until it falls back under the threshold. Currently, this limit is only enforced for bloom filters.")
-    , large_memory_allocation_warning_threshold(this, "large_memory_allocation_warning_threshold", value_status::Used, (size_t(128) << 10) + 1, "Warn about memory allocations above this size; set to zero to disable.")
+    , large_memory_allocation_warning_threshold(this, "large_memory_allocation_warning_threshold", value_status::Used, size_t(1) << 20, "Warn about memory allocations above this size; set to zero to disable.")
    , enable_deprecated_partitioners(this, "enable_deprecated_partitioners", value_status::Used, false, "Enable the byteordered and random partitioners. These partitioners are deprecated and will be removed in a future version.")
    , enable_keyspace_column_family_metrics(this, "enable_keyspace_column_family_metrics", value_status::Used, false, "Enable per keyspace and per column family metrics reporting.")
    , enable_node_aggregated_table_metrics(this, "enable_node_aggregated_table_metrics", value_status::Used, true, "Enable aggregated per node, per keyspace and per table metrics reporting, applicable if enable_keyspace_column_family_metrics is false.")
@@ -1243,6 +1243,13 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , enable_sstables_mc_format(this, "enable_sstables_mc_format", value_status::Unused, true, "Enable SSTables 'mc' format to be used as the default file format.  Deprecated, please use \"sstable_format\" instead.")
    , enable_sstables_md_format(this, "enable_sstables_md_format", value_status::Unused, true, "Enable SSTables 'md' format to be used as the default file format.  Deprecated, please use \"sstable_format\" instead.")
    , sstable_format(this, "sstable_format", value_status::Used, "me", "Default sstable file format", {"md", "me"})
+    , sstable_compression_dictionaries_allow_in_ddl(this, "sstable_compression_dictionaries_allow_in_ddl", liveness::LiveUpdate, value_status::Used, true,
+        "Allows for configuring tables to use SSTable compression with shared dictionaries. "
+        "If the option is disabled, Scylla will reject CREATE and ALTER statements which try to set dictionary-based sstable compressors.\n"
+        "This is only enforced when this node validates a new DDL statement; disabling the option won't disable dictionary-based compression "
+        "on tables which already have it configured, and won't do anything to existing sstables.\n"
+        "To affect existing tables, you can ALTER them to a non-dictionary compressor, or disable dictionary compression "
+        "for the whole node through `sstable_compression_dictionaries_enable_writing`.")
    , sstable_compression_dictionaries_enable_writing(this, "sstable_compression_dictionaries_enable_writing", liveness::LiveUpdate, value_status::Used, true,
        "Enables SSTable compression with shared dictionaries (for tables which opt in). If set to false, this node won't write any new SSTables using dictionary compression.\n"
        "Option meant not for regular usage, but for unforeseen problems that call for disabling dictionaries without modifying table schema.")
--- a/db/config.hh
+++ b/db/config.hh
@@ -436,6 +436,7 @@ public:
    named_value<bool> enable_sstables_mc_format;
    named_value<bool> enable_sstables_md_format;
    named_value<sstring> sstable_format;
+    named_value<bool> sstable_compression_dictionaries_allow_in_ddl;
    named_value<bool> sstable_compression_dictionaries_enable_writing;
    named_value<float> sstable_compression_dictionaries_memory_budget_fraction;
    named_value<float> sstable_compression_dictionaries_retrain_period_in_seconds;
--- a/db/corrupt_data_handler.cc
+++ b/db/corrupt_data_handler.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2025-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#include "db/corrupt_data_handler.hh"
+#include "reader_concurrency_semaphore.hh"
+#include "replica/database.hh"
+#include "utils/UUID_gen.hh"
+
+static logging::logger corrupt_data_logger("corrupt_data");
+
+namespace sm = seastar::metrics;
+
+namespace db {
+
+corrupt_data_handler::corrupt_data_handler(register_metrics rm) {
+    if (rm) {
+        _metrics.add_group("corrupt_data", {
+                sm::make_counter("entries_reported", _stats.corrupt_data_reported,
+                               sm::description("Counts the number of corrupt data instances reported to the corrupt data handler. "
+                                               "A non-zero value indicates that the database suffered data corruption."))
+                });
+    }
+}
+
+future<corrupt_data_handler::entry_id> corrupt_data_handler::record_corrupt_clustering_row(const schema& s, const partition_key& pk,
+        clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
+    ++_stats.corrupt_data_reported;
+    ++_stats.corrupt_clustering_rows_reported;
+    return do_record_corrupt_clustering_row(s, pk, std::move(cr), std::move(origin), std::move(sstable_name)).then([this] (entry_id id) {
+        if (id) {
+            ++_stats.corrupt_data_recorded;
+            ++_stats.corrupt_clustering_rows_recorded;
+        }
+        return id;
+    });
+}
+
+system_table_corrupt_data_handler::system_table_corrupt_data_handler(config cfg, register_metrics rm)
+    : corrupt_data_handler(rm)
+    , _entry_ttl(cfg.entry_ttl)
+    , _sys_ks("system_table_corrupt_data_handler::system_keyspace")
+{
+}
+
+system_table_corrupt_data_handler::~system_table_corrupt_data_handler() {
+}
+
+reader_permit system_table_corrupt_data_handler::make_fragment_permit(const schema& s) {
+    return _fragment_semaphore->make_tracking_only_permit(s.shared_from_this(), "system_table_corrupt_data_handler::make_fragment_permit", db::no_timeout, {});
+}
+
+future<corrupt_data_handler::entry_id> system_table_corrupt_data_handler::do_record_corrupt_mutation_fragment(
+        pluggable_system_keyspace::permit sys_ks,
+        const schema& user_table_schema,
+        const partition_key& pk,
+        const clustering_key& ck,
+        mutation_fragment_v2::kind kind,
+        frozen_mutation_fragment_v2 fmf,
+        sstring origin,
+        std::optional<sstring> sstable_name) {
+    const corrupt_data_handler::entry_id id{utils::UUID_gen::get_time_UUID()};
+    // Build one row of the corrupt-data table, keyed by the user table's (keyspace, table) and the fresh timeuuid, then apply it via sys_ks.
+    const auto corrupt_data_schema = sys_ks->local_db().find_column_family(system_keyspace::NAME, system_keyspace::CORRUPT_DATA).schema();
+
+    // Using the lower-level mutation API to avoid large allocation warnings when linearizing the frozen mutation fragment.
+    mutation entry_mutation(corrupt_data_schema, partition_key::from_exploded(*corrupt_data_schema, {serialized(user_table_schema.ks_name()), serialized(user_table_schema.cf_name())}));
+    auto& entry_row = entry_mutation.partition().clustered_row(*corrupt_data_schema, clustering_key::from_single_value(*corrupt_data_schema, serialized(timeuuid_native_type{id.uuid()})));
+
+    const auto timestamp = api::new_timestamp();
+
+    auto set_cell_raw = [this, &entry_row, &corrupt_data_schema, timestamp] (const char* cell_name, managed_bytes cell_value) {
+        auto cdef = corrupt_data_schema->get_column_definition(cell_name);
+        SCYLLA_ASSERT(cdef);
+        // The value is stored exactly as passed in (already serialized), expiring after _entry_ttl.
+        entry_row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, timestamp, cell_value, _entry_ttl));
+    };
+
+    auto set_cell = [this, &entry_row, &corrupt_data_schema, timestamp] (const char* cell_name, data_value cell_value) {
+        auto cdef = corrupt_data_schema->get_column_definition(cell_name);
+        SCYLLA_ASSERT(cdef);
+        // NOTE(review): serialize_nonnull() presumably requires a non-null value; callers must not pass a null data_value.
+        entry_row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, timestamp, cell_value.serialize_nonnull(), _entry_ttl));
+    };
+
+    entry_row.apply(row_marker(timestamp, _entry_ttl, gc_clock::now() + _entry_ttl));
+    set_cell("partition_key", data_value(to_bytes(pk.representation())));
+    set_cell("clustering_key", data_value(to_bytes(ck.representation())));
+    set_cell("mutation_fragment_kind", fmt::to_string(kind));
+    // FIXME: Exposing knowledge here that bytes are serialized by just storing the raw value.
+    // Need to replace with a fragmented-buffer serialize API call, which we don't have yet.
+    set_cell_raw("frozen_mutation_fragment", std::move(fmf).representation().to_managed_bytes());
+    set_cell("origin", origin);
+    if (sstable_name) { set_cell("sstable_name", *sstable_name); } // no sstable name given: leave the column unset instead of serializing a null
+
+    return sys_ks->apply_mutation(std::move(entry_mutation)).then([id] {
+        return id;
+    });
+}
+
+future<corrupt_data_handler::entry_id> system_table_corrupt_data_handler::do_record_corrupt_clustering_row(const schema& s, const partition_key& pk,
+        clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
+    auto sys_ks = _sys_ks.get_permit();
+    if (!sys_ks) {
+        co_return corrupt_data_handler::entry_id::create_null_id();
+    }
+
+    const auto ck = cr.key();
+    auto fmf = freeze(s, mutation_fragment_v2(s, make_fragment_permit(s), std::move(cr)));
+
+    co_return co_await do_record_corrupt_mutation_fragment(std::move(sys_ks), s, pk, ck, mutation_fragment_v2::kind::clustering_row, std::move(fmf),
+            std::move(origin), std::move(sstable_name));
+}
+
+void system_table_corrupt_data_handler::plug_system_keyspace(db::system_keyspace& sys_ks) noexcept {
+    _sys_ks.plug(sys_ks.shared_from_this());
+    _fragment_semaphore = std::make_unique<reader_concurrency_semaphore>(reader_concurrency_semaphore::no_limits{}, "system_table_corrupt_data_handler", reader_concurrency_semaphore::register_metrics::no);
+}
+
+future<> system_table_corrupt_data_handler::unplug_system_keyspace() noexcept {
+    co_await _sys_ks.unplug();
+    co_await _fragment_semaphore->stop();
+}
+
+future<corrupt_data_handler::entry_id> nop_corrupt_data_handler::do_record_corrupt_clustering_row(const schema& s, const partition_key& pk,
+        clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
+    return make_ready_future<entry_id>(entry_id::create_null_id());
+}
+
+} // namespace db
--- a/db/corrupt_data_handler.hh
+++ b/db/corrupt_data_handler.hh
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2025-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#pragma once
+
+#include "db/system_keyspace.hh"
+#include "utils/UUID.hh"
+#include "utils/pluggable.hh"
+
+class reader_concurrency_semaphore;
+class reader_permit;
+
+namespace db {
+
+class corrupt_data_handler {
+public:
+    // An ID identifying the corrupt data entry.
+    // To be interpreted in the context of the storage where it is recorded, see storage_name().
+    using entry_id = utils::tagged_uuid<struct corrupt_data_entry_tag>;
+
+    struct stats {
+        // Counters for the number of corrupt data entries reported.
+        uint64_t corrupt_data_reported = 0;
+        // Counters for the number of corrupt data entries recorded.
+        // Can be less than reported depending on the configuration or if entries failed to be recorded.
+        uint64_t corrupt_data_recorded = 0;
+
+        uint64_t corrupt_clustering_rows_reported = 0;
+        uint64_t corrupt_clustering_rows_recorded = 0;
+    };
+
+private:
+    stats _stats;
+
+    seastar::metrics::metric_groups _metrics;
+
+protected:
+    virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) = 0;
+
+public:
+    using register_metrics = bool_class<struct corrupt_data_handler_register_metrics_tag>;
+    explicit corrupt_data_handler(register_metrics);
+    virtual ~corrupt_data_handler() = default;
+
+    const stats& get_stats() const noexcept {
+        return _stats;
+    }
+
+    // The name of the storage where corrupt data is recorded.
+    // The storage-name and the entry-id together should allow the user to unambiguously locate the entry.
+    virtual sstring storage_name() const noexcept = 0;
+
+    // Record a corrupt clustering row.
+    // If the returned id is null, the row was not recorded.
+    future<entry_id> record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name);
+};
+
+// Stores corrupt data entries in the system.corrupt_data table.
+class system_table_corrupt_data_handler final : public corrupt_data_handler {
+public:
+    using pluggable_system_keyspace = utils::pluggable<db::system_keyspace>;
+
+    struct config {
+        gc_clock::duration entry_ttl;
+    };
+
+private:
+    gc_clock::duration _entry_ttl;
+
+    pluggable_system_keyspace _sys_ks;
+    std::unique_ptr<reader_concurrency_semaphore> _fragment_semaphore;
+
+private:
+    reader_permit make_fragment_permit(const schema& s);
+
+    future<entry_id> do_record_corrupt_mutation_fragment(pluggable_system_keyspace::permit sys_ks, const schema& user_table_schema, const partition_key& pk, const clustering_key& ck,
+            mutation_fragment_v2::kind kind, frozen_mutation_fragment_v2 mf, sstring origin, std::optional<sstring> sstable_name);
+
+    virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) override;
+
+public:
+    explicit system_table_corrupt_data_handler(config, register_metrics);
+    ~system_table_corrupt_data_handler();
+
+    virtual sstring storage_name() const noexcept override {
+        return format("{}.{}", db::system_keyspace::NAME, db::system_keyspace::CORRUPT_DATA);
+    }
+
+    void plug_system_keyspace(db::system_keyspace& sys_ks) noexcept;
+    future<> unplug_system_keyspace() noexcept;
+};
+
+// A no-op corrupt data handler that does not record any data.
+class nop_corrupt_data_handler final : public corrupt_data_handler {
+    virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) override;
+
+public:
+    explicit nop_corrupt_data_handler(register_metrics rm)
+        : corrupt_data_handler(rm) {}
+    virtual sstring storage_name() const noexcept override {
+        return "/dev/null";
+    }
+};
+
+} // namespace db
--- a/db/large_data_handler.cc
+++ b/db/large_data_handler.cc
@@ -148,7 +148,7 @@ cql_table_large_data_handler::cql_table_large_data_handler(gms::feature_service&

 template <typename... Args>
 future<> cql_table_large_data_handler::try_record(std::string_view large_table, const sstables::sstable& sst,  const sstables::key& partition_key, int64_t size,
-        std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const {
+        std::string_view size_desc, std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const {
    auto sys_ks = _sys_ks.get_permit();
    if (!sys_ks) {
        co_return;
@@ -168,7 +168,7 @@ future<> cql_table_large_data_handler::try_record(std::string_view large_table,
    const auto sstable_name = large_data_handler::sst_filename(sst);
    std::string pk_str = key_to_str(partition_key.to_partition_key(s), s);
    auto timestamp = db_clock::now();
-    large_data_logger.warn("Writing large {} {}/{}: {} ({} bytes) to {}", desc, ks_name, cf_name, extra_path, size, sstable_name);
+    large_data_logger.warn("Writing large {} {}/{}: {} ({}) to {}", desc, ks_name, cf_name, extra_path, size_desc, sstable_name);
    co_await sys_ks->execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
            .discard_result()
            .handle_exception([ks_name, cf_name, large_table, sstable_name] (std::exception_ptr ep) {
@@ -184,12 +184,14 @@ future<> cql_table_large_data_handler::record_large_partitions(const sstables::s

 future<> cql_table_large_data_handler::internal_record_large_partitions(const sstables::sstable& sst, const sstables::key& key,
        uint64_t partition_size, uint64_t rows) const {
-    return try_record("partition", sst, key, int64_t(partition_size), "partition", "", {"rows"}, data_value((int64_t)rows));
+    const sstring size_desc = seastar::format("{} bytes/{} rows", partition_size, rows);
+    return try_record("partition", sst, key, int64_t(partition_size), size_desc, "partition", "", {"rows"}, data_value((int64_t)rows));
 }

 future<> cql_table_large_data_handler::internal_record_large_partitions_all_data(const sstables::sstable& sst, const sstables::key& key,
        uint64_t partition_size, uint64_t rows, uint64_t range_tombstones, uint64_t dead_rows) const {
-    return try_record("partition", sst, key, int64_t(partition_size), "partition", "", {"rows", "range_tombstones", "dead_rows"},
+    const sstring size_desc = seastar::format("{} bytes/{} rows", partition_size, rows);
+    return try_record("partition", sst, key, int64_t(partition_size), size_desc, "partition", "", {"rows", "range_tombstones", "dead_rows"},
                data_value((int64_t)rows), data_value((int64_t)range_tombstones), data_value((int64_t)dead_rows));
 }

@@ -203,13 +205,14 @@ future<> cql_table_large_data_handler::internal_record_large_cells(const sstable
    auto column_name = cdef.name_as_text();
    std::string_view cell_type = cdef.is_atomic() ? "cell" : "collection";
    static const std::vector<sstring> extra_fields{"clustering_key", "column_name"};
+    const sstring size_desc = seastar::format("{} bytes", cell_size);
    if (clustering_key) {
        const schema &s = *sst.get_schema();
        auto ck_str = key_to_str(*clustering_key, s);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, ck_str, column_name);
+        return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, cell_type, column_name, extra_fields, ck_str, column_name);
    } else {
        auto desc = seastar::format("static {}", cell_type);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
+        return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
    }
 }

@@ -217,26 +220,28 @@ future<> cql_table_large_data_handler::internal_record_large_cells_and_collectio
        const clustering_key_prefix* clustering_key, const column_definition& cdef, uint64_t cell_size, uint64_t collection_elements) const {
    auto column_name = cdef.name_as_text();
    std::string_view cell_type = cdef.is_atomic() ? "cell" : "collection";
+    const sstring size_desc = seastar::format("{} bytes", cell_size);
    static const std::vector<sstring> extra_fields{"clustering_key", "column_name", "collection_elements"};
    if (clustering_key) {
        const schema &s = *sst.get_schema();
        auto ck_str = key_to_str(*clustering_key, s);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, ck_str, column_name, data_value((int64_t)collection_elements));
+        return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, cell_type, column_name, extra_fields, ck_str, column_name, data_value((int64_t)collection_elements));
    } else {
        auto desc = seastar::format("static {}", cell_type);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name, data_value((int64_t)collection_elements));
+        return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name, data_value((int64_t)collection_elements));
    }
 }

 future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable& sst, const sstables::key& partition_key,
        const clustering_key_prefix* clustering_key, uint64_t row_size) const {
    static const std::vector<sstring> extra_fields{"clustering_key"};
+    const sstring size_desc = seastar::format("{} bytes", row_size);
    if (clustering_key) {
        const schema &s = *sst.get_schema();
        std::string ck_str = key_to_str(*clustering_key, s);
-        return try_record("row", sst, partition_key, int64_t(row_size), "row", "", extra_fields, ck_str);
+        return try_record("row", sst, partition_key, int64_t(row_size), size_desc, "row", "", extra_fields, ck_str);
    } else {
-        return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
+        return try_record("row", sst, partition_key, int64_t(row_size), size_desc, "static row", "", extra_fields, data_value::make_null(utf8_type));
    }
 }

--- a/db/large_data_handler.hh
+++ b/db/large_data_handler.hh
@@ -188,7 +188,7 @@ private:
 private:
    template <typename... Args>
    future<> try_record(std::string_view large_table, const sstables::sstable& sst,  const sstables::key& partition_key, int64_t size,
-            std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const;
+            std::string_view size_desc, std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const;
 };

 class nop_large_data_handler : public large_data_handler {
--- a/db/system_keyspace.cc
+++ b/db/system_keyspace.cc
@@ -36,6 +36,7 @@
 #include "db/schema_tables.hh"
 #include "gms/generation-number.hh"
 #include "service/storage_service.hh"
+#include "service/storage_proxy.hh"
 #include "service/paxos/paxos_state.hh"
 #include "query-result-set.hh"
 #include "idl/frozen_mutation.dist.hh"
@@ -763,6 +764,35 @@ schema_ptr system_keyspace::large_cells() {
    return large_cells;
 }

+schema_ptr system_keyspace::corrupt_data() {
+    static thread_local auto corrupt_data = [] {
+        auto id = generate_legacy_id(NAME, CORRUPT_DATA);
+        return schema_builder(NAME, CORRUPT_DATA, id)
+                // partition key
+                .with_column("keyspace_name", utf8_type, column_kind::partition_key)
+                .with_column("table_name", utf8_type, column_kind::partition_key)
+                // clustering key
+                .with_column("id", timeuuid_type, column_kind::clustering_key)
+                // regular rows
+                // Storing keys as bytes: having a corrupt key might be the reason
+                // to record the row as corrupt, so we just dump what we have and
+                // leave interpreting to the lucky person investigating the disaster.
+                .with_column("partition_key", bytes_type)
+                .with_column("clustering_key", bytes_type)
+                // Note: mutation-fragment v2
+                .with_column("mutation_fragment_kind", utf8_type)
+                .with_column("frozen_mutation_fragment", bytes_type)
+                .with_column("origin", utf8_type)
+                .with_column("sstable_name", utf8_type)
+                // options
+                .set_comment("mutation-fragments found to be corrupted")
+                .set_gc_grace_seconds(0)
+                .with_hash_version()
+                .build();
+    }();
+    return corrupt_data;
+}
+
 static constexpr auto schema_gc_grace = std::chrono::duration_cast<std::chrono::seconds>(days(7)).count();

 /*static*/ schema_ptr system_keyspace::scylla_local() {
@@ -2312,6 +2342,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
                    peers(), peer_events(), range_xfers(),
                    compactions_in_progress(), compaction_history(),
                    sstable_activity(), size_estimates(), large_partitions(), large_rows(), large_cells(),
+                    corrupt_data(),
                    scylla_local(), db::schema_tables::scylla_table_schema_history(),
                    repair_history(),
                    v3::views_builds_in_progress(), v3::built_views(),
@@ -3573,4 +3604,12 @@ future<::shared_ptr<cql3::untyped_result_set>> system_keyspace::execute_cql(cons
    return _qp.execute_internal(query_string, values, cql3::query_processor::cache_internal::yes);
 }

+future<> system_keyspace::apply_mutation(mutation m) {
+    if (m.schema()->ks_name() != NAME) {
+        on_internal_error(slogger, fmt::format("system_keyspace::apply_mutation(): attempted to apply mutation belonging to table {}.{}", m.schema()->ks_name(), m.schema()->cf_name()));
+    }
+    // Applied through mutate_locally(); a commitlog sync is forced only when the table's static properties request it.
+    return _qp.proxy().mutate_locally(m, {}, db::commitlog::force_sync(m.schema()->static_props().wait_for_sync_to_commitlog), db::no_timeout);
+}
+
 } // namespace db
--- a/db/system_keyspace.hh
+++ b/db/system_keyspace.hh
@@ -142,6 +142,7 @@ class system_keyspace : public seastar::peering_sharded_service<system_keyspace>
    static schema_ptr large_partitions();
    static schema_ptr large_rows();
    static schema_ptr large_cells();
+    static schema_ptr corrupt_data();
    static schema_ptr scylla_local();
    future<> force_blocking_flush(sstring cfname);
    // This function is called when the system.peers table is read,
@@ -174,6 +175,7 @@ public:
    static constexpr auto LARGE_PARTITIONS = "large_partitions";
    static constexpr auto LARGE_ROWS = "large_rows";
    static constexpr auto LARGE_CELLS = "large_cells";
+    static constexpr auto CORRUPT_DATA = "corrupt_data";
    static constexpr auto SCYLLA_LOCAL = "scylla_local";
    static constexpr auto RAFT = "raft";
    static constexpr auto RAFT_SNAPSHOTS = "raft_snapshots";
@@ -692,6 +694,10 @@ public:
        return execute_cql(req, { data_value(std::forward<Args>(args))... });
    }

+    // Apply write as mutation to the system keyspace.
+    // Mutation has to belong to a table in the system keyspace.
+    future<> apply_mutation(mutation m);
+
    friend future<column_mapping> db::schema_tables::get_column_mapping(db::system_keyspace& sys_ks, ::table_id table_id, table_schema_version version);
    friend future<bool> db::schema_tables::column_mapping_exists(db::system_keyspace& sys_ks, table_id table_id, table_schema_version version);
    friend future<> db::schema_tables::drop_column_mapping(db::system_keyspace& sys_ks, table_id table_id, table_schema_version version);
--- a/dist/common/scripts/scylla_sysconfig_setup
+++ b/dist/common/scripts/scylla_sysconfig_setup
@@ -86,9 +86,9 @@ if __name__ == '__main__':
    ethpciid = ''
    if network_mode == 'dpdk':
        dpdk_status = out('/opt/scylladb/scripts/dpdk-devbind.py --status')
-        match = re.search('if={} drv=(\S+)'.format(ifname), dpdk_status, flags=re.MULTILINE)
+        match = re.search(r'if={} drv=(\S+)'.format(ifname), dpdk_status, flags=re.MULTILINE)
        ethdrv = match.group(1)
-        match = re.search('^(\\S+:\\S+:\\S+\.\\S+) [^\n]+ if={} '.format(ifname), dpdk_status, flags=re.MULTILINE)
+        match = re.search(r'^(\S+:\S+:\S+\.\S+) [^\n]+ if={} '.format(ifname), dpdk_status, flags=re.MULTILINE)
        ethpciid = match.group(1)

    if args.mode:
--- a/dist/debian/control.template
+++ b/dist/debian/control.template
@@ -18,7 +18,7 @@ Breaks: scylla-enterprise-conf (<< 2025.1.0~)

 Package: %{product}-server
 Architecture: any
-Depends: ${misc:Depends}, %{product}-conf (= ${binary:Version}), %{product}-python3 (= ${binary:Version})
+Depends: ${misc:Depends}, %{product}-conf (= ${binary:Version}), %{product}-python3 (= ${binary:Version}), procps
 Replaces: %{product}-tools (<<5.5), scylla-enterprise-tools (<< 2024.2.0~), scylla-enterprise-server (<< 2025.1.0~)
 Breaks: %{product}-tools (<<5.5), scylla-enterprise-tools (<< 2024.2.0~), scylla-enterprise-server (<< 2025.1.0~)
 Description: Scylla database server binaries
--- a/dist/docker/redhat/build_docker.sh
+++ b/dist/docker/redhat/build_docker.sh
@@ -88,7 +88,7 @@ bcp LICENSE-ScyllaDB-Source-Available.md /licenses/

 run microdnf clean all
 run microdnf --setopt=tsflags=nodocs -y update
-run microdnf --setopt=tsflags=nodocs -y install hostname python3 python3-pip kmod
+run microdnf --setopt=tsflags=nodocs -y install hostname kmod procps-ng python3 python3-pip
 run microdnf clean all
 run pip3 install --no-cache-dir --prefix /usr supervisor
 run bash -ec "echo LANG=C.UTF-8 > /etc/locale.conf"
--- a/dist/redhat/scylla.spec
+++ b/dist/redhat/scylla.spec
@@ -71,6 +71,7 @@ Group:          Applications/Databases
 Summary:        The Scylla database server
 Requires:       %{product}-conf = %{version}-%{release}
 Requires:       %{product}-python3 = %{version}-%{release}
+Requires:       procps-ng
 AutoReqProv:    no
 Provides:       %{product}-tools:%{_bindir}/nodetool
 Provides:       %{product}-tools:%{_sysconfigdir}/bash_completion.d/nodetool-completion
--- a/docs/_ext/utils.py
+++ b/docs/_ext/utils.py
@@ -22,6 +22,8 @@ def readable_desc_rst(description):

        cleaned_line = line.replace('\\n', '\n')

+        cleaned_line = cleaned_line.replace('\\t', '\n' + indent * 2)
+        
        if line.endswith('"'):
            cleaned_line = cleaned_line[:-1] + ' '

--- a/docs/_static/data/os-support.json
+++ b/docs/_static/data/os-support.json
@@ -1,15 +1,24 @@
 {
    "Linux Distributions": {
-      "Ubuntu": ["20.04", "22.04", "24.04"],
+      "Ubuntu": ["20.04 (deprecated)", "22.04", "24.04"],
      "Debian": ["11"],
      "Rocky / CentOS / RHEL": ["8", "9"],
      "Amazon Linux": ["2023"]
    },
    "ScyllaDB Versions": [
      {
-        "version": "Enterprise 2025.1",
+        "version": "ScyllaDB 2025.2",
        "supported_OS": {
-          "Ubuntu": ["20.04", "22.04", "24.04"],
+          "Ubuntu": ["20.04 (deprecated)", "22.04", "24.04"],
+          "Debian": ["11"],
+          "Rocky / CentOS / RHEL": ["8", "9"],
+          "Amazon Linux": ["2023"]
+        }
+      },
+      {
+        "version": "ScyllaDB 2025.1",
+        "supported_OS": {
+          "Ubuntu": ["20.04 (deprecated)", "22.04", "24.04"],
          "Debian": ["11"],
          "Rocky / CentOS / RHEL": ["8", "9"],
          "Amazon Linux": ["2023"]
@@ -18,7 +27,7 @@
      {
        "version": "Enterprise 2024.2",
        "supported_OS": {
-          "Ubuntu": ["20.04", "22.04", "24.04"],
+          "Ubuntu": ["20.04 (deprecated)", "22.04", "24.04"],
          "Debian": ["11"],
          "Rocky / CentOS / RHEL": ["8", "9"],
          "Amazon Linux": ["2023"]
@@ -27,20 +36,11 @@
      {
        "version": "Enterprise 2024.1",
        "supported_OS": {
-          "Ubuntu": ["20.04", "22.04", "24.04*"],
+          "Ubuntu": ["20.04 (deprecated)", "22.04", "24.04*"],
          "Debian": ["11"],
          "Rocky / CentOS / RHEL": ["8", "9"],
          "Amazon Linux": []
        }
-      },
-      {
-        "version": "Open Source 6.2",
-        "supported_OS": {
-          "Ubuntu": ["20.04", "22.04", "24.04"],
-          "Debian": ["11"],
-          "Rocky / CentOS / RHEL": ["8", "9"],
-          "Amazon Linux": ["2023"]
-        }
      }
    ]
  }
--- a/docs/_utils/redirects.yaml
+++ b/docs/_utils/redirects.yaml
@@ -2,6 +2,11 @@
 #old path: new path


+# Remove redundant pages
+
+/stable/getting-started/tutorials: https://docs.scylladb.com/stable/get-started/develop-with-scylladb/tutorials-example-projects.html
+/stable/contribute: https://github.com/scylladb/scylladb/blob/master/CONTRIBUTING.md
+
 # Remove an oudated article

 /stable/troubleshooting/nodetool-memory-read-timeout.html: /stable/troubleshooting/index.html
--- a/docs/architecture/raft.rst
+++ b/docs/architecture/raft.rst
@@ -58,112 +58,12 @@ of nodes in the cluster is available. The following examples illustrate how Raft

 In summary, Raft makes schema changes safe, but it requires that a quorum of nodes in the cluster is available.

-.. _verify-raft-procedure:
-
-Verifying that the Raft upgrade procedure finished successfully
-========================================================================
-
-You may need to perform the following procedure as part of
-the :ref:`manual recovery procedure <recovery-procedure>`.
-
-The Raft upgrade procedure requires **full cluster availability** to correctly setup the Raft algorithm; after the setup finishes, Raft can proceed with only a majority of nodes, but this initial setup is an exception.
-An unlucky event, such as a hardware failure, may cause one of your nodes to fail. If this happens before the Raft upgrade procedure finishes, the procedure will get stuck and your intervention will be required.
-
-To verify that the procedure finishes, look at the log of every ScyllaDB node (using ``journalctl _COMM=scylla``). Search for the following patterns:
-
-* ``Starting internal upgrade-to-raft procedure`` denotes the start of the procedure,
-* ``Raft upgrade finished`` denotes the end.
-
-The following is an example of a log from a node which went through the procedure correctly. Some parts were truncated for brevity:
-
-.. code-block:: console
-
-    features - Feature SUPPORTS_RAFT_CLUSTER_MANAGEMENT is enabled
-    raft_group0 - finish_setup_after_join: SUPPORTS_RAFT feature enabled. Starting internal upgrade-to-raft procedure.
-    raft_group0_upgrade - starting in `use_pre_raft_procedures` state.
-    raft_group0_upgrade - Waiting until everyone is ready to start upgrade...
-    raft_group0_upgrade - Joining group 0...
-    raft_group0 - server 624fa080-8c0e-4e3d-acf6-10af473639ca joined group 0 with group id 8f8a1870-5c4e-11ed-bb13-fe59693a23c9
-    raft_group0_upgrade - Waiting until every peer has joined Raft group 0...
-    raft_group0_upgrade - Every peer is a member of Raft group 0.
-    raft_group0_upgrade - Waiting for schema to synchronize across all nodes in group 0...
-    raft_group0_upgrade - synchronize_schema: my version: a37a3b1e-5251-3632-b6b4-a9468a279834
-    raft_group0_upgrade - synchronize_schema: schema mismatches: {}. 3 nodes had a matching version.
-    raft_group0_upgrade - synchronize_schema: finished.
-    raft_group0_upgrade - Entering synchronize state.
-    raft_group0_upgrade - Schema changes are disabled in synchronize state. If a failure makes us unable to proceed, manual recovery will be required.
-    raft_group0_upgrade - Waiting for all peers to enter synchronize state...
-    raft_group0_upgrade - All peers in synchronize state. Waiting for schema to synchronize...
-    raft_group0_upgrade - synchronize_schema: collecting schema versions from group 0 members...
-    raft_group0_upgrade - synchronize_schema: collected remote schema versions.
-    raft_group0_upgrade - synchronize_schema: my version: a37a3b1e-5251-3632-b6b4-a9468a279834
-    raft_group0_upgrade - synchronize_schema: schema mismatches: {}. 3 nodes had a matching version.
-    raft_group0_upgrade - synchronize_schema: finished.
-    raft_group0_upgrade - Schema synchronized.
-    raft_group0_upgrade - Raft upgrade finished.
-
-In a functioning cluster with good network connectivity the procedure should take no more than a few seconds.
-Network issues may cause the procedure to take longer, but if all nodes are alive and the network is eventually functional (each pair of nodes is eventually connected), the procedure will eventually finish.
-
-Note the following message, which appears in the log presented above:
-
-.. code-block:: console
-
-    Schema changes are disabled in synchronize state. If a failure makes us unable to proceed, manual recovery will be required.
-
-During the procedure, there is a brief window while schema changes are disabled. This is when the schema change mechanism switches from the older unsafe algorithm to the safe Raft-based algorithm. If everything runs smoothly, this window will be unnoticeable; the procedure is designed to minimize that window's length. However, if the procedure gets stuck e.g. due to network connectivity problem, ScyllaDB will return the following error when trying to perform a schema change during this window:
-
-.. code-block:: console
-
-    Cannot perform schema or topology changes during this time; the cluster is currently upgrading to use Raft for schema operations.
-    If this error keeps happening, check the logs of your nodes to learn the state of upgrade. The upgrade procedure may get stuck
-    if there was a node failure.
-
-In the next example, one of the nodes had a power outage before the procedure could finish. The following shows a part of another node's logs:
-
-.. code-block:: console
-
-    raft_group0_upgrade - Entering synchronize state.
-    raft_group0_upgrade - Schema changes are disabled in synchronize state. If a failure makes us unable to proceed, manual recovery will be required.
-    raft_group0_upgrade - Waiting for all peers to enter synchronize state...
-    raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: node 127.90.69.3 not in synchronize state yet...
-    raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: node 127.90.69.1 not in synchronize state yet...
-    raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: retrying in a while...
-    raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: node 127.90.69.1 not in synchronize state yet...
-    raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: retrying in a while...
-    ...
-    raft_group0_upgrade - Raft upgrade procedure taking longer than expected. Please check if all nodes are live and the network is healthy. If the upgrade procedure does not progress even though the cluster is healthy, try performing a rolling restart of the cluster. If that doesn 't help or some nodes are dead and irrecoverable, manual recovery may be required. Consult the relevant documentation.
-    raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: node 127.90.69.1 not in synchronize state yet...
-    raft_group0_upgrade - wait_for_peers_to_enter_synchronize_state: retrying in a while...
-
-.. TODO: the 'Consult the relevant documentation' message must be updated to point to this doc.
-
-Note the following message:
-
-.. code-block:: console
-
-    raft_group0_upgrade - Raft upgrade procedure taking longer than expected. Please check if all nodes are live and the network is healthy. If the upgrade procedure does not progress even though the cluster is healthy, try performing a rolling restart of the cluster. If that doesn 't help or some nodes are dead and irrecoverable, manual recovery may be required. Consult the relevant documentation.
-
-If the Raft upgrade procedure is stuck, this message will appear periodically in each node's logs.
-
-The message suggests the initial course of action:
-
-* Check if all nodes are alive.
-* If a node is down but can be restarted, restart it.
-* If all nodes are alive, ensure that the network is healthy: that every node is reachable from every other node.
-* If all nodes are alive and the network is healthy, perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of the cluster.
-
-One of the reasons why the procedure may get stuck is a pre-existing problem in schema definitions which causes schema to be unable to synchronize in the cluster. The procedure cannot proceed unless it ensures that schema is synchronized.
-If **all nodes are alive and the network is healthy**, you performed a rolling restart, but the issue still persists, contact `ScyllaDB support <https://www.scylladb.com/product/support/>`_ for assistance.
-
-If some nodes are **dead and irrecoverable**, you'll need to perform a manual recovery procedure. Consult :ref:`the section about Raft recovery <recovery-procedure>`.
-
 .. _raft-topology-changes:

 Consistent Topology with Raft
 -----------------------------------------------------------------

-ScyllaDB can use Raft to manage cluster topology. With Raft-managed topology 
+ScyllaDB uses Raft to manage cluster topology. With Raft-managed topology 
 enabled, all topology operations are internally sequenced in a consistent 
 way. A centralized coordination process ensures that topology metadata is 
 synchronized across the nodes on each step of a topology change procedure. 
@@ -173,42 +73,18 @@ will safely drive all of them to completion. For example, multiple nodes can
 be bootstrapped concurrently, which couldn't be done with the old 
 gossip-based topology.

-The feature is automatically enabled in new clusters.
+.. note::

-Verifying that Raft is Enabled
----------------------------------
+    Enabling consistent topology changes is mandatory in versions 2025.2 and later. If consistent topology changes are
+    disabled in your cluster, you need to follow the instructions in
+    `Enable Consistent Topology Updates <https://docs.scylladb.com/manual/branch-2025.1/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/enable-consistent-topology.html>`_.

-.. _schema-on-raft-enabled:
-
-**Schema on Raft**
-
-You can verify that Raft is enabled on your cluster by performing the following query on each node:
-
-.. code-block:: sql
-
-   cqlsh> SELECT * FROM system.scylla_local WHERE key = 'group0_upgrade_state';
-
-The query should return:
-
-   .. code-block:: console
-
-     key                  | value
-    ----------------------+--------------------------
-     group0_upgrade_state | use_post_raft_procedures
-
-    (1 rows)
-
-on every node.
-
-If the query returns 0 rows, or ``value`` is ``synchronize`` or ``use_pre_raft_procedures``, it means that the cluster is in the middle of the Raft upgrade procedure; consult the :ref:`relevant section <verify-raft-procedure>`.
-
-If ``value`` is ``recovery``, it means that the cluster is in the middle of the manual recovery procedure. The procedure must be finished. Consult :ref:`the section about Raft recovery <recovery-procedure>`.
-
-If ``value`` is anything else, it might mean data corruption or a mistake when performing the manual recovery procedure. The value will be treated as if it was equal to ``recovery`` when the node is restarted.
+    If you are uncertain whether consistent topology changes are enabled, refer to the guide below.

 .. _verifying-consistent-topology-changes-enabled:

-**Consistent topology changes**
+Verifying that consistent topology changes are enabled
+-----------------------------------------------------------------

 You can verify that consistent topology management is enabled on your cluster in two ways:

--- a/docs/architecture/tablets.rst
+++ b/docs/architecture/tablets.rst
@@ -147,24 +147,19 @@ Limitations and Unsupported Features
    performance problems, or other issues.

 The following ScyllaDB features are not supported if a keyspace has tablets
-enabled:
+enabled. If you plan to use any of the features listed below, CREATE your keyspace
+:ref:`with tablets disabled <tablets-enable-tablets>`.

 * Counters
 * Change Data Capture (CDC)
 * Lightweight Transactions (LWT)
 * Alternator (as it uses LWT)
+* Materialized Views (MV) ``*``
+* Secondary indexes (SI, as it depends on MV) ``*``

-If you plan to use any of the above features, CREATE your keyspace
-:ref:`with tablets disabled <tablets-enable-tablets>`.
-
-The following ScyllaDB features are disabled by default when used with a keyspace
-that has tablets enabled:
-
-* Materialized Views (MV)
-* Secondary indexes (SI, as it depends on MV)
-
-To enable MV and SI for tablet keyspaces, use the `--experimental-features=views-with-tablets`
-configuration option.  See :ref:`Views with tablets <admin-views-with-tablets>` for details.
+``*`` You can enable experimental support for MV and SI using
+the ``--experimental-features=views-with-tablets`` configuration option. 
+See :ref:`Views with tablets <admin-views-with-tablets>` for details.

 Resharding in keyspaces with tablets enabled has the following limitations:

--- a/docs/contribute.rst
+++ b/docs/contribute.rst
@@ -1,31 +0,0 @@
-Contribute to ScyllaDB
-=======================
-
-Thank you for your interest in making ScyllaDB better!
-We appreciate your help and look forward to welcoming you to the ScyllaDB Community.
-There are two ways you can contribute:
-
-* Send a patch to the ScyllaDB source code
-* Write documentation for ScyllaDB Docs
-
-
-Contribute to ScyllaDB's Source Code
------------------------------------
-ScyllaDB developers use patches and email to share and discuss changes.
-Setting up can take a little time, but once you have done it the first time, it’s easy.
-
-The basic steps are:
-
-* Join the ScyllaDB community
-* Create a Git branch to work on
-* Commit your work with clear commit messages and sign-offs.
-* Send a PR or use ``git format-patch`` and ``git send-email`` to send to the list
-
-
-The entire process is `documented here <https://github.com/scylladb/scylla/blob/master/CONTRIBUTING.md>`_.
-
-Contribute to ScyllaDB Docs
---------------------------
-
-Each ScyllaDB project has accompanying documentation. For information about contributing documentation to a specific ScyllaDB project, refer to the README file for the individual project.
-For general information or to contribute to the ScyllaDB Sphinx theme, read the `Contributor's Guide <https://sphinx-theme.scylladb.com/stable/contribute/>`_.
--- a/docs/cql/ddl.rst
+++ b/docs/cql/ddl.rst
@@ -60,11 +60,11 @@ Keyspace and table names are defined by the following grammar:
   keyspace_name: `name`
   table_name: [ `keyspace_name` '.' ] `name`
   name: `unquoted_name` | `quoted_name`
-   unquoted_name: re('[a-zA-Z_0-9]{1, 48}')
+   unquoted_name: re('[a-zA-Z_0-9]{1, 192}')
   quoted_name: '"' `unquoted_name` '"'

 Both keyspace and table names consist of only alphanumeric characters, cannot be empty, and are limited in
-size to 48 characters (that limit exists mostly to avoid filenames, which may include the keyspace and table name, to go
+size to 192 characters (that limit exists mostly to avoid filenames, which may include the keyspace and table name, to go
 over the limits of certain file systems). By default, keyspace and table names are case insensitive (``myTable`` is
 equivalent to ``mytable``), but case sensitivity can be forced by using double-quotes (``"myTable"`` is different from
 ``mytable``).
--- a/docs/cql/types.rst
+++ b/docs/cql/types.rst
@@ -481,7 +481,8 @@ Creating a new user-defined type is done using a ``CREATE TYPE`` statement defin
   field_definition: `identifier` `cql_type`

 A UDT has a name (``udt_name``), which is used to declare columns of that type and is a set of named and typed fields. The ``udt_name`` can be any
-type, including collections or other UDTs. UDTs and collections inside collections must always be frozen (no matter which version of ScyllaDB you are using). 
+type, including collections or other UDTs.
+Similar to collections, a UDT can be frozen or non-frozen. A frozen UDT is immutable and can only be updated as a whole. Nested UDTs or UDTs used in keys must always be frozen.

 For example::

@@ -506,26 +507,15 @@ For example::

  CREATE TABLE superheroes (
       name frozen<full_name> PRIMARY KEY,
-       home frozen<address>
+       home address
  );

 .. note::

   - Attempting to create an already existing type will result in an error unless the ``IF NOT EXISTS`` option is used. If it is used, the statement will be a no-op if the type already exists.
   - A type is intrinsically bound to the keyspace in which it is created and can only be used in that keyspace. At creation, if the type name is prefixed by a keyspace name, it is created in that keyspace. Otherwise, it is created in the current keyspace.
-   - As of ScyllaDB Open Source 3.2, UDTs not inside collections do not have to be frozen, but in all versions prior to ScyllaDB Open Source 3.2, and in all ScyllaDB Enterprise versions, UDTs **must** be frozen. 


-A non-frozen UDT example with ScyllaDB Open Source 3.2 and higher::
-
-   CREATE TYPE ut (a int, b int);
-   CREATE TABLE cf (a int primary key, b ut);
-
-Same UDT in versions prior::
-
-   CREATE TYPE ut (a int, b int);
-   CREATE TABLE cf (a int primary key, b frozen<ut>);
-
 UDT literals
 ~~~~~~~~~~~~

--- a/docs/dev/system_keyspace.md
+++ b/docs/dev/system_keyspace.md
@@ -121,6 +121,29 @@ SELECT * FROM system.large_cells;
 SELECT * FROM system.large_cells WHERE keyspace_name = 'ks1' and table_name = 'standard1';
 ~~~

+## system.corrupt\_data
+
+Stores data found to be corrupt during internal operations. This data cannot be written to sstables because then it will be spread around by repair and compaction. It will also possibly cause failures in sstable parsing.
+At the same time, the data should be kept around so that it can be inspected and possibly restored by the database operator.
+This table is used to store such data. Data is saved at the mutation-fragment level.
+
+Schema:
+```cql
+CREATE TABLE system.corrupt_data (
+    keyspace_name text,              # keyspace name of source table
+    table_name text,                 # table name of source table
+    id timeuuid,                     # id of the corrupt mutation fragment, assigned by the database when the corrupt data entry is created
+    partition_key blob,              # partition key of partition in the source table, can be incomplete or null due to corruption
+    clustering_key text,             # clustering key of mutation-fragment in the source table, can be null for some mutation-fragment kinds, can be incomplete or null due to corruption
+    mutation_fragment_kind text,     # kind of the mutation fragment, one of 'partition start', 'partition end', 'static row', 'clustering row', 'range tombstone change'; only the latter two can have clustering_key set
+    frozen_mutation_fragment blob,   # the serialized mutation fragment itself
+    origin text,                     # the name of the process that found the corruption, e.g. 'sstable-writer'
+    sstable_name text,               # the name of the sstable that contains the corrupt data, if known; sstable is not kept around, it could be compacted or deleted
+    PRIMARY KEY ((keyspace_name, table_name), id)
+) WITH CLUSTERING ORDER BY (id ASC)
+    AND gc_grace_seconds = 0;
+```
+
 ## system.raft

 Holds information about Raft
--- a/docs/features/cdc/cdc-intro.rst
+++ b/docs/features/cdc/cdc-intro.rst
@@ -67,9 +67,6 @@ You can enable CDC when creating or altering a table using the ``cdc`` option, f

    CREATE TABLE ks.t (pk int, ck int, v int, PRIMARY KEY (pk, ck, v)) WITH cdc = {'enabled':true};

-.. note::
-   If you enabled CDC and later decide to disable it, you need to **stop all writes** to the base table before issuing the ``ALTER TABLE ... WITH cdc = {'enabled':false};`` command.
-
 .. include:: /features/cdc/_common/cdc-params.rst

 Using CDC with Applications
--- a/docs/features/local-secondary-indexes.rst
+++ b/docs/features/local-secondary-indexes.rst
@@ -6,9 +6,9 @@ Local Secondary Indexes is an enhancement to :doc:`Global Secondary Indexes <sec
 which allows ScyllaDB to optimize workloads where the partition key of the base table and the index are the same key.

 .. note::
-   As of ScyllaDB Open Source 4.0, updates for local secondary indexes are performed **synchronously**. When updates are synchronous, the client acknowledges the write
+   Updates for local secondary indexes are performed **synchronously**. When updates are synchronous, the client acknowledges the write
   operation only **after both** the base table modification **and** the view update are written.
-   This is important to note because the process is no longer asynchronous and the modifications are immediately reflected in the index.
+   This is important to note because the process is no longer asynchronous, and the modifications are immediately reflected in the index.
   In addition, if the view update fails, the client receives a write error.

 Example:
--- a/docs/getting-started/cloud-instance-recommendations.rst
+++ b/docs/getting-started/cloud-instance-recommendations.rst
@@ -113,7 +113,38 @@ Pick a zone where Haswell CPUs are found. Local SSD performance offers, accordin
 Image with NVMe disk interface is recommended.
 (`More info <https://cloud.google.com/compute/docs/disks/local-ssd>`_)

-Recommended instances types are `n1-highmem <https://cloud.google.com/compute/docs/general-purpose-machines#n1_machines>`_ and `n2-highmem <https://cloud.google.com/compute/docs/general-purpose-machines#n2_machines>`_
+Recommended instance types are `z3-highmem-highlssd <https://cloud.google.com/compute/docs/storage-optimized-machines#z3_machine_types>`_,
+`n1-highmem <https://cloud.google.com/compute/docs/general-purpose-machines#n1_machines>`_, and `n2-highmem <https://cloud.google.com/compute/docs/general-purpose-machines#n2_machines>`_
+
+
+.. list-table::
+   :widths: 30 20 20 30
+   :header-rows: 1
+
+   * - Model
+     - vCPU
+     - Mem (GB)
+     - Storage (GB)
+   * - z3-highmem-8-highlssd
+     - 8
+     - 64
+     - 3,000
+   * - z3-highmem-16-highlssd
+     - 16
+     - 128
+     - 6,000
+   * - z3-highmem-22-highlssd	
+     - 22
+     - 176
+     - 9,000
+   * - z3-highmem-32-highlssd	
+     - 32
+     - 256
+     - 12,000
+   * - z3-highmem-44-highlssd	
+     - 44
+     - 352
+     - 18,000

 .. list-table::
   :widths: 30 20 20 30
--- a/docs/getting-started/index.rst
+++ b/docs/getting-started/index.rst
@@ -11,7 +11,6 @@ Getting Started
   requirements
   Migrate to ScyllaDB </using-scylla/migrate-scylla>
   Integration Solutions </using-scylla/integrations/index>
-   tutorials

 .. panel-box::
  :title: ScyllaDB Requirements
--- a/docs/getting-started/os-support.rst
+++ b/docs/getting-started/os-support.rst
@@ -4,6 +4,9 @@ OS Support by Linux Distributions and Version
 The following matrix shows which Linux distributions, containers, and images
 are :ref:`supported <os-support-definition>` with which versions of ScyllaDB.

+Note that support for Ubuntu 20.04 is deprecated and will be removed in
+a future release.
+
 .. datatemplate:json:: /_static/data/os-support.json
  :template: platforms.tmpl

--- a/docs/getting-started/tutorials.rst
+++ b/docs/getting-started/tutorials.rst
@@ -1,21 +0,0 @@
-============
-Tutorials
-============
-
-The tutorials will show you how to use ScyllaDB as a data source for an application.
-
-
-ScyllaDB Tutorial
-===================
-
-`Build an IoT App with sensor simulator and a REST API <https://iot.scylladb.com/stable/>`_
-
-ScyllaDB Cloud Tutorial
-=======================
-
-`Implement CRUD operations with a TODO App <https://github.com/scylladb/scylla-cloud-getting-started/>`_
-
-ScyllaDB Cloud Feature Store Tutorial
-=====================================
-
-`Build a machine learning (ML) feature store with ScyllaDB <https://feature-store.scylladb.com/stable/>`_
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -73,6 +73,5 @@ In addition, you can read our `blog <https://www.scylladb.com/blog/>`_ and atten
  kb/index
  reference/index
  faq
-  Contribute to ScyllaDB <contribute>
  2024.2 and earlier documentation <https://enterprise.docs.scylladb.com/branch-2024.2/>

--- a/docs/kb/rf-increase.rst
+++ b/docs/kb/rf-increase.rst
@@ -2,40 +2,65 @@
 How to Safely Increase the Replication Factor
 =======================================================

+A replication factor (RF) is configured per keyspace. You can change the RF
+using the :ref:`ALTER KEYSPACE <alter-keyspace-statement>` command. 

-**Topic: What can happen when you increase RF**
+To increase the RF safely, ensure you follow the guidelines below.
+The guidelines differ depending on whether your keyspace is tablets-based
+(the default) or has tablets disabled. See :doc:`Data Distribution with Tablets </architecture/tablets>`
+for more information about tablets.

+Increasing the RF in Tablets-based Keyspaces
+-------------------------------------------------

-**Audience: ScyllaDB administrators**
+If a keyspace has tablets enabled (the default), changing the RF does not
+impact data consistency in the cluster.

+However, due to limitations in the current protocol used to pass tablet data
+to drivers, drivers will not pick up new replicas after the RF is increased.
+As a result, drivers will not route requests to new replicas, causing imbalance.

-Issues
------
+To avoid this issue, restart the client applications after the ALTER statement
+that changes the RF completes successfully.

-When a Replication Factor (RF) is increased, using the :ref:`ALTER KEYSPACE <alter-keyspace-statement>` command, the data consistency is effectively dropped
-by the difference of the RF_new value and the RF_old value for all pre-existing data.
+Increasing the RF in Keyspaces with Tablets Disabled
+----------------------------------------------------------
+
+If you :ref:`opted out of tablets when creating a keyspace <tablets-enable-tablets>`,
+so your keyspace is vnodes-based, increasing the RF will impact data consistency.
+
+Data consistency in your cluster is effectively dropped by the difference
+between the RF_new value and the RF_old value for all pre-existing data.
 Consistency will only be restored after running a repair.

-Another issue occurs in keyspaces with tablets enabled and is driver-related. Due to limitations in the current protocol used to pass tablet data to drivers, drivers will not pick
-up new replicas after replication factor is increased. This will cause them to avoid routing requests to those replicas, causing imbalance.

 Resolution
----------
+========================

-When one increases an RF, one should consider that the pre-existing data will **not be streamed** to new replicas (a common misconception).
+When you increase the RF, you should be aware that the pre-existing data will
+**not be streamed** to new replicas (a common misconception).

-As a result, in order to make sure that you can keep on reading the old data with the same level of consistency, increase the read Consistency Level (CL) according to the following formula:
+As a result, in order to make sure that you can keep on reading the old data
+with the same level of consistency:

-``CL_new = CL_old + RF_new - RF_old``
+#. Increase the read Consistency Level (CL) according to the following formula:

-After you run a repair, you can decrease the CL. If RF has only been changed in a particular Data Center (DC) only the nodes in that DC have to be repaired.
+   .. code::
+
+      CL_new = CL_old + RF_new - RF_old
+
+#. Run repair.
+#. Decrease the CL.
+
+
+If RF has only been changed in a particular Datacenter (DC), only the nodes in
+that DC have to be repaired.

-To resolve the driver-related issue, restart the client applications after the ALTER statement that changes the RF completes successfully.

 Example
 =======

-In this example your five node cluster RF is 3 and your CL is TWO. You want to increase your RF from 3 to 5.
+In this example, your five-node cluster RF is 3 and your CL is TWO. You want to increase your RF from 3 to 5.

 #. Increase the read CL by a RF_new - RF_old value.
   Following the example the RF_new is 5 and the RF_old is 3 so, 5-3 =2. You need to increase the CL by 2.
@@ -45,9 +70,9 @@ In this example your five node cluster RF is 3 and your CL is TWO. You want to i
 #. Restore the reads CL to the originally intended value. For this example, QUORUM.


-If you do not follow the procedure above you may start reading stale or null data after increasing the RF.
+If you do not follow the procedure above, you may start reading stale or null data after increasing the RF.

-More Information
+References
 ----------------

 * :doc:`Fault Tolerance </architecture/architecture-fault-tolerance/>`
--- a/docs/operating-scylla/admin-tools/cassandra-stress.rst
+++ b/docs/operating-scylla/admin-tools/cassandra-stress.rst
@@ -5,4 +5,3 @@ The cassandra-stress tool is used for benchmarking and load testing both ScyllaD

 Cassandra Stress is not part of ScyllaDB and it is not distributed along side it anymore. It has it's own separate repository and release cycle. More information about it can be found on `GitHub <https://github.com/scylladb/cassandra-stress>`_ or on `DockerHub <https://hub.docker.com/r/scylladb/cassandra-stress>`_.

-.. include:: /rst_include/apache-copyrights.rst
--- a/docs/operating-scylla/nodetool-commands/backup.rst
+++ b/docs/operating-scylla/nodetool-commands/backup.rst
@@ -18,13 +18,14 @@ Syntax
               [--snapshot <snapshot>]
               --endpoint <endpoint> --bucket <bucket> --prefix <prefix>
               [--nowait]
+               [--move-files]

 Example
 -------

 .. code-block:: console

-    nodetool backup --endpoint s3.us-east-2.amazonaws.com  --bucket bucket-foo --prefix foo/bar/baz --keyspace ks --table table --snapshot ss
+    nodetool backup --endpoint s3.us-east-2.amazonaws.com  --bucket bucket-foo --prefix foo/bar/baz --keyspace ks --table table --snapshot ss --move-files

 Options
 -------
@@ -38,6 +39,7 @@ Options
 * ``--bucket`` - Name of the bucket to backup SSTables to
 * ``--prefix`` - Prefix to backup SSTables to
 * ``--nowait`` - Don't wait on the backup process
+* ``--move-files`` - Move files instead of copying them. This will delete the files from the local disk after they are uploaded to the object storage.

 See also

--- a/docs/operating-scylla/nodetool-commands/cleanup.rst
+++ b/docs/operating-scylla/nodetool-commands/cleanup.rst
@@ -1,3 +1,5 @@
+.. _nodetool-cleanup-cmd:
+
 Nodetool cleanup
 ================
 **cleanup** ``[<keyspace> <tablename ...>]``- triggers the immediate removal of data from node(s) that "lose" part of their token range due to a range movement operation (node addition or node replacement).
--- a/docs/operating-scylla/nodetool-commands/refresh.rst
+++ b/docs/operating-scylla/nodetool-commands/refresh.rst
@@ -29,7 +29,7 @@ Load and Stream

 .. code::

-   nodetool refresh <my_keyspace> <my_table> [--load-and-stream | -las]
+   nodetool refresh <my_keyspace> <my_table> [--load-and-stream | -las] [--scope <scope>]

 The Load and Stream feature extends nodetool refresh. The new ``-las`` option loads arbitrary sstables that do not belong to a node into the cluster. It loads the sstables from the disk and calculates the data's owning nodes, and streams automatically.
 For example, say the old cluster has 6 nodes and the new cluster has 3 nodes. We can copy the sstables from the old cluster to any of the new nodes and trigger the load and stream process.
@@ -39,5 +39,42 @@ Load and Stream make restores and migrations much easier:
 * You can place sstable from every node to every node
 * No need to run nodetool cleanup to remove unused data

+Scope
+-----
+
+The ``scope`` parameter describes the subset of cluster nodes where you want to load data:
+
+* ``node`` - On the local node.
+* ``rack`` - On the local rack.
+* ``dc`` - In the datacenter (DC) where the local node lives.
+* ``all`` (default) - Everywhere across the cluster.
+
+Scope supports a variety of options for filtering out the destination nodes.
+On one extreme, one node is given all SSTables with the scope ``all``; on the other extreme, all
+nodes are loading only their own SSTables with the scope ``node``. In between, you can choose
+a subset of nodes to load only SSTables that belong to the rack or DC.
+
+This option is only valid when using the ``--load-and-stream`` option.
+
+
+Skip cleanup
+---------------
+
+.. code::
+
+   nodetool refresh <my_keyspace> <my_table> [--skip-cleanup]
+
+When loading an SSTable, Scylla will clean it up by removing the keys that the node is not responsible for. To skip this step, use the ``--skip-cleanup`` option.
+See :ref:`nodetool cleanup <nodetool-cleanup-cmd>`.
+
+
+Skip reshape
+---------------
+
+.. code::
+
+   nodetool refresh <my_keyspace> <my_table> [--skip-reshape]
+
+When refreshing, the SSTables to load might be out of shape. If that's the case, Scylla will attempt to reshape them. To skip this step, use the ``--skip-reshape`` option.

 .. include:: nodetool-index.rst
--- a/docs/operating-scylla/procedures/cluster-management/_common/membership-change-failures-note.rst
+++ b/docs/operating-scylla/procedures/cluster-management/_common/membership-change-failures-note.rst
@@ -1,10 +0,0 @@
-.. note::
-
-    This page only applies to clusters where consistent topology updates are not enabled.
-    Consistent topology updates are mandatory, so **this page serves troubleshooting purposes**.
-
-    The page does NOT apply if you:
-
-    * Created a cluster with ScyllaDB 6.0 or later (consistent topology updates are automatically enabled).
-    * `Manually enabled consistent topology updates <https://opensource.docs.scylladb.com/branch-6.0/upgrade/upgrade-opensource/upgrade-guide-from-5.4-to-6.0/enable-consistent-topology.html>`_
-      after upgrading to 6.0 or before upgrading to 6.1 (required).
--- a/docs/operating-scylla/procedures/cluster-management/handling-membership-change-failures.rst
+++ b/docs/operating-scylla/procedures/cluster-management/handling-membership-change-failures.rst
@@ -1,166 +0,0 @@
-Handling Cluster Membership Change Failures
-*******************************************
-
-.. scylladb_include_flag:: membership-change-failures-note.rst
-
-A failure may happen in the middle of a cluster membership change (that is bootstrap, decommission, removenode, or replace), such as loss of power. If that happens, you should ensure that the cluster is brought back to a consistent state as soon as possible. Further membership changes might be impossible until you do so.
-
-For example, a node that crashed in the middle of decommission might leave the cluster in a state where it considers the node to still be a member, but the node itself will refuse to restart and communicate with the cluster. This particular case is very unlikely - it requires a specifically timed crash to happen, after the data streaming phase of decommission finishes but before the node commits that it left. But if it happens, you won't be able to bootstrap other nodes (they will try to contact the partially-decommissioned node and fail) until you remove the remains of the node that crashed.
-
---------------------------
-Handling a Failed Bootstrap
---------------------------
-
-If a failure happens when trying to bootstrap a new node to the cluster, you can try bootstrapping the node again by restarting it.
-
-If the failure persists or you decided that you don't want to bootstrap the node anymore, follow the instructions in the :ref:`cleaning up after a failed membership change <cleaning-up-after-change>` section to remove the remains of the bootstrapping node. You can then clear the node's data directories and attempt to bootstrap it again.
-
------------------------------
-Handling a Failed Decommission
------------------------------
-
-There are two cases.
-
-Most likely the failure happened during the data repair/streaming phase - before the node tried to leave the token ring. Look for a log message containing "leaving token ring" in the logs of the node that you tried to decommission. For example:
-
-.. code-block:: console
-
-    INFO  2023-03-14 13:08:38,323 [shard 0] storage_service - decommission[5b2e752e-964d-4f36-871f-254491f4e8cc]: leaving token ring
-
-If the message is **not** present, the failure happened before the node tried to leave the token ring. In that case you can simply restart the node and attempt to decommission it again.
-
-If the message is present, the node attempted to leave the token ring, but it might have left the cluster only partially before the failure. **Do not try to restart the node**. Instead, you must make sure that the node is dead and remove any leftovers using the :doc:`removenode operation </operating-scylla/nodetool-commands/removenode/>`. See :ref:`cleaning up after a failed membership change <cleaning-up-after-change>`. Trying to restart the node after such failure results in unpredictable behavior - it may restart normally, it may refuse to restart, or it may even try to rebootstrap.
-
-If you don't have access to the node's logs anymore, assume the second case (the node might have attempted to leave the token ring), **do not try to restart the node**, instead follow the :ref:`cleaning up after a failed membership change <cleaning-up-after-change>` section.
-
----------------------------
-Handling a Failed Removenode
----------------------------
-
-Simply retry the removenode operation.
-
-If you somehow lost the host ID of the node that you tried to remove, follow the instructions in :ref:`cleaning up after a failed membership change <cleaning-up-after-change>`.
-
--------------------------
-Handling a Failed Replace
--------------------------
-
-Replace is a special case of bootstrap, but the bootstrapping node tries to take the place of another dead node. You can retry a failed replace operation by restarting the replacing node.
-
-If the failure persists or you decided that you don't want to perform the replace anymore, follow the instructions in  :ref:`cleaning up after a failed membership change <cleaning-up-after-change>` section to remove the remains of the replacing node. You can then clear the node's data directories and attempt to replace again. Alternatively, you can remove the dead node which you initially tried to replace using :doc:`removenode </operating-scylla/nodetool-commands/removenode/>`, and perform a regular bootstrap.
-
-.. _cleaning-up-after-change:
-
--------------------------------------------
-Cleaning up after a Failed Membership Change
--------------------------------------------
-
-After a failed membership change, the cluster may contain remains of a node that tried to leave or join - other nodes may consider the node a member, possibly in a transitioning state. It is important to remove any such "ghost" members. Their presence may reduce the cluster's availability, performance, or prevent further membership changes.
-
-You need to determine the host IDs of any potential ghost members, then remove them using the :doc:`removenode operation </operating-scylla/nodetool-commands/removenode/>`. Note that after a failed replace, there may be two different host IDs that you'll want to find and run ``removenode`` on: the new replacing node and the old node that you tried to replace. (Or you can remove the new node only, then try to replace the old node again.)
-
-Step One: Determining Host IDs of Ghost Members
-===============================================
-
-* After a failed bootstrap, you need to determine the host ID of the node that tried to bootstrap, if it managed to generate a host ID (it might not have chosen the host ID yet if it failed very early in the procedure, in which case there's nothing to remove). Look for a message containing ``system_keyspace - Setting local host id to`` in the node's logs, which will contain the node's host ID. For example: ``system_keyspace - Setting local host id to f180b78b-6094-434d-8432-7327f4d4b38d``. If you don't have access to the node's logs, read the generic method below.
-* After a failed decommission, you need to determine the host ID of the node that tried to decommission. You can search the node's logs as in the failed bootstrap case (see above), or you can use the generic method below.
-* After a failed removenode, you need to determine the host ID of the node that you tried to remove. You should already have it, since executing a removenode requires the host ID in the first place. But if you lost it somehow, read the generic method below.
-* After a failed replace, you need to determine the host ID of the replacing node. Search the node's logs as in the failed bootstrap case (see above), or you can use the generic method below. You may also want to determine the host ID of the replaced node - either to attempt replacing it again after removing the remains of the previous replacing node, or to remove it using :doc:`nodetool removenode </operating-scylla/nodetool-commands/removenode/>`. You should already have the host ID of the replaced node if you used the ``replace_node_first_boot`` option to perform the replace.
-
-If you cannot determine the ghost members' host ID using the suggestions above, use the method described below.
-
-#. Make sure there are no ongoing membership changes.
-
-#. Execute the following CQL query on one of your nodes to retrieve the Raft group 0 ID:
-
-   .. code-block:: cql
-    
-    select value from system.scylla_local where key = 'raft_group0_id'
-
-   For example:
-
-   .. code-block:: cql
-    
-    cqlsh> select value from system.scylla_local where key = 'raft_group0_id';
-
-     value
-    --------------------------------------
-     607fef80-c276-11ed-a6f6-3075f294cc65
-
-#. Use the obtained Raft group 0 ID to query the set of all cluster members' host IDs (which includes the ghost members), by executing the following query:
-
-   .. code-block:: cql
-    
-    select server_id from system.raft_state where group_id = <group0_id>
-
-   replace ``<group0_id>`` with the group 0 ID that you obtained. For example:
-
-   .. code-block:: cql
-    
-    cqlsh> select server_id from system.raft_state where group_id = 607fef80-c276-11ed-a6f6-3075f294cc65;
-
-     server_id
-    --------------------------------------
-     26a9badc-6e96-4b86-a8df-5173e5ab47fe
-     7991e7f5-692e-45a0-8ae5-438be5bc7c4f
-     aff11c6d-fbe7-4395-b7ca-3912d7dba2c6
-
-#. Execute the following CQL query to obtain the host IDs of all token ring members:
-
-   .. code-block:: cql
-    
-    select host_id, up from system.cluster_status;
-
-   For example:
-
-   .. code-block:: cql
-    
-    cqlsh> select peer, host_id, up from system.cluster_status;
-
-     peer      | host_id                              | up
-    -----------+--------------------------------------+-------
-     127.0.0.3 |                                 null | False
-     127.0.0.1 | 26a9badc-6e96-4b86-a8df-5173e5ab47fe |  True
-     127.0.0.2 | 7991e7f5-692e-45a0-8ae5-438be5bc7c4f |  True
-
-   The output of this query is similar to the output of ``nodetool status``.
-
-   We included the ``up`` column to see which nodes are down and the ``peer`` column to see their IP addresses.
-
-   In this example, one of the nodes tried to decommission and crashed as soon as it left the token ring but before it left the Raft group. Its entry will show up in ``system.cluster_status`` queries with ``host_id = null``, like above, until the cluster is restarted.
-
-#. A host ID belongs to a ghost member if:
-
-   * It appears in the ``system.raft_state`` query but not in the ``system.cluster_status`` query,
-   * Or it appears in the ``system.cluster_status`` query but does not correspond to any remaining node in your cluster.
-
-   In our example, the ghost member's host ID was ``aff11c6d-fbe7-4395-b7ca-3912d7dba2c6`` because it appeared in the ``system.raft_state`` query but not in the ``system.cluster_status`` query.
-
-   If you're unsure whether a given row in the ``system.cluster_status`` query corresponds to a node in your cluster, you can connect to each node in the cluster and execute ``select host_id from system.local`` (or search the node's logs) to obtain that node's host ID, collecting the host IDs of all nodes in your cluster. Then check if each host ID from the ``system.cluster_status`` query appears in your collected set; if not, it's a ghost member.
-
-   A good rule of thumb is to look at the members marked as down (``up = False`` in ``system.cluster_status``) - ghost members are eventually marked as down by the remaining members of the cluster. But remember that a real member might also be marked as down if it was shutdown or partitioned away from the rest of the cluster. If in doubt, connect to each node and collect their host IDs, as described in the previous paragraph.
-
-In some cases, even after a failed topology change, there may be no ghost members left - for example, if a bootstrapping node crashed very early in the procedure or a decommissioning node crashed after it committed the membership change but before it finalized its own shutdown steps.
-
-If any ghost members are present, proceed to the next step.
-
-Step Two: Removing the Ghost Members
-====================================
-
-Given the host IDs of ghost members, you can remove them using ``removenode``; follow the :doc:`documentation for removenode operation </operating-scylla/nodetool-commands/removenode/>`.
-
-If you're executing ``removenode`` too quickly after a failed membership change, an error similar to the following might pop up:
-
-.. code-block:: console
-
-    nodetool: ScyllaDB API server HTTP POST to URL '/storage_service/remove_node' failed: seastar::rpc::remote_verb_error (node_ops_cmd_check: Node 127.0.0.2 rejected node_ops_cmd=removenode_abort from node=127.0.0.1 with ops_uuid=0ba0a5ab-efbd-4801-a31c-034b5f55487c, pending_node_ops={b47523f2-de6a-4c38-8490-39127dba6b6a}, pending node ops is in progress)
-
-In that case simply wait for 2 minutes before trying ``removenode`` again.
-
-If ``removenode`` returns an error like:
-
-.. code-block:: console
-
-    nodetool: ScyllaDB API server HTTP POST to URL '/storage_service/remove_node' failed: std::runtime_error (removenode[12e7e05b-d1ae-4978-b6a6-de0066aa80d8]: Host ID 42405b3b-487e-4759-8590-ddb9bdcebdc5 not found in the cluster)
-
-and you're sure that you're providing the correct Host ID, it means that the member was already removed and you don't have to clean up after it.
--- a/docs/operating-scylla/procedures/cluster-management/index.rst
+++ b/docs/operating-scylla/procedures/cluster-management/index.rst
@@ -24,7 +24,6 @@ Cluster Management Procedures
   Update Topology Strategy From Simple to Network <update-topology-strategy-from-simple-to-network>
   Safely Shutdown Your Cluster <safe-shutdown>
   Safely Restart Your Cluster <safe-start>
-   Handling Membership Change Failures <handling-membership-change-failures>
   repair-based-node-operation
   Prevent Quorum Loss in Symmetrical Multi-DC Clusters <arbiter-dc>

@@ -80,8 +79,6 @@ Cluster Management Procedures

  * :doc:`Add a Decommissioned Node Back to a ScyllaDB Cluster </operating-scylla/procedures/cluster-management/revoke-decommission/>`

-  * :doc:`Handling Membership Change Failures </operating-scylla/procedures/cluster-management/handling-membership-change-failures>`
-
  * :ref:`Add Bigger Nodes to a Cluster <add-bigger-nodes-to-a-cluster>`

  * :doc:`Repair Based Node Operations (RBNO) </operating-scylla/procedures/cluster-management/repair-based-node-operation>`
--- a/docs/reference/limits.rst
+++ b/docs/reference/limits.rst
@@ -54,10 +54,8 @@ CQL Limits
       Hundreds of kilobytes (good latency) or megabytes (mediocre latency)
   * - Key length
     - 65533
-   * - Table / CF name length
-     - 48 characters
-   * - Keyspace name length
-     - 48 characters
+   * - Keyspace / Table / View / Index name length
+     - 192 characters
   * - Query parameters in a query
     - 65535 (2^16-1)
   * - Statements in a batch
--- a/docs/troubleshooting/_common/enable-consistent-topology.rst
+++ b/docs/troubleshooting/_common/enable-consistent-topology.rst
@@ -1 +0,0 @@
-Perform `the procedure for enabling consistent topology changes <https://opensource.docs.scylladb.com/branch-6.0/upgrade/upgrade-opensource/upgrade-guide-from-5.4-to-6.0/enable-consistent-topology.html>`_.
--- a/docs/troubleshooting/_common/enabling-consistent-topology-failure.rst
+++ b/docs/troubleshooting/_common/enabling-consistent-topology-failure.rst
@@ -1,3 +0,0 @@
-:ref:`The Raft upgrade procedure <verify-raft-procedure>`
-or `the procedure for enabling consistent topology changes <https://opensource.docs.scylladb.com/branch-6.0/upgrade/upgrade-opensource/upgrade-guide-from-5.4-to-6.0/enable-consistent-topology.html>`_
-got stuck because one of the nodes failed in the middle of the procedure and is irrecoverable.
--- a/docs/troubleshooting/handling-node-failures.rst
+++ b/docs/troubleshooting/handling-node-failures.rst
@@ -67,29 +67,21 @@ Examples
 Manual Recovery Procedure
 ===========================

-You can follow the manual recovery procedure when:
+.. note::

-* The majority of nodes (for example, 2 out of 3) failed and are irrecoverable.
-* .. scylladb_include_flag:: enabling-consistent-topology-failure.rst
+   This recovery procedure assumes that consistent topology changes are enabled for your cluster, which is mandatory in
+   versions 2025.2 and later. If you failed to enable consistent topology changes during the upgrade to 2025.2, you need
+   to follow the `previous recovery procedure <https://docs.scylladb.com/manual/branch-2025.1/troubleshooting/handling-node-failures.html#manual-recovery-procedure>`_.

-.. warning::
+   See :ref:`Verifying that consistent topology changes are enabled <verifying-consistent-topology-changes-enabled>`.

-   Perform the manual recovery procedure **only** if you're dealing with 
-   **irrecoverable** nodes. If possible, restart your nodes, and use the manual 
-   recovery procedure as a last resort.
+You can follow the manual recovery procedure when the majority of nodes (for example, 2 out of 3) failed and are irrecoverable.

-.. warning::
-
-  The manual recovery procedure is not supported :doc:`if tablets are enabled on any of your keyspaces </architecture/tablets/>`. 
-  In such a case, you need to :doc:`restore from backup </operating-scylla/procedures/backup-restore/restore>`. 
-
-During the manual recovery procedure you'll enter a special ``RECOVERY`` mode, remove 
-all faulty nodes (using the standard :doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>`), 
-delete the internal Raft data, and restart the cluster. This will cause the cluster to 
-perform the Raft upgrade procedure again, initializing the Raft algorithm from scratch.
-
-The manual recovery procedure is applicable both to clusters that were not running Raft 
-in the past and then had Raft enabled, and to clusters that were bootstrapped using Raft.
+During the manual recovery procedure you'll restart live nodes in a special recovery mode, which will cause the
+cluster to initialize the Raft algorithm from scratch. However, this time, faulty nodes will not participate in the
+algorithm. Then, you will replace all faulty nodes (using the standard
+:doc:`node replacement procedure </operating-scylla/procedures/cluster-management/replace-dead-node/>`). Finally, you
+will leave the recovery mode and remove the obsolete internal Raft data.

 **Prerequisites**

@@ -102,53 +94,86 @@ in the past and then had Raft enabled, and to clusters that were bootstrapped us
  to life and communicate with the rest of the cluster, setup firewall rules or otherwise 
  isolate your alive nodes to reject any communication attempts from these dead nodes.

-* Prepare your service for downtime before proceeding.
-  Entering ``RECOVERY`` mode requires a node restart. Restarting an additional node while 
-  some nodes are already dead may lead to unavailability of data queries (assuming that 
-  you haven't lost it already). For example, if you're using the standard RF=3, 
-  CL=QUORUM setup, and you're recovering from a stuck upgrade procedure because one 
-  of your nodes is dead, restarting another node will cause temporary data query 
-  unavailability (until the node finishes restarting). 
+* Ensure all live nodes are in the normal state using
+  :doc:`nodetool status </operating-scylla/nodetool-commands/status>`. If there is a node
+  that is joining or leaving, it cannot be recovered. You must permanently stop it. After
+  performing the recovery procedure, use
+  :doc:`nodetool status </operating-scylla/nodetool-commands/status>` on any other node.
+  If the stopped node appears in the output, it means that other nodes still consider it
+  a member of the cluster, and you should remove it with the
+  :doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>`.
+
+* Check if the cluster lost data. If the number of dead nodes is equal to or larger than your
+  keyspaces' RF, then some of the data is lost, and you need to retrieve it from backup. After
+  completing the manual recovery procedure,
+  :doc:`restore the data from backup </operating-scylla/procedures/backup-restore/restore/>`.
+
+* Decide whether to shut down your service for the manual recovery procedure. ScyllaDB
+  serves data queries during the procedure; however, you may not want to rely on it if:
+
+  * you lost some data, or
+
+  * restarting a single node could lead to unavailability of data queries (the procedure involves
+    a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart>`). For
+    example, if you are using the standard RF=3, CL=QUORUM setup, you have two datacenters, all
+    nodes in one of the datacenters are dead and one node in the other datacenter is dead,
+    restarting another node in the other datacenter will cause temporary data query
+    unavailability (until the node finishes restarting).

 **Procedure**

-#. Perform the following query on **every alive node** in the cluster, using e.g. ``cqlsh``:
+#. Perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of your live nodes.
+
+#. Find the group 0 ID by performing the following query on any live node, using e.g. ``cqlsh``:

   .. code-block:: cql

-        cqlsh> UPDATE system.scylla_local SET value = 'recovery' WHERE key = 'group0_upgrade_state';
+        cqlsh> SELECT value FROM system.scylla_local WHERE key = 'raft_group0_id';

-#. Perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of your alive nodes.
+   The group 0 ID is needed in the following steps.

-#. Verify that all the nodes have entered ``RECOVERY`` mode when restarting; look for one of the following messages in their logs:
-
-    .. code-block:: console
-
-        group0_client - RECOVERY mode.
-        raft_group0 - setup_group0: Raft RECOVERY mode, skipping group 0 setup.
-        raft_group0_upgrade - RECOVERY mode. Not attempting upgrade.
-
-#. Remove all your dead nodes using the :doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>`.
-
-#. Remove existing Raft cluster data by performing the following queries on **every alive node** in the cluster, using e.g. ``cqlsh``:
+#. Find ``commit_idx`` of all live nodes by performing the following query on **every live node**:
+
+   .. code-block:: cql
+
+        cqlsh> SELECT commit_idx FROM system.raft WHERE group_id = <group 0 ID>;
+
+   Choose a node with the largest ``commit_idx``. If there are multiple such nodes, choose any of them.
+   The chosen node will be the *recovery leader*.
+
+#. Perform the following queries on **every live node**:

   .. code-block:: cql

-        cqlsh> TRUNCATE TABLE system.topology;
        cqlsh> TRUNCATE TABLE system.discovery;
-        cqlsh> TRUNCATE TABLE system.group0_history;
        cqlsh> DELETE value FROM system.scylla_local WHERE key = 'raft_group0_id';

-#. Make sure that schema is synchronized in the cluster by executing :doc:`nodetool describecluster </operating-scylla/nodetool-commands/describecluster>` on each node and verifying that the schema version is the same on all nodes.
+#. Add the ``recovery_leader`` property to the ``scylla.yaml`` file and set it to the host ID of the recovery leader on
+   **every live node**. Make sure the change is applied on all nodes by sending the ``SIGHUP`` signal to all ScyllaDB
+   processes.

-#. We can now leave ``RECOVERY`` mode. On **every alive node**, perform the following query:
+#. Perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of all live nodes.
+   This time, however, **the recovery leader must be restarted first**.
+
+   After completing this step, Raft should be fully functional.
+
+#. Replace all dead nodes in the cluster using the
+   :doc:`node replacement procedure </operating-scylla/procedures/cluster-management/replace-dead-node/>`.
+
+   .. note::
+
+        Removing some of the dead nodes with the
+        :doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>` is also possible,
+        but it may require decreasing RF of your keyspaces. With tablets enabled, ``nodetool removenode`` is rejected
+        if there are not enough nodes to satisfy RF of any tablet keyspace in the node's datacenter.
+
+#. Remove the ``recovery_leader`` property from the ``scylla.yaml`` file on all nodes. Send the ``SIGHUP`` signal to all
+   ScyllaDB processes to ensure the change is applied.
+
+#. Perform the following queries on **every live node**:

   .. code-block:: cql

-        cqlsh> DELETE FROM system.scylla_local WHERE key = 'group0_upgrade_state';
-
-#. Perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of your alive nodes.
-
-#. The Raft upgrade procedure will start anew. :ref:`Verify <verify-raft-procedure>` that it finishes successfully.
-
-#. .. scylladb_include_flag:: enable-consistent-topology.rst
+        cqlsh> DELETE FROM system.raft WHERE group_id = <group 0 ID>;
+        cqlsh> DELETE FROM system.raft_snapshots WHERE group_id = <group 0 ID>;
+        cqlsh> DELETE FROM system.raft_snapshot_config WHERE group_id = <group 0 ID>;
--- a/docs/upgrade/upgrade-guides/index.rst
+++ b/docs/upgrade/upgrade-guides/index.rst
@@ -4,8 +4,7 @@ Upgrade ScyllaDB

 .. toctree::
   
-   ScyllaDB Open Source 6.2 to ScyllaDB 2025.1 <upgrade-guide-from-6.2-to-2025.1/index>
-   ScyllaDB Enterprise 2024.x to ScyllaDB 2025.1 <upgrade-guide-from-2024.x-to-2025.1/index>
+   ScyllaDB 2025.1 to ScyllaDB 2025.2 <upgrade-guide-from-2025.1-to-2025.2/index>
   ScyllaDB Image <ami-upgrade>


--- a/docs/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/enable-consistent-topology.rst
+++ b/docs/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/enable-consistent-topology.rst
@@ -1,129 +0,0 @@
-=====================================
-Enable Consistent Topology Updates
-=====================================
-
-.. note::
-
-    The following procedure only applies if:
-
-    * You're upgrading **from ScyllaDB Enterprise 2024.1** to ScyllaDB 2025.1.
-    * You previously upgraded from 2024.1 to 2024.2 without enabling consistent
-      topology updates (see the `2024.2 upgrade guide <https://enterprise.docs.scylladb.com/branch-2024.2/upgrade/upgrade-enterprise/upgrade-guide-from-2024.1-to-2024.2/enable-consistent-topology.html>`_
-      for reference). 
-
-Introduction
-============
-
-ScyllaDB 2025.1 has :ref:`consistent topology changes based on Raft <raft-topology-changes>`.
-Clusters created with version 2025.1 use consistent topology changes right
-from the start. However, consistent topology changes are *not* automatically
-enabled in clusters upgraded from version 2024.1. In such clusters, you need to
-enable consistent topology changes manually by following the procedure described in this article.
-
-Before you start, you **must** check that the cluster meets the prerequisites
-and ensure that some administrative procedures will not be run while
-the procedure is in progress.
-
-.. _enable-raft-topology-2025.1-prerequisites:
-
-Prerequisites
-=============
-
-* Make sure that all nodes in the cluster are upgraded to ScyllaDB 2025.1.
-* Verify that :ref:`schema on raft is enabled <schema-on-raft-enabled>`.
-* Make sure that all nodes enabled ``SUPPORTS_CONSISTENT_TOPOLOGY_CHANGES`` cluster feature.
-  One way to verify it is to look for the following message in the log:
-
-  .. code-block:: none
-
-    features - Feature SUPPORTS_CONSISTENT_TOPOLOGY_CHANGES is enabled
-
-  Alternatively, it can be verified programmatically by checking whether the ``value``
-  column under the ``enabled_features`` key contains the name of the feature in
-  the ``system.scylla_local`` table. One way to do it is with the following bash script:
-
-  .. code-block:: bash
-
-    until cqlsh -e "select value from system.scylla_local where key = 'enabled_features'" | grep "SUPPORTS_CONSISTENT_TOPOLOGY_CHANGES"
-    do
-        echo "Upgrade didn't finish yet on the local node, waiting 10 seconds before checking again..."
-        sleep 10
-    done
-    echo "Upgrade completed on the local node"
-
-* Make sure that all nodes are alive for the duration of the procedure.
-
-.. _enable-raft-topology-2025.1-forbidden-operations:
-
-Administrative operations that must not be running during the procedure
-=========================================================================
-
-Make sure that administrative operations will not be running while
-the procedure is in progress. In particular, you must abstain from:
-
-* :doc:`Cluster management procedures </operating-scylla/procedures/cluster-management/index>`
-  (adding, replacing, removing, decommissioning nodes, etc.).
-* Running :doc:`nodetool repair </operating-scylla/nodetool-commands/repair>`.
-* Running :doc:`nodetool checkAndRepairCdcStreams </operating-scylla/nodetool-commands/checkandrepaircdcstreams>`.
-* Any modifications of :doc:`authentication </operating-scylla/security/authentication>` and :doc:`authorization </operating-scylla/security/enable-authorization>` settings.
-* Any change of authorization via :doc:`CQL API </operating-scylla/security/authorization>`.
-* Schema changes.
-
-Running the procedure
-=====================
-
-.. warning::
-
-  Before proceeding, make sure that all the :ref:`prerequisites <enable-raft-topology-2025.1-prerequisites>` are met
-  and no :ref:`forbidden administrative operations <enable-raft-topology-2025.1-forbidden-operations>` will run
-  during the procedure. Failing to do so may put the cluster in an inconsistent state.
-
-#. Issue a POST HTTP request to the ``/storage_service/raft_topology/upgrade``
-   endpoint to any of the nodes in the cluster.
-   For example, you can do it with ``curl``:
-
-   .. code-block:: bash
-
-	   curl -X POST "http://127.0.0.1:10000/storage_service/raft_topology/upgrade"
-
-#. Wait until all nodes report that the procedure is complete. You can check
-   whether a node finished the procedure in one of two ways:
-
-   * By sending a HTTP ``GET`` request on the ``/storage_service/raft_topology/upgrade``
-     endpoint. For example, you can do it with ``curl``:
-
-     .. code-block:: bash
-      
-      curl -X GET "http://127.0.0.1:10000/storage_service/raft_topology/upgrade"
-
-     It will return a JSON string that will be equal to ``done`` after the procedure is complete on that node.
-
-   * By querying the ``upgrade_state`` column in the ``system.topology`` table.
-     You can use ``cqlsh`` to get the value of the column:
-
-     .. code-block:: bash
-      
-      cqlsh -e "select upgrade_state from system.topology"
-
-     The ``upgrade_state`` column should be set to ``done`` after the procedure
-     is complete on that node:
-
-After the procedure is complete on all nodes, wait at least one minute before
-issuing any topology changes in order to avoid data loss from writes that were
-started before the procedure.
-
-What if the procedure gets stuck?
-===================================
-
-If the procedure gets stuck at some point, first check the status of your cluster:
-
- If there are some nodes that are not alive, try to restart them.
- If all nodes are alive, ensure that the network is healthy and every node can reach all other nodes.
- If all nodes are alive and the network is healthy, perform
-  a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>` of the cluster.
-
-If none of the above solves the issue, perform :ref:`the Raft recovery procedure <recovery-procedure>`.
-During recovery, the cluster will switch back to the gossip-based topology management mechanism.
-
-After exiting recovery, you should retry enabling consistent topology updates using
-the procedure described in this document.
--- a/docs/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/index.rst
+++ b/docs/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/index.rst
@@ -1,17 +0,0 @@
-==========================================================
-Upgrade - ScyllaDB Enterprise 2024.x to ScyllaDB 2025.1
-==========================================================
-
-
-.. toctree::
-   :maxdepth: 2
-   :hidden:
-
-   ScyllaDB <upgrade-guide-from-2024.x-to-2025.1>
-   Enable Consistent Topology Updates <enable-consistent-topology>
-   Metrics <metric-update-2024.x-to-2025.1>
-
-* :doc:`Upgrade from ScyllaDB Enterprise 2024.x.y to ScyllaDB 2025.1.y <upgrade-guide-from-2024.x-to-2025.1>`
-* :doc:`Enable Consistent Topology Updates <enable-consistent-topology>`
-* :doc:`Metrics Update Between 2024.x and 2025.1 <metric-update-2024.x-to-2025.1>`
-
--- a/docs/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/metric-update-2024.x-to-2025.1.rst
+++ b/docs/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/metric-update-2024.x-to-2025.1.rst
@@ -1,74 +0,0 @@
-.. |SRC_VERSION| replace:: 2024.x
-.. |NEW_VERSION| replace:: 2025.1
-
-=======================================================================================
-Metrics Update Between |SRC_VERSION| and |NEW_VERSION|
-=======================================================================================
-
-ScyllaDB Enterprise |NEW_VERSION| Dashboards are available as part of the latest |mon_root|.
-
-
-New Metrics
------------
-
-The following metrics are new in ScyllaDB |NEW_VERSION| compared to |SRC_VERSION|:
-
-.. list-table::
-   :widths: 25 150
-   :header-rows: 1
-
-   * - Metric
-     - Description
-   * - scylla_alternator_batch_item_count
-     - The total number of items processed across all batches.
-   * - scylla_hints_for_views_manager_sent_bytes_total
-     - The total size of the sent hints (in bytes).
-   * - scylla_hints_manager_sent_bytes_total
-     - The total size of the sent hints (in bytes).
-   * - scylla_io_queue_activations
-     - The number of times the class was woken up from idle.
-   * - scylla_raft_apply_index
-     - The applied index.
-   * - scylla_raft_commit_index
-     - The commit index.
-   * - scylla_raft_log_last_index
-     - The index of the last log entry.
-   * - scylla_raft_log_last_term
-     - The term of the last log entry.
-   * - scylla_raft_snapshot_last_index
-     - The index of the snapshot.
-   * - scylla_raft_snapshot_last_term
-     - The term of the snapshot.
-   * - scylla_raft_state
-     - The current state: 0 - follower, 1 - candidate, 2 - leader
-   * - scylla_rpc_client_delay_samples
-     - The total number of delay samples.
-   * - scylla_rpc_client_delay_total
-     - The total delay in seconds.
-   * - scylla_storage_proxy_replica_received_hints_bytes_total
-     - The total size of hints and MV hints received by this node.
-   * - scylla_storage_proxy_replica_received_hints_total
-     - The number of hints and MV hints received by this node.
-
-Renamed Metrics
------------------
-
-The following metrics are renamed in ScyllaDB |NEW_VERSION| compared to |SRC_VERSION|:
-
-.. list-table::
-   :widths: 25 150
-   :header-rows: 1
-
-   * - 2024.2
-     - 2025.1
-   * - scylla_hints_for_views_manager_sent
-     - scylla_hints_for_views_manager_sent_total
-   * - scylla_hints_manager_sent
-     - scylla_hints_manager_sent_total
-   * - scylla_forward_service_requests_dispatched_to_other_nodes
-     - scylla_mapreduce_service_requests_dispatched_to_other_nodes
-   * - scylla_forward_service_requests_dispatched_to_own_shards
-     - scylla_mapreduce_service_requests_dispatched_to_own_shards
-   * - scylla_forward_service_requests_executed
-     - scylla_mapreduce_service_requests_executed
-  
--- a/docs/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/upgrade-guide-from-2024.x-to-2025.1.rst
+++ b/docs/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/upgrade-guide-from-2024.x-to-2025.1.rst
@@ -1,395 +0,0 @@
-.. |SCYLLA_NAME| replace:: ScyllaDB
-
-.. |SRC_VERSION| replace:: 2024.x
-.. |NEW_VERSION| replace:: 2025.1
-
-.. |ROLLBACK| replace:: rollback
-.. _ROLLBACK: ./#rollback-procedure
-
-.. |SCYLLA_METRICS| replace:: ScyllaDB Metrics Update - ScyllaDB 2024.x to 2025.1
-.. _SCYLLA_METRICS: ../metric-update-2024.x-to-2025.1
-
-=======================================================================================
-Upgrade from |SCYLLA_NAME| Enterprise |SRC_VERSION| to |SCYLLA_NAME| |NEW_VERSION|
-=======================================================================================
-
-This document is a step-by-step procedure for upgrading from |SCYLLA_NAME| |SRC_VERSION| 
-to |NEW_VERSION|, and rollback to version |SRC_VERSION| if required.
-
-This guide covers upgrading ScyllaDB on Red Hat Enterprise Linux (RHEL) CentOS, Debian, 
-and Ubuntu. See :doc:`OS Support by Platform and Version </getting-started/os-support>` 
-for information about supported versions.
-
-This guide also applies when you're upgrading ScyllaDB official image on EC2, 
-GCP, or Azure.
-
-
-Before You Upgrade ScyllaDB
-================================
-
-**Upgrade Your Driver**
-
-If you're using a :doc:`ScyllaDB driver </using-scylla/drivers/cql-drivers/index>`, 
-upgrade the driver before you upgrade ScyllaDB. The latest two versions of each driver 
-are supported.
-
-**Upgrade ScyllaDB Monitoring Stack**
-
-If you're using the ScyllaDB Monitoring Stack, verify that your Monitoring Stack 
-version supports the ScyllaDB version to which you want to upgrade. See 
-`ScyllaDB Monitoring Stack Support Matrix <https://monitoring.docs.scylladb.com/stable/reference/matrix.html>`_.
-  
-We recommend upgrading the Monitoring Stack to the latest version.
-
-**Check Feature Updates**
-
-See the ScyllaDB Release Notes for the latest updates. The Release Notes are published 
-at the `ScyllaDB Community Forum <https://forum.scylladb.com/>`_.
-
-Upgrade Procedure
-=================
-
-A ScyllaDB upgrade is a rolling procedure that does **not** require full cluster shutdown.
-For each of the nodes in the cluster, you will:
-
-* Check that the cluster's schema is synchronized
-* Drain the node and backup the data
-* Backup the configuration file
-* Stop ScyllaDB
-* Download and install new ScyllaDB packages
-* Start ScyllaDB
-* Validate that the upgrade was successful
-
-
-.. caution:: 
-
-   Apply the procedure **serially** on each node. Do not move to the next node before 
-   validating that the node you upgraded is up and running the new version.
-
-**During** the rolling upgrade, it is highly recommended:
-
-* Not to use the new |NEW_VERSION| features.
-* Not to run administration functions, like repairs, refresh, rebuild, or add or remove 
-  nodes. See `sctool <https://manager.docs.scylladb.com/stable/sctool/>`_ for suspending 
-  ScyllaDB Manager's scheduled or running repairs.
-* Not to apply schema changes.
-
-**After** the upgrade, you may need to enable consistent topology updates.
-See :ref:`After Upgrading Every Node <upgrade-2024.x-2025.1-after-upgrading-nodes>` for details.
-
-
-Upgrade Steps
-=============
-
-Check the cluster schema
-------------------------
-Make sure that all nodes have the schema synchronized before upgrade. The upgrade 
-procedure will fail if there is a schema disagreement between nodes.
-
-.. code:: sh
-
-   nodetool describecluster
-
-Backup the data
-----------------------------------
-
-Before any major procedure, like an upgrade, it is recommended to backup all the data 
-to an external device. 
-We recommend using `ScyllaDB Manager <https://manager.docs.scylladb.com/stable/backup/index.html>`_
-to create backups.
-
-Alternatively, you can use the ``nodetool snapshot`` command. For **each** node in the cluster, run 
-the following command:
-
-.. code:: sh
-
-   nodetool drain
-   nodetool snapshot
-
-Take note of the directory name that nodetool gives you, and copy all the directories 
-having that name under ``/var/lib/scylla`` to a backup device.
-
-When the upgrade is completed on all nodes, remove the snapshot with the 
-``nodetool clearsnapshot -t <snapshot>`` command to prevent running out of space.
-
-Backup the configuration file
------------------------------
-
-Back up the ``scylla.yaml`` configuration file and the ScyllaDB packages
-in case you need to rollback the upgrade.
-
-.. tabs::
-
-   .. group-tab:: Debian/Ubuntu
-
-      .. code:: sh
-         
-         sudo cp -a /etc/scylla/scylla.yaml /etc/scylla/scylla.yaml.backup
-         sudo cp /etc/apt/sources.list.d/scylla.list ~/scylla.list-backup
-
-   .. group-tab:: RHEL/CentOS
-
-      .. code:: sh
-         
-         sudo cp -a /etc/scylla/scylla.yaml /etc/scylla/scylla.yaml.backup
-         sudo cp /etc/yum.repos.d/scylla.repo ~/scylla.repo-backup
-
-Gracefully stop the node
------------------------
-
-.. code:: sh
-
-   sudo service scylla-server stop
-
-Download and install the new release
------------------------------------
-
-Before upgrading, check what version you are running now using ``scylla --version``. 
-You should use the same version as this version in case you want to |ROLLBACK|_ 
-the upgrade. 
-
-.. tabs::
-
-   .. group-tab:: Debian/Ubuntu
-
-        #. Update the ScyllaDB deb repo to |NEW_VERSION|.
-
-            .. code-block:: console
-
-               sudo wget -O /etc/apt/sources.list.d/scylla.list https://downloads.scylladb.com/deb/debian/scylla-2025.1.list
-
-        #. Install the new ScyllaDB version:
-
-            .. code-block:: console
-
-               sudo apt-get clean all
-               sudo apt-get update
-               sudo apt-get dist-upgrade scylla
-
-        Answer ‘y’ to the first two questions.
-
-   .. group-tab:: RHEL/CentOS
-
-        #. Update the ScyllaDB rpm repo to |NEW_VERSION|.
-
-            .. code-block:: console
-
-               sudo curl -o /etc/yum.repos.d/scylla.repo -L https://downloads.scylladb.com/rpm/centos/scylla-2025.1.repo
-
-        #. Install the new ScyllaDB version:
-
-            .. code:: sh
-
-               sudo yum clean all
-               sudo yum update scylla\* -y
-
-   .. group-tab:: EC2/GCP/Azure Ubuntu Image
-      
-      If you’re using the ScyllaDB official image (recommended), see
-      the **Debian/Ubuntu** tab for upgrade instructions. If you’re using your
-      own image and have installed ScyllaDB packages for Ubuntu or Debian,
-      you need to apply an extended upgrade procedure:
-      
-      #. Update the ScyllaDB deb repo (see the **Debian/Ubuntu** tab).
-      #. Install the new ScyllaDB version with the additional 
-         ``scylla-machine-image`` package:
-
-        .. code::
-         
-         sudo apt-get clean all
-         sudo apt-get update
-         sudo apt-get dist-upgrade scylla
-         sudo apt-get dist-upgrade scylla-machine-image
-
-      #. Run ``scylla_setup`` without running ``io_setup``.
-      #. Run ``sudo /opt/scylladb/scylla-machine-image/scylla_cloud_io_setup``.
-
-If you need JMX server, see
-:doc:`Install scylla-jmx Package </getting-started/installation-common/install-jmx>`
-and get new version. 
-
-Start the node
--------------
-
-.. code:: sh
-
-   sudo service scylla-server start
-
-Validate
--------
-#. Check cluster status with ``nodetool status`` and make sure **all** nodes, including 
-   the one you just upgraded, are in ``UN`` status.
-#. Use ``curl -X GET "http://localhost:10000/storage_service/scylla_release_version"`` 
-   to check the ScyllaDB version. Validate that the version matches the one you upgraded to.
-#. Check scylla-server log (using ``journalctl _COMM=scylla``) and ``/var/log/syslog`` 
-   to validate there are no new errors in the log.
-#. Check again after two minutes to validate that no new issues are introduced.
-
-Once you are sure the node upgrade was successful, move to the next node in the cluster.
-
-.. _upgrade-2024.x-2025.1-after-upgrading-nodes:
-
-After Upgrading Every Node
-===============================
-
-This step applies if:
-
-* You're upgrading from ScyllaDB Enterprise **2024.1** to ScyllaDB 2025.1.
-* You previously upgraded from 2024.1 to 2024.2 without enabling consistent
-  topology updates (see the `2024.2 upgrade guide <https://enterprise.docs.scylladb.com/branch-2024.2/upgrade/upgrade-enterprise/upgrade-guide-from-2024.1-to-2024.2/enable-consistent-topology.html>`_
-  for reference).
-
-After you have upgraded every node, you must enable the Raft-based consistent
-topology updates feature. See 
-:doc:`Enable Consistent Topology Updates </upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/enable-consistent-topology>`
-for instructions.
-
-Rollback Procedure
-==================
-
-.. warning::
-
-   The rollback procedure can only be applied if some nodes have **not** been upgraded 
-   to |NEW_VERSION| yet. As soon as the last node in the rolling upgrade procedure is 
-   started with |NEW_VERSION|, rollback becomes impossible. At that point, the only way 
-   to restore a cluster to |SRC_VERSION| is by restoring it from backup.
-
-The following procedure describes a rollback from |SCYLLA_NAME| |NEW_VERSION|.x to 
-|SRC_VERSION|.y. Apply this procedure if an upgrade from |SRC_VERSION| to |NEW_VERSION| 
-failed before completing on all nodes.
-
-* Use this procedure only for nodes you upgraded to |NEW_VERSION|.
-* Execute the commands one node at a time, moving to the next node
-  only after the rollback procedure is completed successfully.
-
-ScyllaDB rollback is a rolling procedure that does **not** require a full cluster shutdown.
-For each of the nodes you rollback to |SRC_VERSION|, you will:
-
-* Drain the node and stop ScyllaDB
-* Retrieve the old ScyllaDB packages
-* Restore the configuration file
-* Reload systemd configuration
-* Restart ScyllaDB
-* Validate the rollback success
-
-Apply the procedure **serially** on each node. Do not move to the next node
-before validating that the rollback was successful and the node is up and
-running the old version.
-
-Rollback Steps
-==============
-
-Drain and gracefully stop the node
----------------------------------
-
-.. code:: sh
-
-   nodetool drain
-   sudo service scylla-server stop
-
-Download and install the old release
------------------------------------
-
-.. tabs::
-
-   .. group-tab:: Debian/Ubuntu
-
-        #. Remove the old repo file.
-
-            .. code:: sh
-
-               sudo rm -rf /etc/apt/sources.list.d/scylla.list
-
-        
-        #. Restore the |SRC_VERSION| packages backed up during the upgrade.
-
-            .. code:: sh
-
-               sudo cp ~/scylla.list-backup /etc/apt/sources.list.d/scylla.list
-               sudo chown root.root /etc/apt/sources.list.d/scylla.list
-               sudo chmod 644 /etc/apt/sources.list.d/scylla.list
-
-        #. Install:
-
-            .. code-block::
-
-               sudo apt-get update
-               sudo apt-get remove scylla\* -y
-               sudo apt-get install scylla-enterprise
-
-        Answer ‘y’ to the first two questions.
-
-   .. group-tab:: RHEL/CentOS
-
-        #. Remove the old repo file.
-
-            .. code:: sh
-
-               sudo rm -rf /etc/yum.repos.d/scylla.repo
-
-        #. Restore the |SRC_VERSION| packages backed up during the upgrade procedure.
-
-            .. code:: sh
-
-               sudo cp ~/scylla.repo-backup /etc/yum.repos.d/scylla.repo
-               sudo chown root.root /etc/yum.repos.d/scylla.repo
-               sudo chmod 644 /etc/yum.repos.d/scylla.repo
-
-        #. Install:
-
-            .. code:: console
-
-               sudo yum clean all
-               sudo yum remove scylla\*
-               sudo yum install scylla-enterprise
-
-   .. group-tab:: EC2/GCP/Azure Ubuntu Image
-
-      If you’re using the ScyllaDB official image (recommended), see the **Debian/Ubuntu** 
-      tab for upgrade instructions.
-
-      If you’re using your own image and installed ScyllaDB packages for Ubuntu or Debian, 
-      you need to additionally restore the ``scylla-machine-image`` package.
-
-      #. Restore the |SRC_VERSION| packages backed up during the upgrade
-         (see the **Debian/Ubuntu** tab).
-      #. Install:
-
-            .. code-block::
-
-               sudo apt-get update
-               sudo apt-get remove scylla\* -y
-               sudo apt-get install scylla-enterprise 
-               sudo apt-get install scylla-enterpraise-machine-image
-
-        Answer ‘y’ to the first two questions.
-
-Restore the configuration file
------------------------------
-
-.. code:: sh
-
-   sudo rm -rf /etc/scylla/scylla.yaml
-   sudo cp /etc/scylla/scylla.yaml-backup /etc/scylla/scylla.yaml
-
-Reload systemd configuration
----------------------------
-
-You must reload the unit file if the systemd unit file is changed.
-
-.. code:: sh
-
-   sudo systemctl daemon-reload
-
-Start the node
--------------
-
-.. code:: sh
-
-   sudo service scylla-server start
-
-Validate
--------
-
-Check the upgrade instructions above for validation. Once you are sure the node rollback 
-is successful, move to the next node in the cluster.
--- a/docs/upgrade/upgrade-guides/upgrade-guide-from-2025.1-to-2025.2/index.rst
+++ b/docs/upgrade/upgrade-guides/upgrade-guide-from-2025.1-to-2025.2/index.rst
@@ -0,0 +1,13 @@
+==========================================================
+Upgrade - ScyllaDB 2025.1 to ScyllaDB 2025.2
+==========================================================
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+
+   Upgrade ScyllaDB <upgrade-guide-from-2025.1-to-2025.2>
+   Metrics Update <metric-update-2025.1-to-2025.2>
+
+* :doc:`Upgrade from ScyllaDB 2025.1.x to ScyllaDB 2025.2.y <upgrade-guide-from-2025.1-to-2025.2>`
+* :doc:`Metrics Update Between 2025.1 and 2025.2 <metric-update-2025.1-to-2025.2>`
--- a/docs/upgrade/upgrade-guides/upgrade-guide-from-2025.1-to-2025.2/metric-update-2025.1-to-2025.2.rst
+++ b/docs/upgrade/upgrade-guides/upgrade-guide-from-2025.1-to-2025.2/metric-update-2025.1-to-2025.2.rst
@@ -0,0 +1,61 @@
+.. |SRC_VERSION| replace:: 2025.1
+.. |NEW_VERSION| replace:: 2025.2
+
+Metrics Update Between |SRC_VERSION| and |NEW_VERSION|
+================================================================
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+
+ScyllaDB |NEW_VERSION| Dashboards are available as part of the latest |mon_root|.
+
+New Metrics
+------------
+
+The following metrics are new in ScyllaDB |NEW_VERSION| compared to |SRC_VERSION|:
+
+
+.. list-table::
+   :widths: 25 150
+   :header-rows: 1
+
+   * - Metric
+     - Description
+   * - scylla_alternator_batch_item_count_histogram
+     - A histogram of the number of items in a batch request.
+   * - scylla_database_total_view_updates_failed_pairing
+     - Total number of view updates for which we failed base/view pairing.
+   * - scylla_group_name_cross_rack_collocations
+     - The number of co-locating migrations that move replica across racks.
+   * - scylla_network_bytes_received
+     - The number of bytes received from network sockets.
+   * - scylla_network_bytes_sent
+     - The number of bytes written to network sockets.
+   * - scylla_reactor_awake_time_ms_total
+     - Total reactor awake time (wall clock).
+   * - scylla_reactor_cpu_used_time_ms
+     - Total reactor thread CPU time (from CLOCK_THREAD_CPUTIME).
+   * - scylla_reactor_sleep_time_ms_total
+     - Total reactor sleep time (wall clock).
+   * - scylla_sstable_compression_dicts_total_live_memory_bytes
+     - Total amount of memory consumed by SSTable compression dictionaries in RAM.
+   * - scylla_transport_connections_blocked
+     - Holds an incrementing counter with the CQL connections that were blocked
+       before being processed due to threshold configured via
+       uninitialized_connections_semaphore_cpu_concurrency. Blocks are normal
+       when we have multiple connections initialized at once. If connections are
+       timing out and this value is high, it indicates either a connection storm
+       or unusually slow processing.
+   * - scylla_transport_connections_shed
+     - Holds an incrementing counter with the CQL connections that were shed
+       due to concurrency semaphore timeout (threshold configured via
+       uninitialized_connections_semaphore_cpu_concurrency). This typically can
+       happen during a connection storm.
+   
+  
+
+
+
+
+
--- a/docs/upgrade/upgrade-guides/upgrade-guide-from-2025.1-to-2025.2/upgrade-guide-from-2025.1-to-2025.2.rst
+++ b/docs/upgrade/upgrade-guides/upgrade-guide-from-2025.1-to-2025.2/upgrade-guide-from-2025.1-to-2025.2.rst
@@ -1,16 +1,16 @@
 .. |SCYLLA_NAME| replace:: ScyllaDB

-.. |SRC_VERSION| replace:: 6.2
-.. |NEW_VERSION| replace:: 2025.1
+.. |SRC_VERSION| replace:: 2025.1
+.. |NEW_VERSION| replace:: 2025.2

 .. |ROLLBACK| replace:: rollback
 .. _ROLLBACK: ./#rollback-procedure

-.. |SCYLLA_METRICS| replace:: ScyllaDB Metrics Update - ScyllaDB 6.2 to 2025.1
-.. _SCYLLA_METRICS: ../metric-update-6.2-to-2025.1
+.. |SCYLLA_METRICS| replace:: ScyllaDB Metrics Update - ScyllaDB 2025.1 to 2025.2
+.. _SCYLLA_METRICS: ../metric-update-2025.1-to-2025.2

 =======================================================================================
-Upgrade from |SCYLLA_NAME| Open Source |SRC_VERSION| to |SCYLLA_NAME| |NEW_VERSION|
+Upgrade from |SCYLLA_NAME| |SRC_VERSION| to |SCYLLA_NAME| |NEW_VERSION|
 =======================================================================================

 This document describes a step-by-step procedure for upgrading from |SCYLLA_NAME| |SRC_VERSION| 
@@ -20,7 +20,7 @@ This guide covers upgrading ScyllaDB on Red Hat Enterprise Linux (RHEL), CentOS,
 and Ubuntu. See :doc:`OS Support by Platform and Version </getting-started/os-support>` 
 for information about supported versions.

-It also applies when using ScyllaDB official image on EC2, GCP, or Azure.
+It also applies when using the ScyllaDB official image on EC2, GCP, or Azure.

 Before You Upgrade ScyllaDB
 ==============================
@@ -28,7 +28,7 @@ Before You Upgrade ScyllaDB
 **Upgrade Your Driver**

 If you're using a :doc:`ScyllaDB driver </using-scylla/drivers/cql-drivers/index>`, 
-upgrade the driver before you upgrade ScyllaDB. The latest two versions of each driver 
+upgrade the driver before upgrading ScyllaDB. The latest two versions of each driver 
 are supported.

 **Upgrade ScyllaDB Monitoring Stack**
@@ -44,6 +44,14 @@ We recommend upgrading the Monitoring Stack to the latest version.
 See the ScyllaDB Release Notes for the latest updates. The Release Notes are published 
 at the `ScyllaDB Community Forum <https://forum.scylladb.com/>`_.

+.. note::
+
+   If you previously upgraded from 2024.x to 2025.1 without enabling consistent
+   topology updates, ensure you enable the feature before you upgrade to 2025.2.
+   For instructions, see
+   `Enable Consistent Topology Updates <https://docs.scylladb.com/manual/branch-2025.1/upgrade/upgrade-guides/upgrade-guide-from-2024.x-to-2025.1/enable-consistent-topology.html>`_
+   in the upgrade guide for version 2025.1.
+
 Upgrade Procedure
 =================

@@ -150,7 +158,7 @@ You should take note of the current version in case you want to |ROLLBACK|_ the

            .. code-block:: console

-               sudo wget -O /etc/apt/sources.list.d/scylla.list https://downloads.scylladb.com/deb/debian/scylla-2025.1.list
+               sudo wget -O /etc/apt/sources.list.d/scylla.list https://downloads.scylladb.com/deb/debian/scylla-2025.2.list

        #. Install the new ScyllaDB version:

@@ -168,7 +176,7 @@ You should take note of the current version in case you want to |ROLLBACK|_ the

            .. code-block:: console

-               sudo curl -o /etc/yum.repos.d/scylla.repo -L https://downloads.scylladb.com/rpm/centos/scylla-2025.1.repo
+               sudo curl -o /etc/yum.repos.d/scylla.repo -L https://downloads.scylladb.com/rpm/centos/scylla-2025.2.repo

        #. Install the new ScyllaDB version:

--- a/docs/upgrade/upgrade-guides/upgrade-guide-from-6.2-to-2025.1/index.rst
+++ b/docs/upgrade/upgrade-guides/upgrade-guide-from-6.2-to-2025.1/index.rst
@@ -1,13 +0,0 @@
-==========================================================
-Upgrade - ScyllaDB Open Source 6.2 to ScyllaDB 2025.1 
-==========================================================
-
-.. toctree::
-   :maxdepth: 2
-   :hidden:
-
-   Upgrade ScyllaDB <upgrade-guide-from-6.2-to-2025.1>
-   Metrics Update <metric-update-6.2-to-2025.1>
-
-* :doc:`Upgrade from ScyllaDB Open Source 6.2 .x to ScyllaDB 2025.1.y <upgrade-guide-from-6.2-to-2025.1>`
-* :doc:`Metrics Update Between 6.2 and 2025.1 <metric-update-6.2-to-2025.1>`
--- a/docs/upgrade/upgrade-guides/upgrade-guide-from-6.2-to-2025.1/metric-update-6.2-to-2025.1.rst
+++ b/docs/upgrade/upgrade-guides/upgrade-guide-from-6.2-to-2025.1/metric-update-6.2-to-2025.1.rst
@@ -1,54 +0,0 @@
-.. |SRC_VERSION| replace:: 6.2
-.. |NEW_VERSION| replace:: 2025.1
-
-Metrics Update Between |SRC_VERSION| and |NEW_VERSION|
-================================================================
-
-.. toctree::
-   :maxdepth: 2
-   :hidden:
-
-ScyllaDB |NEW_VERSION| Dashboards are available as part of the latest |mon_root|.
-
-New Metrics
------------
-
-The following metrics are new in ScyllaDB |NEW_VERSION| compared to |SRC_VERSION|:
-
-
-.. list-table::
-   :widths: 25 150
-   :header-rows: 1
-
-   * - Metric
-     - Description
-   * - scylla_alternator_rcu_total 
-     - The total number of consumed read units, counted as half units.
-   * - scylla_alternator_wcu_total
-     - The total number of consumed write units, counted as half units.
-   * - scylla_rpc_compression_bytes_received
-     - The bytes read from RPC connections after decompression.
-   * - scylla_rpc_compression_bytes_sent
-     - The bytes written to RPC connections before compression.
-   * - scylla_rpc_compression_compressed_bytes_received
-     - The bytes read from RPC connections before decompression.
-   * - scylla_rpc_compression_compressed_bytes_sent
-     - The bytes written to RPC connections after compression.
-   * - scylla_rpc_compression_compression_cpu_nanos
-     - The nanoseconds spent on compression.
-   * - scylla_rpc_compression_decompression_cpu_nanos
-     - The nanoseconds spent on decompression.
-   * - scylla_rpc_compression_messages_received
-     - The RPC messages received.
-   * - scylla_rpc_compression_messages_sent	
-     - The RPC messages sent.
-
-
-
-
-  
-
-
-
-
-
--- a/docs/using-scylla/mig-tool-review.rst
+++ b/docs/using-scylla/mig-tool-review.rst
@@ -8,10 +8,7 @@ such as Apache Cassandra, or from other ScyllaDB clusters:
 * From SSTable to SSTable
    - Using nodetool refresh, :ref:`Load and Stream <nodetool-refresh-load-and-stream>` option.
    - On a large scale, it requires tooling to upload / transfer files from location to location.
-* From SSTable to CQL.
-    - :doc:`sstableloader</operating-scylla/admin-tools/sstableloader/>`
 * From CQL to CQL
-    - `Spark Migrator <https://github.com/scylladb/scylla-migrator>`_.  The Spark migrator allows you to easily transform the data before pushing it to the destination DB.
-
+    - `Spark Migrator <https://migrator.docs.scylladb.com/>`_.  The Spark migrator allows you to easily transform the data before pushing it to the destination DB.
 * From DynamoDB to ScyllaDB Alternator
-    - `Spark Migrator <https://github.com/scylladb/scylla-migrator>`_.  The Spark migrator allows you to easily transform the data before pushing it to the destination DB.
+    - `Spark Migrator <https://migrator.docs.scylladb.com/>`_.  The Spark migrator allows you to easily transform the data before pushing it to the destination DB.
--- a/ent/encryption/encryption.cc
+++ b/ent/encryption/encryption.cc
@@ -472,6 +472,14 @@ public:
            for (auto&& [id, h] : _per_thread_kmip_host_cache[this_shard_id()]) {
                co_await h->disconnect();
            }
+            static auto stop_all = [](auto&& cache) -> future<> {
+                for (auto& [k, host] : cache) {
+                    co_await host->stop();
+                }
+            };
+            co_await stop_all(_per_thread_kms_host_cache[this_shard_id()]);
+            co_await stop_all(_per_thread_gcp_host_cache[this_shard_id()]);
+
            _per_thread_provider_cache[this_shard_id()].clear();
            _per_thread_system_key_cache[this_shard_id()].clear();
            _per_thread_kmip_host_cache[this_shard_id()].clear();
--- a/ent/encryption/gcp_host.cc
+++ b/ent/encryption/gcp_host.cc
@@ -97,6 +97,7 @@ public:
    ~impl() = default;

    future<> init();
+    future<> stop();
    const host_options& options() const {
        return _options;
    }
@@ -827,6 +828,11 @@ future<> encryption::gcp_host::impl::init() {
    _initialized = true;
 }

+future<> encryption::gcp_host::impl::stop() {
+    co_await _attr_cache.stop();
+    co_await _id_cache.stop();
+}
+
 std::tuple<std::string, std::string> encryption::gcp_host::impl::parse_key(std::string_view spec) {
    auto i = spec.find_last_of('/');
    if (i == std::string_view::npos) {
@@ -989,6 +995,10 @@ future<> encryption::gcp_host::init() {
    return _impl->init();
 }

+future<> encryption::gcp_host::stop() {
+    return _impl->stop();
+}
+
 const encryption::gcp_host::host_options& encryption::gcp_host::options() const {
    return _impl->options();
 }
--- a/ent/encryption/gcp_host.hh
+++ b/ent/encryption/gcp_host.hh
@@ -65,6 +65,8 @@ public:
    ~gcp_host();

    future<> init();
+    future<> stop();
+
    const host_options& options() const;

    struct option_override : public t_credentials_source<std::optional<std::string>> {
--- a/ent/encryption/kmip_host.cc
+++ b/ent/encryption/kmip_host.cc
@@ -724,9 +724,11 @@ future<> kmip_host::impl::connect() {
 }

 future<> kmip_host::impl::disconnect() {
-    return do_for_each(_options.hosts, [this](const sstring& host) {
+    co_await do_for_each(_options.hosts, [this](const sstring& host) {
        return clear_connections(host);
    });
+    co_await _attr_cache.stop();
+    co_await _id_cache.stop();
 }

 static unsigned from_str(unsigned (*f)(char*, int, int*), const sstring& s, const sstring& what) {
@@ -867,8 +869,8 @@ future<std::vector<kmip_host::id_type>> kmip_host::impl::find_matching_keys(cons

    auto [kdl_attrs, crypt_alg] = make_attributes(info, false);

-    static const char kmip_tag_cryptographic_length[] = KMIP_TAG_CRYPTOGRAPHIC_LENGTH_STR;
-    static const char kmip_tag_cryptographic_usage_mask[] = KMIP_TAG_CRYPTOGRAPHIC_USAGE_MASK_STR;
+    static const char kmip_tag_cryptographic_length[] = KMIP_TAGSTR_CRYPTOGRAPHIC_LENGTH;
+    static const char kmip_tag_cryptographic_usage_mask[] = KMIP_TAGSTR_CRYPTOGRAPHIC_USAGE_MASK;

    // #1079. Query mask apparently ignores things like cryptographic 
    // attribute set of options, instead we must specify the query 
--- a/ent/encryption/kms_host.cc
+++ b/ent/encryption/kms_host.cc
@@ -154,6 +154,8 @@ public:
    ~impl() = default;

    future<> init();
+    future<> stop();
+
    const host_options& options() const {
        return _options;
    }
@@ -826,6 +828,11 @@ future<> encryption::kms_host::impl::init() {
    _initialized = true;
 }

+future<> encryption::kms_host::impl::stop() {
+    co_await _attr_cache.stop();
+    co_await _id_cache.stop();
+}
+
 future<encryption::kms_host::impl::key_and_id_type> encryption::kms_host::impl::create_key(const attr_cache_key& k) {
    auto& master_key = k.master_key;
    auto& aws_assume_role_arn = k.aws_assume_role_arn;
@@ -988,6 +995,10 @@ future<> encryption::kms_host::init() {
    return _impl->init();
 }

+future<> encryption::kms_host::stop() {
+    return _impl->stop();
+}
+
 const encryption::kms_host::host_options& encryption::kms_host::options() const {
    return _impl->options();
 }
--- a/ent/encryption/kms_host.hh
+++ b/ent/encryption/kms_host.hh
@@ -63,6 +63,8 @@ public:
    ~kms_host();

    future<> init();
+    future<> stop();
+
    const host_options& options() const;

    struct option_override {
--- a/generic_server.cc
+++ b/generic_server.cc
@@ -241,20 +241,22 @@ server::server(const sstring& server_name, logging::logger& logger, config cfg)
    , _logger{logger}
    , _gate("generic_server::server")
    , _conns_cpu_concurrency(cfg.uninitialized_connections_semaphore_cpu_concurrency)
-    , _prev_conns_cpu_concurrency(_conns_cpu_concurrency)
-    , _conns_cpu_concurrency_semaphore(_conns_cpu_concurrency, named_semaphore_exception_factory{"connections cpu concurrency semaphore"})
-{
-    _conns_cpu_concurrency.observe([this] (const uint32_t &concurrency) {
+    , _conns_cpu_concurrency_observer(_conns_cpu_concurrency.observe([this] (const uint32_t &concurrency) {
        if (concurrency == _prev_conns_cpu_concurrency) {
            return;
        }
+        _logger.info("Updating uninitialized_connections_semaphore_cpu_concurrency from {} to {} due to config update", _prev_conns_cpu_concurrency, concurrency);
+
        if (concurrency > _prev_conns_cpu_concurrency) {
            _conns_cpu_concurrency_semaphore.signal(concurrency - _prev_conns_cpu_concurrency);
        } else {
            _conns_cpu_concurrency_semaphore.consume(_prev_conns_cpu_concurrency - concurrency);
        }
        _prev_conns_cpu_concurrency = concurrency;
-    });
+    }))
+    , _prev_conns_cpu_concurrency(_conns_cpu_concurrency)
+    , _conns_cpu_concurrency_semaphore(_conns_cpu_concurrency, named_semaphore_exception_factory{"connections cpu concurrency semaphore"})
+{
 }

 server::~server()
--- a/generic_server.hh
+++ b/generic_server.hh
@@ -122,6 +122,7 @@ protected:
    shared_ptr<seastar::tls::server_credentials> _credentials;
 private:
    utils::updateable_value<uint32_t> _conns_cpu_concurrency;
+    utils::observer<uint32_t> _conns_cpu_concurrency_observer;
    uint32_t _prev_conns_cpu_concurrency;
    named_semaphore _conns_cpu_concurrency_semaphore;
 public:
--- a/gms/gossiper.cc
+++ b/gms/gossiper.cc
@@ -959,8 +959,8 @@ future<> gossiper::failure_detector_loop_for_node(locator::host_id host_id, gene
    auto diff = gossiper::clk::duration(0);
    auto echo_interval = std::chrono::seconds(2);
    auto max_duration = echo_interval + std::chrono::milliseconds(_gcfg.failure_detector_timeout_ms());
-    auto node = _address_map.get(host_id);
    while (is_enabled()) {
+        auto node = _address_map.find(host_id);
        bool failed = false;
        try {
            logger.debug("failure_detector_loop: Send echo to node {}/{}, status = started", host_id, node);
--- a/idl-compiler.py
+++ b/idl-compiler.py
@@ -196,6 +196,16 @@ template<typename Input>
  return static_cast<{name}>(deserialize(buf, std::type_identity<{self.underlying_type}>()));
 }}""")

+    def serializer_skip_impl(self, cout):  # Writes the C++ definition of serializer<enum>::skip() to cout.
+        name = self.ns_qualified_name()  # The emitted skip() advances the input by sizeof(underlying_type).
+
+        fprintln(cout, f"""
+{self.template_declaration}
+template<typename Input>
+void serializer<{name}>::skip(Input& buf) {{
+  buf.skip(sizeof({self.underlying_type}));
+}}""")
+

 class Attributes(ASTBase):
    ''' AST node for representing class and field attributes.
@@ -843,6 +853,7 @@ def handle_enum(enum, hout, cout):

    enum.serializer_write_impl(cout)
    enum.serializer_read_impl(cout)
+    enum.serializer_skip_impl(cout)


 def join_template(template_params):
--- a/idl/CMakeLists.txt
+++ b/idl/CMakeLists.txt
@@ -59,6 +59,7 @@ set(idl_headers
  replica_exception.idl.hh
  per_partition_rate_limit_info.idl.hh
  position_in_partition.idl.hh
+  full_position.idl.hh
  experimental/broadcast_tables_lang.idl.hh
  join_node.idl.hh
  utils.idl.hh
--- a/idl/full_position.idl.hh
+++ b/idl/full_position.idl.hh
@@ -0,0 +1,14 @@
+/*
+ * Copyright 2025-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#include "idl/position_in_partition.idl.hh"
+
+struct full_position {
+    partition_key partition;
+    position_in_partition position;
+};
--- a/idl/mutation.idl.hh
+++ b/idl/mutation.idl.hh
@@ -11,6 +11,7 @@

 #include "idl/uuid.idl.hh"
 #include "idl/keys.idl.hh"
+#include "idl/position_in_partition.idl.hh"

 class counter_id final {
    utils::UUID uuid();
@@ -114,6 +115,12 @@ class range_tombstone [[writable]] {
    bound_kind end_kind [[version 1.3]] = bound_kind::incl_end;
 };

+class range_tombstone_change stub [[writable]] {
+    clustering_key_prefix key;
+    bound_weight weight; // we are trying to move away from bound_kind
+    tombstone tomb;
+};
+
 class mutation_partition stub [[writable]] {
    tombstone tomb;
    row static_row;
@@ -168,3 +175,7 @@ class mutation_fragment stub [[writable]] {
                   partition_start, partition_end> fragment;
 };

+class mutation_fragment_v2 stub [[writable]] {
+    std::variant<clustering_row, static_row, range_tombstone_change,
+                   partition_start, partition_end> fragment;
+};
--- a/idl/position_in_partition.idl.hh
+++ b/idl/position_in_partition.idl.hh
@@ -26,8 +26,3 @@ class position_in_partition {
    bound_weight get_bound_weight();
    std::optional<clustering_key_prefix> get_clustering_key_prefix();
 };
-
-struct full_position {
-    partition_key partition;
-    position_in_partition position;
-};
--- a/idl/storage_proxy.idl.hh
+++ b/idl/storage_proxy.idl.hh
@@ -22,6 +22,7 @@
 #include "idl/keys.idl.hh"
 #include "idl/uuid.idl.hh"
 #include "idl/storage_service.idl.hh"
+#include "idl/full_position.idl.hh"

 verb [[with_client_info, with_timeout, one_way]] mutation (frozen_mutation fm [[ref]], inet_address_vector_replica_set forward [[ref]], gms::inet_address reply_to, unsigned shard, uint64_t response_id, std::optional<tracing::trace_info> trace_info [[ref]] [[version 1.3.0]], db::per_partition_rate_limit::info rate_limit_info [[version 5.1.0]], service::fencing_token fence [[version 5.4.0]], host_id_vector_replica_set forward_id [[ref, version 6.3.0]], locator::host_id reply_to_id [[version 6.3.0]]);
 verb [[with_client_info, one_way]] mutation_done (unsigned shard, uint64_t response_id, db::view::update_backlog backlog [[version 3.1.0]]);
--- a/init.cc
+++ b/init.cc
@@ -13,6 +13,7 @@

 #include <boost/algorithm/string/trim.hpp>
 #include <seastar/core/coroutine.hh>
+#include "sstables/sstable_compressor_factory.hh"

 logging::logger startlog("init");

@@ -129,3 +130,17 @@ void service_set::add(std::any value) {
 std::any service_set::find(const std::type_info& type) const {
    return _impl->find(type);
 }
+
+// Builds a factory config from db::config. Defined in init.cc so that compress.cc, which holds the rest of
+// default_sstable_compressor_factory_config, does not have to depend on db::config.
+auto default_sstable_compressor_factory_config::from_db_config(
+    const db::config& cfg,
+    std::span<const unsigned> numa_config) -> self
+{
+    return self {
+        .register_metrics = true, // fixed value, not read from cfg
+        .enable_writing_dictionaries = cfg.sstable_compression_dictionaries_enable_writing,
+        .memory_fraction_starting_at_which_we_stop_writing_dicts = cfg.sstable_compression_dictionaries_memory_budget_fraction,
+        .numa_config{numa_config.begin(), numa_config.end()}, // filled from the span's element range
+    };
+}
--- a/locator/production_snitch_base.cc
+++ b/locator/production_snitch_base.cc
@@ -112,19 +112,19 @@ void production_snitch_base::parse_property_file(std::string contents) {

 [[noreturn]]
 void production_snitch_base::throw_double_declaration(const sstring& key) const {
-    logger().error("double \"{}\" declaration in {}", key, _prop_file_name);
+    logger().warn("double \"{}\" declaration in {}", key, _prop_file_name);
    throw bad_property_file_error();
 }

 [[noreturn]]
 void production_snitch_base::throw_bad_format(const sstring& line) const {
-    logger().error("Bad format in properties file {}: {}", _prop_file_name, line);
+    logger().warn("Bad format in properties file {}: {}", _prop_file_name, line);
    throw bad_property_file_error();
 }

 [[noreturn]]
 void production_snitch_base::throw_incomplete_file() const {
-    logger().error("Property file {} is incomplete. Some obligatory fields are missing.", _prop_file_name);
+    logger().warn("Property file {} is incomplete. Some obligatory fields are missing.", _prop_file_name);
    throw bad_property_file_error();
 }

--- a/locator/tablets.cc
+++ b/locator/tablets.cc
@@ -221,6 +221,18 @@ std::optional<tablet_replica> get_leaving_replica(const tablet_info& tinfo, cons
    return *leaving.begin();
 }

+bool is_post_cleanup(tablet_replica replica, const tablet_info& tinfo, const tablet_transition_info& trinfo) {
+    if (replica == locator::get_leaving_replica(tinfo, trinfo)) {
+        // The leaving replica is cleaned up in the `cleanup` stage, and the only stage after that is `end_migration`.
+        return trinfo.stage == locator::tablet_transition_stage::end_migration;
+    }
+    if (replica == trinfo.pending_replica) {
+        // The pending replica is cleaned up in the `cleanup_target` stage, and the only stage after that is `revert_migration`.
+        return trinfo.stage == locator::tablet_transition_stage::revert_migration;
+    }
+    return false; // replica is neither the leaving nor the pending replica of this transition
+}
+
 tablet_replica_set get_new_replicas(const tablet_info& tinfo, const tablet_migration_info& mig) {
    return replace_replica(tinfo.replicas, mig.src, mig.dst);
 }
--- a/locator/tablets.hh
+++ b/locator/tablets.hh
@@ -291,6 +291,10 @@ struct tablet_transition_info {
 // Returns the leaving replica for a given transition.
 std::optional<tablet_replica> get_leaving_replica(const tablet_info&, const tablet_transition_info&);

+// True if the tablet is transitioning and it's in a stage that follows the stage
+// where we clean up the tablet on the given replica.
+bool is_post_cleanup(tablet_replica replica, const tablet_info& tinfo, const tablet_transition_info& trinfo);
+
 /// Represents intention to move a single tablet replica from src to dst.
 struct tablet_migration_info {
    locator::tablet_transition_kind kind;
--- a/main.cc
+++ b/main.cc
@@ -1236,17 +1236,19 @@ sharded<locator::shared_token_metadata> token_metadata;
            auto stop_lang_man = defer_verbose_shutdown("lang manager", [] { langman.invoke_on_all(&lang::manager::stop).get(); });
            langman.invoke_on_all(&lang::manager::start).get();

-            auto sstable_compressor_factory = make_sstable_compressor_factory(sstable_compressor_factory::config{
-                .register_metrics = true,
-                .enable_writing_dictionaries = cfg->sstable_compression_dictionaries_enable_writing,
-                .memory_fraction_starting_at_which_we_stop_writing_dicts = cfg->sstable_compression_dictionaries_memory_budget_fraction,
+            sharded<default_sstable_compressor_factory> sstable_compressor_factory;
+            auto numa_groups = local_engine->smp().shard_to_numa_node_mapping();
+            sstable_compressor_factory.start(sharded_parameter(default_sstable_compressor_factory::config::from_db_config,
+                                                               std::cref(*cfg), std::cref(numa_groups))).get();
+            auto stop_compressor_factory = defer_verbose_shutdown("sstable_compressor_factory", [&sstable_compressor_factory] {
+                sstable_compressor_factory.stop().get();
            });

            checkpoint(stop_signal, "starting database");

            debug::the_database = &db;
            db.start(std::ref(*cfg), dbcfg, std::ref(mm_notifier), std::ref(feature_service), std::ref(token_metadata),
-                    std::ref(cm), std::ref(sstm), std::ref(langman), std::ref(sst_dir_semaphore), std::ref(*sstable_compressor_factory),
+                    std::ref(cm), std::ref(sstm), std::ref(langman), std::ref(sst_dir_semaphore), std::ref(sstable_compressor_factory),
                    std::ref(stop_signal.as_sharded_abort_source()), utils::cross_shard_barrier()).get();
            auto stop_database_and_sstables = defer_verbose_shutdown("database", [&db] {
                // #293 - do not stop anything - not even db (for real)
@@ -1717,7 +1719,7 @@ sharded<locator::shared_token_metadata> token_metadata;
                auto sstables_prefix = std::string_view("sstables/");
                if (name.starts_with(sstables_prefix)) {
                    auto table = table_id(utils::UUID(name.substr(sstables_prefix.size())));
-                    co_await sstable_compressor_factory->set_recommended_dict(table, std::move(dict.data));
+                    co_await sstable_compressor_factory.local().set_recommended_dict(table, std::move(dict.data));
                } else if (name == dictionary_service::rpc_compression_dict_name) {
                    co_await utils::announce_dict_to_shards(compressor_tracker, std::move(dict));
                }
@@ -1755,6 +1757,24 @@ sharded<locator::shared_token_metadata> token_metadata;

            utils::get_local_injector().inject("stop_after_starting_repair", [] { std::raise(SIGSTOP); });

+            debug::the_stream_manager = &stream_manager;
+            checkpoint(stop_signal, "starting streaming service");
+            stream_manager.start(std::ref(*cfg), std::ref(db), std::ref(view_builder), std::ref(messaging), std::ref(mm), std::ref(gossiper), maintenance_scheduling_group).get();
+            auto stop_stream_manager = defer_verbose_shutdown("stream manager", [&stream_manager] {
+                // FIXME -- keep the instances alive, just call .stop on them
+                stream_manager.invoke_on_all(&streaming::stream_manager::stop).get();
+            });
+
+            checkpoint(stop_signal, "starting streaming manager");
+            stream_manager.invoke_on_all([&stop_signal] (streaming::stream_manager& sm) {
+                return sm.start(stop_signal.as_local_abort_source());
+            }).get();
+
+            api::set_server_stream_manager(ctx, stream_manager).get();
+            auto stop_stream_manager_api = defer_verbose_shutdown("stream manager api", [&ctx] {
+                api::unset_server_stream_manager(ctx).get();
+            });
+
            checkpoint(stop_signal, "initializing storage service");
            debug::the_storage_service = &ss;
            ss.start(std::ref(stop_signal.as_sharded_abort_source()),
@@ -1921,24 +1941,6 @@ sharded<locator::shared_token_metadata> token_metadata;
                proxy.invoke_on_all(&service::storage_proxy::stop_remote).get();
            });

-            debug::the_stream_manager = &stream_manager;
-            checkpoint(stop_signal, "starting streaming service");
-            stream_manager.start(std::ref(*cfg), std::ref(db), std::ref(view_builder), std::ref(messaging), std::ref(mm), std::ref(gossiper), maintenance_scheduling_group).get();
-            auto stop_stream_manager = defer_verbose_shutdown("stream manager", [&stream_manager] {
-                // FIXME -- keep the instances alive, just call .stop on them
-                stream_manager.invoke_on_all(&streaming::stream_manager::stop).get();
-            });
-
-            checkpoint(stop_signal, "starting streaming manager");
-            stream_manager.invoke_on_all([&stop_signal] (streaming::stream_manager& sm) {
-                return sm.start(stop_signal.as_local_abort_source());
-            }).get();
-
-            api::set_server_stream_manager(ctx, stream_manager).get();
-            auto stop_stream_manager_api = defer_verbose_shutdown("stream manager api", [&ctx] {
-                api::unset_server_stream_manager(ctx).get();
-            });
-
            checkpoint(stop_signal, "starting hinted handoff manager");
            if (!hinted_handoff_enabled.is_disabled_for_all()) {
                hints_dir_initializer.ensure_rebalanced().get();
@@ -2025,22 +2027,23 @@ sharded<locator::shared_token_metadata> token_metadata;
                });
            };

-            checkpoint(stop_signal, "starting maintenance auth service");
-            auth::service_config maintenance_auth_config;
-            maintenance_auth_config.authorizer_java_name = sstring{auth::allow_all_authorizer_name};
-            maintenance_auth_config.authenticator_java_name = sstring{auth::allow_all_authenticator_name};
-            maintenance_auth_config.role_manager_java_name = sstring{auth::maintenance_socket_role_manager_name};
-
-            maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(group0_client),  std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes).get();
-
-            cql_transport::controller cql_maintenance_server_ctl(maintenance_auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, maintenance_cql_sg_stats_key, maintenance_socket_enabled::yes, dbcfg.statement_scheduling_group);
-
+            std::optional<cql_transport::controller> cql_maintenance_server_ctl;
            std::any stop_maintenance_auth_service;
            std::any stop_maintenance_cql;

            if (cfg->maintenance_socket() != "ignore") {
+                checkpoint(stop_signal, "starting maintenance auth service");
+                auth::service_config maintenance_auth_config;
+                maintenance_auth_config.authorizer_java_name = sstring{auth::allow_all_authorizer_name};
+                maintenance_auth_config.authenticator_java_name = sstring{auth::allow_all_authenticator_name};
+                maintenance_auth_config.role_manager_java_name = sstring{auth::maintenance_socket_role_manager_name};
+
+                maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(group0_client),  std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes).get();
+
+                cql_maintenance_server_ctl.emplace(maintenance_auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, maintenance_cql_sg_stats_key, maintenance_socket_enabled::yes, dbcfg.statement_scheduling_group);
+
                start_auth_service(maintenance_auth_service, stop_maintenance_auth_service, "maintenance auth service");
-                start_cql(cql_maintenance_server_ctl, stop_maintenance_cql, "maintenance native server");
+                start_cql(*cql_maintenance_server_ctl, stop_maintenance_cql, "maintenance native server");
            }

            checkpoint(stop_signal, "starting REST API");
--- a/mutation/frozen_mutation.hh
+++ b/mutation/frozen_mutation.hh
@@ -281,6 +281,22 @@ public:

 frozen_mutation_fragment freeze(const schema& s, const mutation_fragment& mf);

+class frozen_mutation_fragment_v2 { // Byte-buffer form of a mutation_fragment_v2: produced by freeze(), turned back by unfreeze().
+    bytes_ostream _bytes;
+public:
+    explicit frozen_mutation_fragment_v2(bytes_ostream bytes) : _bytes(std::move(bytes)) { } // moves the given buffer into _bytes
+    const bytes_ostream& representation() const { return _bytes; } // read-only access to the stored bytes
+    bytes_ostream&& representation() && { return std::move(_bytes); } // rvalue overload: lets the caller move the buffer out
+
+    mutation_fragment_v2 unfreeze(const schema& s, reader_permit permit); // declaration only; no definition in this hunk
+
+    future<> clear_gently() noexcept { // forwards to _bytes.clear_gently()
+        return _bytes.clear_gently();
+    }
+};
+
+frozen_mutation_fragment_v2 freeze(const schema& s, const mutation_fragment_v2& mf);
+
 template<FlattenedConsumerV2 Consumer>
 auto frozen_mutation::consume(schema_ptr s, frozen_mutation_consumer_adaptor<Consumer>& adaptor) const -> frozen_mutation_consume_result<decltype(adaptor.consumer().consume_end_of_stream())> {
    check_schema_version(schema_version(), *s);
--- a/mutation/mutation_compactor.hh
+++ b/mutation/mutation_compactor.hh
@@ -149,7 +149,8 @@ class compact_mutation_state {
    gc_clock::time_point _query_time;
    max_purgeable_fn _get_max_purgeable;
    can_gc_fn _can_gc;
-    api::timestamp_type _max_purgeable = api::missing_timestamp;
+    api::timestamp_type _max_purgeable_regular = api::missing_timestamp;
+    api::timestamp_type _max_purgeable_shadowable = api::missing_timestamp;
    std::optional<gc_clock::time_point> _gc_before;
    const query::partition_slice& _slice;
    uint64_t _row_limit{};
@@ -288,11 +289,12 @@ private:
        if (!t) {
            return false;
        }
-        if (_max_purgeable == api::missing_timestamp) {
-            _max_purgeable = _get_max_purgeable(*_dk, is_shadowable);
+        auto& max_purgeable = is_shadowable ? _max_purgeable_shadowable : _max_purgeable_regular;
+        if (max_purgeable == api::missing_timestamp) {
+            max_purgeable = _get_max_purgeable(*_dk, is_shadowable);
        }
-        auto ret = t.timestamp < _max_purgeable;
-        mclog.debug("can_gc: t={} is_shadowable={} max_purgeable={}: ret={}", t, is_shadowable, _max_purgeable, ret);
+        auto ret = t.timestamp < max_purgeable;
+        mclog.debug("can_gc: t={} is_shadowable={} max_purgeable={}: ret={}", t, is_shadowable, max_purgeable, ret);
        return ret;
    };

@@ -347,7 +349,8 @@ public:
        _static_row_live = false;
        _partition_tombstone = {};
        _current_partition_limit = std::min(_row_limit, _partition_row_limit);
-        _max_purgeable = api::missing_timestamp;
+        _max_purgeable_regular = api::missing_timestamp;
+        _max_purgeable_shadowable = api::missing_timestamp;
        _gc_before = std::nullopt;
        _last_static_row.reset();
        _last_pos = position_in_partition::for_partition_start();
--- a/mutation/mutation_partition.cc
+++ b/mutation/mutation_partition.cc
@@ -36,6 +36,11 @@
 logging::logger mclog("mutation_compactor");
 logging::logger mplog("mutation_partition");

+void on_bad_row_key(const schema& s, position_in_partition_view pos, const char* reason) { // Reports the bad key through on_internal_error() on the mplog logger.
+    on_internal_error(mplog, format("check_row_key(): attempted to use {} {} as row key for non-compact table {}.{}", // NOTE(review): text always says `non-compact table`, but the `non-clustering position` and `non-key position` callers never test is_compact_table() - confirm wording.
+            reason, pos, s.ks_name(), s.cf_name())); // placeholders in order: reason, position, keyspace name, table name
+}
+
 mutation_partition::mutation_partition(const schema& s, const mutation_partition& x)
        : _tombstone(x._tombstone)
        , _static_row(s, column_kind::static_column, x._static_row)
@@ -524,6 +529,7 @@ mutation_partition::find_row(const schema& s, const clustering_key& key) const {
 deletable_row&
 mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
    check_schema(s);
+    check_row_key(s, key, is_dummy::no);
    auto i = _rows.find(key, rows_entry::tri_compare(s));
    if (i == _rows.end()) {
        auto e = alloc_strategy_unique_ptr<rows_entry>(
@@ -536,6 +542,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
 deletable_row&
 mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
    check_schema(s);
+    check_row_key(s, key, is_dummy::no);
    auto i = _rows.find(key, rows_entry::tri_compare(s));
    if (i == _rows.end()) {
        auto e = alloc_strategy_unique_ptr<rows_entry>(
@@ -548,6 +555,7 @@ mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
 deletable_row&
 mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
    check_schema(s);
+    check_row_key(s, key, is_dummy::no);
    auto i = _rows.find(key, rows_entry::tri_compare(s));
    if (i == _rows.end()) {
        auto e = alloc_strategy_unique_ptr<rows_entry>(
@@ -560,6 +568,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
 rows_entry&
 mutation_partition::clustered_rows_entry(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) {
    check_schema(s);
+    check_row_key(s, pos, dummy);
    auto i = _rows.find(pos, rows_entry::tri_compare(s));
    if (i == _rows.end()) {
        auto e = alloc_strategy_unique_ptr<rows_entry>(
@@ -577,6 +586,7 @@ mutation_partition::clustered_row(const schema& s, position_in_partition_view po
 deletable_row&
 mutation_partition::append_clustered_row(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) {
    check_schema(s);
+    check_row_key(s, pos, dummy);
    const auto cmp = rows_entry::tri_compare(s);
    auto i = _rows.end();
    if (!_rows.empty() && (cmp(*std::prev(i), pos) >= 0)) {
--- a/mutation/mutation_partition.hh
+++ b/mutation/mutation_partition.hh
@@ -1165,6 +1165,29 @@ struct apply_resume {
    }
 };

+[[noreturn]] void on_bad_row_key(const schema& s, position_in_partition_view pos, const char* reason);
+
+inline void check_row_key(const schema& s, const clustering_key& key, is_dummy dummy) { // Reports a bad row key when a non-dummy key is not full and the table is not compact.
+    if (!dummy && !key.is_full(s) && !s.is_compact_table()) {
+        on_bad_row_key(s, position_in_partition_view::for_key(key), "non-full or empty prefix key"); // declared [[noreturn]]; ends up in on_internal_error()
+    }
+}
+
+inline void check_row_key(const schema& s, position_in_partition_view pos, is_dummy dummy) { // Position-based variant of the row-key check.
+    if (!pos.has_clustering_key()) { // runs before the dummy early-return below, so it applies to dummy entries too
+        on_bad_row_key(s, pos, "non-clustering position");
+    }
+    if (dummy) {
+        return; // the remaining checks apply only to non-dummy entries
+    }
+    if (pos.get_bound_weight() != bound_weight::equal) { // a non-dummy entry must sit exactly at a key, not before or after it
+        on_bad_row_key(s, pos, "non-key position");
+    }
+    if (!s.is_compact_table() && !pos.key().is_full(s)) { // compact tables are exempt from the full-key requirement
+        on_bad_row_key(s, pos, "non-full or empty prefix key");
+    }
+}
+
 // Represents a set of writes made to a single partition.
 //
 // The object is schema-dependent. Each instance is governed by some
--- a/Show More
+++ b/Show More
				`@@ -1 +0,0 @@`
				Perform `the procedure for enabling consistent topology changes <https://opensource.docs.scylladb.com/branch-6.0/upgrade/upgrade-opensource/upgrade-guide-from-5.4-to-6.0/enable-consistent-topology.html>`_.