Compare commits

..

2 Commits

Author SHA1 Message Date
Alex
faba13d2b7 test/auth_cluster: cover empty legacy table in service level upgrade
Add a cluster test that upgrades to raft topology with an empty legacy
`system_distributed.service_levels` table and verifies that the
migration still marks `service_level_version` as `2`.
2026-04-05 19:46:15 +03:00
Alex
d00443f4b0 service_levels: mark v2 migration complete on empty legacy table
During raft-topology upgrade in 2026.1, service_level_controller::migrate_to_v2()
returns early when system_distributed.service_levels is empty.
This skips the service_level_version = 2 write, so the cluster is never marked
as upgraded to service levels v2 even though there is no data to migrate.
Subsequent upgrades may then fail the startup check which requires
service_level_version == 2.
Remove the early return and let the migration commit the version marker even
when there are no legacy service levels rows to copy.

Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-1198

backport: only needed in 2026.1 because it's the critical upgrade before 2026.2,3,4
2026-04-05 18:00:12 +03:00
86 changed files with 822 additions and 2213 deletions

View File

@@ -15,19 +15,13 @@ jobs:
- name: Verify Org Membership
id: verify_author
env:
EVENT_NAME: ${{ github.event_name }}
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
PR_ASSOCIATION: ${{ github.event.pull_request.author_association }}
COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
COMMENT_ASSOCIATION: ${{ github.event.comment.author_association }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
shell: bash
run: |
if [[ "$EVENT_NAME" == "pull_request_target" ]]; then
AUTHOR="$PR_AUTHOR"
ASSOCIATION="$PR_ASSOCIATION"
if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then
AUTHOR="${{ github.event.pull_request.user.login }}"
else
AUTHOR="$COMMENT_AUTHOR"
ASSOCIATION="$COMMENT_ASSOCIATION"
AUTHOR="${{ github.event.comment.user.login }}"
fi
ORG="scylladb"
if gh api "/orgs/${ORG}/members/${AUTHOR}" --silent 2>/dev/null; then
@@ -40,11 +34,13 @@ jobs:
- name: Validate Comment Trigger
if: github.event_name == 'issue_comment'
id: verify_comment
env:
COMMENT_BODY: ${{ github.event.comment.body }}
shell: bash
run: |
CLEAN_BODY=$(echo "$COMMENT_BODY" | grep -v '^[[:space:]]*>')
BODY=$(cat << 'EOF'
${{ github.event.comment.body }}
EOF
)
CLEAN_BODY=$(echo "$BODY" | grep -v '^[[:space:]]*>')
if echo "$CLEAN_BODY" | grep -qi '@scylladbbot' && echo "$CLEAN_BODY" | grep -qi 'trigger-ci'; then
echo "trigger=true" >> $GITHUB_OUTPUT

View File

@@ -78,7 +78,7 @@ fi
# Default scylla product/version tags
PRODUCT=scylla
VERSION=2026.1.1
VERSION=2026.1.0
if test -f version
then

View File

@@ -48,7 +48,6 @@
#include "mutation/mutation_fragment_stream_validator.hh"
#include "utils/assert.hh"
#include "utils/error_injection.hh"
#include "utils/chunked_vector.hh"
#include "utils/pretty_printers.hh"
#include "readers/multi_range.hh"
#include "readers/compacting.hh"
@@ -612,23 +611,23 @@ private:
}
// Called in a seastar thread
utils::chunked_vector<dht::partition_range>
dht::partition_range_vector
get_ranges_for_invalidation(const std::vector<sstables::shared_sstable>& sstables) {
// If owned ranges is disengaged, it means no cleanup work was done and
// so nothing needs to be invalidated.
if (!_owned_ranges) {
return {};
return dht::partition_range_vector{};
}
auto owned_ranges = dht::to_partition_ranges_chunked(*_owned_ranges).get();
auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);
auto non_owned_ranges = sstables
| std::views::transform([] (const sstables::shared_sstable& sst) {
seastar::thread::maybe_yield();
return dht::partition_range::make({sst->get_first_decorated_key(), true},
{sst->get_last_decorated_key(), true});
}) | std::ranges::to<utils::chunked_vector<dht::partition_range>>();
}) | std::ranges::to<dht::partition_range_vector>();
return dht::subtract_ranges(*_schema, std::move(non_owned_ranges), std::move(owned_ranges)).get();
return dht::subtract_ranges(*_schema, non_owned_ranges, std::move(owned_ranges)).get();
}
protected:
compaction(compaction_group_view& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor, use_backlog_tracker use_backlog_tracker)
@@ -719,8 +718,8 @@ protected:
compaction_completion_desc
get_compaction_completion_desc(std::vector<sstables::shared_sstable> input_sstables, std::vector<sstables::shared_sstable> output_sstables) {
auto ranges = get_ranges_for_invalidation(input_sstables);
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges)};
auto ranges_for_for_invalidation = get_ranges_for_invalidation(input_sstables);
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_for_invalidation)};
}
// Tombstone expiration is enabled based on the presence of sstable set.

View File

@@ -16,7 +16,6 @@
#include "sstables/sstable_set.hh"
#include "compaction_fwd.hh"
#include "mutation_writer/token_group_based_splitting_writer.hh"
#include "utils/chunked_vector.hh"
namespace compaction {
@@ -39,7 +38,7 @@ struct compaction_completion_desc {
// New, fresh SSTables that should be added to SSTable set, replacing the old ones.
std::vector<sstables::shared_sstable> new_sstables;
// Set of compacted partition ranges that should be invalidated in the cache.
utils::chunked_vector<dht::partition_range> ranges_for_cache_invalidation;
dht::partition_range_vector ranges_for_cache_invalidation;
};
// creates a new SSTable for a given shard

View File

@@ -105,7 +105,6 @@ public:
static const std::chrono::minutes entry_expiry;
using key_type = prepared_cache_key_type;
using pinned_value_type = cache_value_ptr;
using value_type = checked_weak_ptr;
using statement_is_too_big = typename cache_type::entry_is_too_big;
@@ -117,14 +116,9 @@ public:
: _cache(size, entry_expiry, logger)
{}
template <typename LoadFunc>
future<pinned_value_type> get_pinned(const key_type& key, LoadFunc&& load) {
return _cache.get_ptr(key.key(), [load = std::forward<LoadFunc>(load)] (const cache_key_type&) { return load(); });
}
template <typename LoadFunc>
future<value_type> get(const key_type& key, LoadFunc&& load) {
return get_pinned(key, std::forward<LoadFunc>(load)).then([] (cache_value_ptr v_ptr) {
return _cache.get_ptr(key.key(), [load = std::forward<LoadFunc>(load)] (const cache_key_type&) { return load(); }).then([] (cache_value_ptr v_ptr) {
return make_ready_future<value_type>((*v_ptr)->checked_weak_from_this());
});
}

View File

@@ -697,7 +697,7 @@ future<::shared_ptr<cql_transport::messages::result_message::prepared>>
query_processor::prepare(sstring query_string, const service::client_state& client_state, cql3::dialect d) {
try {
auto key = compute_id(query_string, client_state.get_raw_keyspace(), d);
auto prep_entry = co_await _prepared_cache.get_pinned(key, [this, &query_string, &client_state, d] {
auto prep_ptr = co_await _prepared_cache.get(key, [this, &query_string, &client_state, d] {
auto prepared = get_statement(query_string, client_state, d);
prepared->calculate_metadata_id();
auto bound_terms = prepared->statement->get_bound_terms();
@@ -711,13 +711,13 @@ query_processor::prepare(sstring query_string, const service::client_state& clie
return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
});
co_await utils::get_local_injector().inject(
"query_processor_prepare_wait_after_cache_get",
utils::wait_for_message(std::chrono::seconds(60)));
auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_entry),
const auto& warnings = prep_ptr->warnings;
const auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_ptr),
client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK));
co_return std::move(msg);
for (const auto& w : warnings) {
msg->add_warning(w);
}
co_return ::shared_ptr<cql_transport::messages::result_message::prepared>(std::move(msg));
} catch(typename prepared_statements_cache::statement_is_too_big&) {
throw prepared_statement_is_too_big(query_string);
}

View File

@@ -29,7 +29,6 @@
#include "utils/assert.hh"
#include "utils/updateable_value.hh"
#include "utils/labels.hh"
#include "utils/chunked_vector.hh"
namespace cache {
@@ -1216,10 +1215,10 @@ future<> row_cache::invalidate(external_updater eu, const dht::decorated_key& dk
}
future<> row_cache::invalidate(external_updater eu, const dht::partition_range& range, cache_invalidation_filter filter) {
return invalidate(std::move(eu), utils::chunked_vector<dht::partition_range>({range}), std::move(filter));
return invalidate(std::move(eu), dht::partition_range_vector({range}), std::move(filter));
}
future<> row_cache::invalidate(external_updater eu, utils::chunked_vector<dht::partition_range>&& ranges, cache_invalidation_filter filter) {
future<> row_cache::invalidate(external_updater eu, dht::partition_range_vector&& ranges, cache_invalidation_filter filter) {
return do_update(std::move(eu), [this, ranges = std::move(ranges), filter = std::move(filter)] mutable {
return seastar::async([this, ranges = std::move(ranges), filter = std::move(filter)] {
auto on_failure = defer([this] () noexcept {

View File

@@ -17,7 +17,6 @@
#include "utils/histogram.hh"
#include "mutation/partition_version.hh"
#include "utils/double-decker.hh"
#include "utils/chunked_vector.hh"
#include "db/cache_tracker.hh"
#include "readers/empty.hh"
#include "readers/mutation_source.hh"
@@ -458,7 +457,7 @@ public:
// mutation source made prior to the call to invalidate().
future<> invalidate(external_updater, const dht::decorated_key&);
future<> invalidate(external_updater, const dht::partition_range& = query::full_partition_range, cache_invalidation_filter filter = [] (const auto&) { return true; });
future<> invalidate(external_updater, utils::chunked_vector<dht::partition_range>&&, cache_invalidation_filter filter = [] (const auto&) { return true; });
future<> invalidate(external_updater, dht::partition_range_vector&&, cache_invalidation_filter filter = [] (const auto&) { return true; });
// Evicts entries from cache.
//

View File

@@ -105,7 +105,7 @@ namespace {
schema_builder::register_schema_initializer([](schema_builder& builder) {
if (builder.ks_name() == schema_tables::NAME) {
// all schema tables are group0 tables
builder.set_is_group0_table();
builder.set_is_group0_table(true);
}
});
}

View File

@@ -87,15 +87,31 @@ namespace {
static const std::unordered_set<sstring> tables = {
schema_tables::SCYLLA_TABLE_SCHEMA_HISTORY,
system_keyspace::BROADCAST_KV_STORE,
system_keyspace::CDC_GENERATIONS_V3,
system_keyspace::RAFT,
system_keyspace::RAFT_SNAPSHOTS,
system_keyspace::RAFT_SNAPSHOT_CONFIG,
system_keyspace::GROUP0_HISTORY,
system_keyspace::DISCOVERY,
system_keyspace::TABLETS,
system_keyspace::TOPOLOGY,
system_keyspace::TOPOLOGY_REQUESTS,
system_keyspace::LOCAL,
system_keyspace::PEERS,
system_keyspace::SCYLLA_LOCAL,
system_keyspace::COMMITLOG_CLEANUPS,
system_keyspace::SERVICE_LEVELS_V2,
system_keyspace::VIEW_BUILD_STATUS_V2,
system_keyspace::CDC_STREAMS_STATE,
system_keyspace::CDC_STREAMS_HISTORY,
system_keyspace::ROLES,
system_keyspace::ROLE_MEMBERS,
system_keyspace::ROLE_ATTRIBUTES,
system_keyspace::ROLE_PERMISSIONS,
system_keyspace::CDC_LOCAL,
system_keyspace::DICTS,
system_keyspace::VIEW_BUILDING_TASKS,
system_keyspace::CLIENT_ROUTES,
};
if (builder.ks_name() == system_keyspace::NAME && tables.contains(builder.cf_name())) {
builder.enable_schema_commitlog();
@@ -127,7 +143,7 @@ namespace {
system_keyspace::REPAIR_TASKS,
};
if (builder.ks_name() == system_keyspace::NAME && tables.contains(builder.cf_name())) {
builder.set_is_group0_table();
builder.set_is_group0_table(true);
}
});
}

View File

@@ -930,7 +930,8 @@ bool view_updates::can_skip_view_updates(const clustering_or_static_row& update,
const row& existing_row = existing.cells();
const row& updated_row = update.cells();
return std::ranges::all_of(_base->regular_columns(), [this, &updated_row, &existing_row] (const column_definition& cdef) {
const bool base_has_nonexpiring_marker = update.marker().is_live() && !update.marker().is_expiring();
return std::ranges::all_of(_base->regular_columns(), [this, &updated_row, &existing_row, base_has_nonexpiring_marker] (const column_definition& cdef) {
const auto view_it = _view->columns_by_name().find(cdef.name());
const bool column_is_selected = view_it != _view->columns_by_name().end();
@@ -938,29 +939,49 @@ bool view_updates::can_skip_view_updates(const clustering_or_static_row& update,
// as part of its PK, there are NO virtual columns corresponding to the unselected columns in the view.
// Because of that, we don't generate view updates when the value in an unselected column is created
// or changes.
if (!column_is_selected) {
if (!column_is_selected && _base_info.has_base_non_pk_columns_in_view_pk) {
return true;
}
// We cannot skip if the value was created or deleted
//TODO(sarna): Optimize collections case - currently they do not go under optimization
if (!cdef.is_atomic()) {
return false;
}
// We cannot skip if the value was created or deleted, unless we have a non-expiring marker
const auto* existing_cell = existing_row.find_cell(cdef.id);
const auto* updated_cell = updated_row.find_cell(cdef.id);
if (existing_cell == nullptr || updated_cell == nullptr) {
return existing_cell == updated_cell;
return existing_cell == updated_cell || (!column_is_selected && base_has_nonexpiring_marker);
}
if (!cdef.is_atomic()) {
return existing_cell->as_collection_mutation().data == updated_cell->as_collection_mutation().data;
}
atomic_cell_view existing_cell_view = existing_cell->as_atomic_cell(cdef);
atomic_cell_view updated_cell_view = updated_cell->as_atomic_cell(cdef);
// We cannot skip when a selected column is changed
if (view_it->second->is_view_virtual()) {
return atomic_cells_liveness_equal(existing_cell_view, updated_cell_view);
if (column_is_selected) {
if (view_it->second->is_view_virtual()) {
return atomic_cells_liveness_equal(existing_cell_view, updated_cell_view);
}
return compare_atomic_cell_for_merge(existing_cell_view, updated_cell_view) == 0;
}
return compare_atomic_cell_for_merge(existing_cell_view, updated_cell_view) == 0;
// With non-expiring row marker, liveness checks below are not relevant
if (base_has_nonexpiring_marker) {
return true;
}
if (existing_cell_view.is_live() != updated_cell_view.is_live()) {
return false;
}
// We cannot skip if the change updates TTL
const bool existing_has_ttl = existing_cell_view.is_live_and_has_ttl();
const bool updated_has_ttl = updated_cell_view.is_live_and_has_ttl();
if (existing_has_ttl || updated_has_ttl) {
return existing_has_ttl == updated_has_ttl && existing_cell_view.expiry() == updated_cell_view.expiry();
}
return true;
});
}
@@ -1728,7 +1749,7 @@ static endpoints_to_update get_view_natural_endpoint_vnodes(
std::vector<std::reference_wrapper<const locator::node>> base_nodes,
std::vector<std::reference_wrapper<const locator::node>> view_nodes,
locator::endpoint_dc_rack my_location,
const bool network_topology,
const locator::network_topology_strategy* network_topology,
replica::cf_stats& cf_stats) {
using node_vector = std::vector<std::reference_wrapper<const locator::node>>;
node_vector base_endpoints, view_endpoints;
@@ -1881,7 +1902,7 @@ endpoints_to_update get_view_natural_endpoint(
locator::host_id me,
const locator::effective_replication_map_ptr& base_erm,
const locator::effective_replication_map_ptr& view_erm,
const bool network_topology,
const locator::abstract_replication_strategy& replication_strategy,
const dht::token& base_token,
const dht::token& view_token,
bool use_tablets,
@@ -1889,6 +1910,7 @@ endpoints_to_update get_view_natural_endpoint(
auto& topology = base_erm->get_token_metadata_ptr()->get_topology();
auto& view_topology = view_erm->get_token_metadata_ptr()->get_topology();
auto& my_location = topology.get_location(me);
auto* network_topology = dynamic_cast<const locator::network_topology_strategy*>(&replication_strategy);
auto resolve = [&] (const locator::topology& topology, const locator::host_id& ep, bool is_view) -> const locator::node& {
if (auto* np = topology.find_node(ep)) {
@@ -1922,7 +1944,7 @@ endpoints_to_update get_view_natural_endpoint(
// view pairing as the leaving base replica.
// note that the recursive call will not recurse again because leaving_base is in base_nodes.
auto leaving_base = it->get().host_id();
return get_view_natural_endpoint(leaving_base, base_erm, view_erm, network_topology, base_token,
return get_view_natural_endpoint(leaving_base, base_erm, view_erm, replication_strategy, base_token,
view_token, use_tablets, cf_stats);
}
}
@@ -2018,9 +2040,7 @@ future<> view_update_generator::mutate_MV(
wait_for_all_updates wait_for_all)
{
auto& ks = _db.find_keyspace(base->ks_name());
const bool uses_tablets = ks.uses_tablets();
const bool uses_nts = dynamic_cast<const locator::network_topology_strategy*>(&ks.get_replication_strategy()) != nullptr;
// The object pointed by `ks` may disappear after preeemption. It should not be touched again after this comment.
auto& replication = ks.get_replication_strategy();
std::unordered_map<table_id, locator::effective_replication_map_ptr> erms;
auto get_erm = [&] (table_id id) {
auto it = erms.find(id);
@@ -2039,8 +2059,8 @@ future<> view_update_generator::mutate_MV(
co_await max_concurrent_for_each(view_updates, max_concurrent_updates, [&] (frozen_mutation_and_schema mut) mutable -> future<> {
auto view_token = dht::get_token(*mut.s, mut.fm.key());
auto view_ermp = erms.at(mut.s->id());
auto [target_endpoint, no_pairing_endpoint] = get_view_natural_endpoint(me, base_ermp, view_ermp, uses_nts, base_token, view_token,
uses_tablets, cf_stats);
auto [target_endpoint, no_pairing_endpoint] = get_view_natural_endpoint(me, base_ermp, view_ermp, replication, base_token, view_token,
ks.uses_tablets(), cf_stats);
auto remote_endpoints = view_ermp->get_pending_replicas(view_token);
auto memory_units = seastar::make_lw_shared<db::timeout_semaphore_units>(pending_view_update_memory_units.split(memory_usage_of(mut)));
if (no_pairing_endpoint) {

View File

@@ -303,7 +303,7 @@ endpoints_to_update get_view_natural_endpoint(
locator::host_id node,
const locator::effective_replication_map_ptr& base_erm,
const locator::effective_replication_map_ptr& view_erm,
const bool network_topology,
const locator::abstract_replication_strategy& replication_strategy,
const dht::token& base_token,
const dht::token& view_token,
bool use_tablets,

View File

@@ -352,16 +352,6 @@ dht::partition_range_vector to_partition_ranges(const dht::token_range_vector& r
return prs;
}
future<utils::chunked_vector<dht::partition_range>> to_partition_ranges_chunked(const dht::token_range_vector& ranges) {
utils::chunked_vector<dht::partition_range> prs;
prs.reserve(ranges.size());
for (auto& range : ranges) {
prs.push_back(dht::to_partition_range(range));
co_await coroutine::maybe_yield();
}
co_return prs;
}
std::map<unsigned, dht::partition_range_vector>
split_range_to_shards(dht::partition_range pr, const schema& s, const sharder& raw_sharder) {
std::map<unsigned, dht::partition_range_vector> ret;
@@ -374,11 +364,11 @@ split_range_to_shards(dht::partition_range pr, const schema& s, const sharder& r
return ret;
}
future<utils::chunked_vector<dht::partition_range>> subtract_ranges(const schema& schema, utils::chunked_vector<dht::partition_range> source_ranges, utils::chunked_vector<dht::partition_range> ranges_to_subtract) {
future<dht::partition_range_vector> subtract_ranges(const schema& schema, const dht::partition_range_vector& source_ranges, dht::partition_range_vector ranges_to_subtract) {
auto cmp = dht::ring_position_comparator(schema);
// optimize set of potentially overlapping ranges by deoverlapping them.
auto ranges = dht::partition_range::deoverlap(std::move(source_ranges), cmp);
utils::chunked_vector<dht::partition_range> res;
auto ranges = dht::partition_range::deoverlap(source_ranges, cmp);
dht::partition_range_vector res;
res.reserve(ranges.size() * 2);
auto range = ranges.begin();

View File

@@ -91,7 +91,6 @@ inline token get_token(const schema& s, partition_key_view key) {
dht::partition_range to_partition_range(dht::token_range);
dht::partition_range_vector to_partition_ranges(const dht::token_range_vector& ranges, utils::can_yield can_yield = utils::can_yield::no);
future<utils::chunked_vector<dht::partition_range>> to_partition_ranges_chunked(const dht::token_range_vector& ranges);
// Each shard gets a sorted, disjoint vector of ranges
std::map<unsigned, dht::partition_range_vector>
@@ -106,7 +105,7 @@ std::unique_ptr<dht::i_partitioner> make_partitioner(sstring name);
// Returns a sorted and deoverlapped list of ranges that are
// the result of subtracting all ranges from ranges_to_subtract.
// ranges_to_subtract must be sorted and deoverlapped.
future<utils::chunked_vector<dht::partition_range>> subtract_ranges(const schema& schema, utils::chunked_vector<dht::partition_range> ranges, utils::chunked_vector<dht::partition_range> ranges_to_subtract);
future<dht::partition_range_vector> subtract_ranges(const schema& schema, const dht::partition_range_vector& ranges, dht::partition_range_vector ranges_to_subtract);
// Returns a token_range vector split based on the given number of most-significant bits
dht::token_range_vector split_token_range_msb(unsigned most_significant_bits);

View File

@@ -30,31 +30,6 @@ enum class token_kind {
after_all_keys,
};
// Represents a token for partition keys.
// Has a disengaged state, which sorts before all engaged states.
struct raw_token {
int64_t value;
/// Constructs a disengaged token.
raw_token() : value(std::numeric_limits<int64_t>::min()) {}
/// Constructs an engaged token.
/// The token must be of token_kind::key kind.
explicit raw_token(const token&);
explicit raw_token(int64_t v) : value(v) {};
std::strong_ordering operator<=>(const raw_token& o) const noexcept = default;
std::strong_ordering operator<=>(const token& o) const noexcept;
/// Returns true iff engaged.
explicit operator bool() const noexcept {
return value != std::numeric_limits<int64_t>::min();
}
};
using raw_token_opt = seastar::optimized_optional<raw_token>;
class token {
// INT64_MIN is not a legal token, but a special value used to represent
// infinity in token intervals.
@@ -77,10 +52,6 @@ public:
constexpr explicit token(int64_t d) noexcept : token(kind::key, normalize(d)) {}
token(raw_token raw) noexcept
: token(raw ? kind::key : kind::before_all_keys, raw.value)
{ }
// This constructor seems redundant with the bytes_view constructor, but
// it's necessary for IDL, which passes a deserialized_bytes_proxy here.
// (deserialized_bytes_proxy is convertible to bytes&&, but not bytes_view.)
@@ -252,29 +223,6 @@ public:
}
};
inline
raw_token::raw_token(const token& t)
: value(t.raw())
{
#ifdef DEBUG
assert(t._kind == token::kind::key);
#endif
}
inline
std::strong_ordering raw_token::operator<=>(const token& o) const noexcept {
switch (o._kind) {
case token::kind::after_all_keys:
return std::strong_ordering::less;
case token::kind::before_all_keys:
// before_all_keys has a raw value set to the same raw value as a disengaged raw_token, and sorts before all keys.
// So we can order them by just comparing raw values.
[[fallthrough]];
case token::kind::key:
return value <=> o._data;
}
}
inline constexpr std::strong_ordering tri_compare_raw(const int64_t l1, const int64_t l2) noexcept {
if (l1 == l2) {
return std::strong_ordering::equal;
@@ -381,17 +329,6 @@ struct fmt::formatter<dht::token> : fmt::formatter<string_view> {
}
};
template <>
struct fmt::formatter<dht::raw_token> : fmt::formatter<string_view> {
template <typename FormatContext>
auto format(const dht::raw_token& t, FormatContext& ctx) const {
if (!t) {
return fmt::format_to(ctx.out(), "null");
}
return fmt::format_to(ctx.out(), "{}", t.value);
}
};
namespace std {
template<>

View File

@@ -281,8 +281,8 @@ For example::
ORDER BY embedding ANN OF [0.1, 0.2, 0.3, 0.4] LIMIT 5;
Vector queries also support filtering with ``WHERE`` clauses on columns that are part of the primary key.
See :ref:`WHERE <where-clause>`.
Vector queries also support filtering with ``WHERE`` clauses on columns that are part of the primary key
or columns provided in a definition of the index.
For example::
@@ -290,6 +290,10 @@ For example::
WHERE user_id = 'user123'
ORDER BY embedding ANN OF [0.1, 0.2, 0.3, 0.4] LIMIT 5;
The supported operations are equal relations (``=`` and ``IN``) with restrictions as in regular ``WHERE`` clauses. See :ref:`WHERE <where-clause>`.
Other filtering scenarios are currently not supported.
.. note::
Vector indexes are supported in ScyllaDB Cloud only in clusters that have the Vector Search feature enabled.

View File

@@ -52,7 +52,7 @@ Install ScyllaDB
.. code-block:: console
:substitutions:
sudo wget -O /etc/apt/sources.list.d/scylla.list https://downloads.scylladb.com/deb/debian/|UBUNTU_SCYLLADB_LIST|
sudo wget -O /etc/apt/sources.list.d/scylla.list http://downloads.scylladb.com/deb/debian/|UBUNTU_SCYLLADB_LIST|
#. Install ScyllaDB packages.
@@ -125,7 +125,7 @@ Install ScyllaDB
.. code-block:: console
:substitutions:
sudo curl -o /etc/yum.repos.d/scylla.repo -L https://downloads.scylladb.com/rpm/centos/|CENTOS_SCYLLADB_REPO|
sudo curl -o /etc/yum.repos.d/scylla.repo -L http://downloads.scylladb.com/rpm/centos/|CENTOS_SCYLLADB_REPO|
#. Install ScyllaDB packages.
@@ -133,19 +133,19 @@ Install ScyllaDB
sudo yum install scylla
Running the command installs the latest official version of ScyllaDB.
Alternatively, you can install a specific patch version:
Running the command installs the latest official version of ScyllaDB Open Source.
Alternatively, you can install a specific patch version:
.. code-block:: console
sudo yum install scylla-<your patch version>
Example: The following example shows installing ScyllaDB 2025.3.1.
Example: The following example shows the command to install ScyllaDB 5.2.3.
.. code-block:: console
:class: hide-copy-button
sudo yum install scylla-2025.3.1
sudo yum install scylla-5.2.3
.. include:: /getting-started/_common/setup-after-install.rst

View File

@@ -36,8 +36,11 @@ release versions, run:
curl -sSf get.scylladb.com/server | sudo bash -s -- --list-active-releases
To install a non-default version, run the command with the ``--scylla-version``
option to specify the version you want to install.
Versions 2025.1 and Later
==============================
Run the command with the ``--scylla-version`` option to specify the version
you want to install.
**Example**
@@ -47,4 +50,20 @@ option to specify the version you want to install.
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-version |CURRENT_VERSION|
Versions Earlier than 2025.1
================================
To install a supported version of *ScyllaDB Enterprise*, run the command with:
* ``--scylla-product scylla-enterprise`` to specify that you want to install
ScyllaDB Enterprise.
* ``--scylla-version`` to specify the version you want to install.
For example:
.. code:: console
curl -sSf get.scylladb.com/server | sudo bash -s -- --scylla-product scylla-enterprise --scylla-version 2024.1
.. include:: /getting-started/_common/setup-after-install.rst

View File

@@ -1,492 +0,0 @@
=================================================
Cluster Platform Migration Using Node Cycling
=================================================
This procedure describes how to migrate a ScyllaDB cluster to new instance types
using the add-and-replace approach, which is commonly used for:
* Migrating from one CPU architecture to another (e.g., x86_64 to ARM/Graviton)
* Upgrading to newer instance types with better performance
* Changing instance families within the same cloud provider
The add-and-replace approach maintains data replication throughout the migration
and ensures zero downtime for client applications.
.. note::
This procedure does **not** change the ScyllaDB software version. All nodes
(both existing and new) must run the same ScyllaDB version. For software
version upgrades, see :doc:`Upgrade </upgrade/index>`.
Overview
--------
The add-and-replace migration follows these steps:
#. Add new nodes (on target instance type) to the existing cluster
#. Wait for data to stream to the new nodes
#. Decommission old nodes (on source instance type)
This approach keeps the cluster operational throughout the migration while
maintaining the configured replication factor.
Key characteristics
===================
* **Zero downtime**: Client applications continue to operate during migration
* **Data safety**: Replication factor is maintained throughout the process
* **Flexible**: Works with both vnodes and tablets-enabled clusters
* **Multi-DC support**: Can migrate nodes across multiple datacenters
.. warning::
Ensure your cluster has sufficient capacity during the migration. At the peak
of the process, your cluster will temporarily have double the number of nodes.
Prerequisites
-------------
Check cluster health
====================
Before starting the migration, verify that your cluster is healthy:
#. Check that all nodes are in Up Normal (UN) status:
.. code-block:: shell
nodetool status
All nodes should show ``UN`` status. Do not proceed if any nodes are down.
#. Ensure no streaming or repair operations are in progress:
.. code-block:: shell
nodetool netstats
nodetool compactionstats
Plan the migration
==================
Before provisioning new instances, plan the following:
**Instance type mapping**: Identify the source and target instance types.
If your cluster uses vnodes (not tablets), consider that mismatched shard
counts between source and target instance types can cause slower repairs.
With tablets enabled, shard count mismatch is fully supported.
**Rack assignment planning**: Each new node must be assigned to the same rack
as the node it will replace. This maintains rack-aware topology for:
* Rack-aware replication (NetworkTopologyStrategy)
* Proper data distribution across failure domains
* Minimizing data movement during decommission
Example mapping for a 3-node cluster:
.. code-block:: none
Source nodes (to be decommissioned): Target nodes (to be added):
192.168.1.10 - RACK0 → 192.168.2.10 - RACK0
192.168.1.11 - RACK1 → 192.168.2.11 - RACK1
192.168.1.12 - RACK2 → 192.168.2.12 - RACK2
Create a backup
===============
Back up the data before starting the migration. One of the following
methods can be used:
* **ScyllaDB Manager** (recommended): Use ScyllaDB Manager to perform a
cluster-wide backup. See the
`ScyllaDB Manager documentation <https://manager.docs.scylladb.com/stable/backup/>`_
for details.
* **Snapshots**: On each node in the cluster, create a snapshot:
.. code-block:: shell
nodetool snapshot -t pre_migration_backup
nodetool listsnapshots
.. note::
Snapshots are local to each node and do not protect against node or disk
failure. For full disaster recovery, use ScyllaDB Manager backup.
Procedure
---------
Adding new nodes
================
#. Provision new instances with the target instance type. Ensure:
* The same ScyllaDB version as existing nodes
* Same network configuration and security groups
* Appropriate storage configuration
#. On each new node, configure ``/etc/scylla/scylla.yaml`` to join the existing
cluster:
* **cluster_name**: Must match the existing cluster name
* **seeds**: IP address of an existing node in the cluster (used to discover cluster topology on join)
* **endpoint_snitch**: Must match the existing cluster configuration
* **listen_address**: IP address of the new node
* **rpc_address**: IP address of the new node
All other cluster-wide settings (tablets configuration, encryption settings,
experimental features, etc.) must match the existing nodes.
.. caution::
Make sure that the ScyllaDB version on the new node is identical to the
version on the other nodes in the cluster. Running nodes with different
versions is not supported.
#. If using ``GossipingPropertyFileSnitch``, configure
``/etc/scylla/cassandra-rackdc.properties`` with the correct datacenter
and rack assignment for this node:
.. code-block:: none
dc = <datacenter-name>
rack = <rack-name>
prefer_local = true
.. warning::
Each node must have the correct rack assignment. Using the same rack for
all new nodes breaks rack-aware replication topology.
#. Start ScyllaDB on the new node:
.. code-block:: shell
sudo systemctl start scylla-server
For Docker deployments:
.. code-block:: shell
docker exec -it <container-name> supervisorctl start scylla
#. Monitor the bootstrap process from an existing node:
.. code-block:: shell
nodetool status
The new node will appear with ``UJ`` (Up, Joining) status while streaming
data from existing nodes. Wait until it transitions to ``UN`` (Up, Normal).
**Example output during bootstrap:**
.. code-block:: shell
Datacenter: dc1
Status=Up/Down
State=Normal/Leaving/Joining/Moving
-- Address Load Tokens Owns Host ID Rack
UN 192.168.1.10 500 MB 256 33.3% 8d5ed9f4-7764-4dbd-bad8-43fddce94b7c RACK0
UN 192.168.1.11 500 MB 256 33.3% 125ed9f4-7777-1dbn-mac8-43fddce9123e RACK1
UN 192.168.1.12 500 MB 256 33.3% 675ed9f4-6564-6dbd-can8-43fddce952gy RACK2
UJ 192.168.2.10 250 MB 256 ? a1b2c3d4-5678-90ab-cdef-112233445566 RACK0
**Example output after bootstrap completes:**
.. code-block:: shell
Datacenter: dc1
Status=Up/Down
State=Normal/Leaving/Joining/Moving
-- Address Load Tokens Owns Host ID Rack
UN 192.168.1.10 400 MB 256 25.0% 8d5ed9f4-7764-4dbd-bad8-43fddce94b7c RACK0
UN 192.168.1.11 400 MB 256 25.0% 125ed9f4-7777-1dbn-mac8-43fddce9123e RACK1
UN 192.168.1.12 400 MB 256 25.0% 675ed9f4-6564-6dbd-can8-43fddce952gy RACK2
UN 192.168.2.10 400 MB 256 25.0% a1b2c3d4-5678-90ab-cdef-112233445566 RACK0
#. For tablets-enabled clusters, wait for tablet load balancing to complete.
After the node reaches ``UN`` status, verify no streaming is in progress:
.. code-block:: shell
nodetool netstats
Wait until output shows "Not sending any streams" and no active receiving streams.
#. Repeat steps 1-6 for each new node to be added.
.. note::
You can add multiple nodes in parallel if they are in different datacenters.
Within a single datacenter, add nodes one at a time for best results.
Updating seed node configuration
================================
If any of your original nodes are configured as seed nodes, you must update
the seed configuration before decommissioning them.
#. Check the current seed configuration on any node:
.. code-block:: shell
grep -A 4 "seed_provider" /etc/scylla/scylla.yaml
#. If the seeds include nodes you plan to decommission, update ``scylla.yaml``
on **all new nodes** to use the new node IPs as seeds:
.. code-block:: yaml
seed_provider:
- class_name: org.apache.cassandra.locator.SimpleSeedProvider
parameters:
- seeds: "192.168.2.10,192.168.2.11,192.168.2.12"
.. note::
Updating seed configuration on the **old nodes** (that will be
decommissioned) is optional. Seeds are only used during node startup
to discover the cluster. If you don't plan to restart the old nodes
before decommissioning them, their seed configuration doesn't matter.
However, updating all nodes is recommended for safety in case an old
node unexpectedly restarts during the migration.
#. Restart ScyllaDB on each new node (one at a time) to apply the new seed
configuration:
.. code-block:: shell
sudo systemctl restart scylla-server
Wait for the node to fully start before restarting the next node.
#. After restarting the new nodes, verify the cluster is healthy:
.. code-block:: shell
nodetool status
nodetool describecluster
.. warning::
Complete this seed list update on **all new nodes** before decommissioning
any old nodes. This ensures the new nodes can reform the cluster after
the old nodes are removed.
Decommissioning old nodes
=========================
After all new nodes are added and healthy, decommission the old nodes one
at a time.
#. Verify all nodes are healthy before starting decommission:
.. code-block:: shell
nodetool status
All nodes should show ``UN`` status.
#. On the node to be decommissioned, run:
.. code-block:: shell
nodetool decommission
This command blocks until the decommission is complete. The node will
stream its data to the remaining nodes.
#. Monitor the decommission progress from another node:
.. code-block:: shell
nodetool status
The decommissioning node will transition from ``UN`` → ``UL`` (Up, Leaving)
→ removed from the cluster.
You can also monitor streaming progress:
.. code-block:: shell
nodetool netstats
#. After decommission completes, verify the node is no longer in the cluster:
.. code-block:: shell
nodetool status
The decommissioned node should no longer appear in the output.
#. Run ``nodetool cleanup`` on the remaining nodes to remove data that
no longer belongs to them after the topology change:
.. code-block:: shell
nodetool cleanup
.. note::
``nodetool cleanup`` can be resource-intensive. Run it on one node at a
time during low-traffic periods.
#. Wait for the cluster to stabilize before decommissioning the next node.
Ensure no streaming operations are in progress.
#. Repeat steps 1-7 for each old node to be decommissioned.
Post-migration verification
---------------------------
After all old nodes are decommissioned, verify the migration was successful.
Verify cluster topology
=======================
.. code-block:: shell
nodetool status
Confirm:
* All nodes show ``UN`` (Up, Normal) status
* Only the new instance type nodes are present
* Nodes are balanced across racks
Verify schema agreement
=======================
.. code-block:: shell
nodetool describecluster
All nodes should report the same schema version.
Verify data connectivity
========================
Connect to the cluster and run a test query:
.. code-block:: shell
cqlsh <node-ip> -e "SELECT count(*) FROM system_schema.keyspaces;"
.. note::
If ScyllaDB is configured with ``listen_interface``, you must use the
node's interface IP address (not localhost) for cqlsh connections.
Verify ScyllaDB version
=======================
Confirm all nodes are running the same ScyllaDB version:
.. code-block:: shell
scylla --version
Verify data integrity (optional)
================================
Run data validation on each keyspace to verify sstable integrity:
.. code-block:: shell
nodetool scrub --mode=VALIDATE <keyspace_name>
Rollback
--------
If issues occur during the migration, you can roll back by reversing the
procedure.
During add phase
================
If a new node fails to bootstrap:
#. Stop ScyllaDB on the new node:
.. code-block:: shell
sudo systemctl stop scylla-server
#. From an existing node, remove the failed node:
.. code-block:: shell
nodetool removenode <host-id-of-failed-node>
During decommission phase
=========================
If a decommission operation gets stuck:
#. If the node is still reachable, try stopping and restarting ScyllaDB
#. If the node is unresponsive, from another node:
.. code-block:: shell
nodetool removenode <host-id>
See :doc:`Remove a Node from a ScyllaDB Cluster </operating-scylla/procedures/cluster-management/remove-node>`
for more details.
Full rollback
=============
To roll back after the migration is complete (all nodes on new instance type),
apply the same add-and-replace procedure in reverse:
#. Add new nodes on the original instance type
#. Wait for data streaming to complete
#. Decommission the nodes on the new instance type
Troubleshooting
---------------
Node stuck in Joining (UJ) state
================================
If a new node remains in ``UJ`` state for an extended period:
* Check ScyllaDB logs for streaming errors: ``journalctl -u scylla-server``
* Verify network connectivity between nodes
* Ensure sufficient disk space on all nodes
* Check for any ongoing operations that may be blocking
Decommission taking too long
============================
Decommission duration depends on data size. If it appears stuck:
* Check streaming progress: ``nodetool netstats``
* Look for errors in ScyllaDB logs
* Verify network bandwidth between nodes
Schema disagreement
===================
If nodes report different schema versions:
* Wait a few minutes for schema to propagate
* If disagreement persists, restart the nodes one by one
* Run ``nodetool describecluster`` to verify agreement
Additional resources
--------------------
* :doc:`Adding a New Node Into an Existing ScyllaDB Cluster </operating-scylla/procedures/cluster-management/add-node-to-cluster>`
* :doc:`Remove a Node from a ScyllaDB Cluster </operating-scylla/procedures/cluster-management/remove-node>`
* :doc:`Replace a Running Node in a ScyllaDB Cluster </operating-scylla/procedures/cluster-management/replace-running-node>`
* :doc:`Upgrade </upgrade/index>`

View File

@@ -26,7 +26,6 @@ Cluster Management Procedures
Safely Restart Your Cluster <safe-start>
repair-based-node-operation
Prevent Quorum Loss in Symmetrical Multi-DC Clusters <arbiter-dc>
Cluster Platform Migration <cluster-platform-migration>
.. panel-box::
@@ -86,8 +85,6 @@ Cluster Management Procedures
* :doc:`Preventing Quorum Loss in Symmetrical Multi-DC Clusters <arbiter-dc>`
* :doc:`Cluster Platform Migration Using Node Cycling </operating-scylla/procedures/cluster-management/cluster-platform-migration>`
.. panel-box::
:title: Topology Changes
:id: "getting-started"

View File

@@ -57,11 +57,12 @@ To enable shared dictionaries:
internode_compression_enable_advanced: true
rpc_dict_training_when: when_leader
.. note::
.. warning:: Enabling shared dictionary training might leak unencrypted data to disk.
Some dictionary training data may be encrypted using storage-level encryption
(if enabled) instead of database-level encryption, meaning protection is
applied at the storage layer rather than within the database itself.
Trained dictionaries contain randomly chosen samples of data transferred between
nodes. The data samples are persisted in the Raft log, which is not encrypted.
As a result, some data from otherwise encrypted tables might be stored on disk
unencrypted.
Reference

View File

@@ -42,14 +42,7 @@ void everywhere_replication_strategy::validate_options(const gms::feature_servic
sstring everywhere_replication_strategy::sanity_check_read_replicas(const effective_replication_map& erm, const host_id_vector_replica_set& read_replicas) const {
const auto replication_factor = erm.get_replication_factor();
if (const auto& topo_info = erm.get_token_metadata().get_topology_change_info(); topo_info && topo_info->read_new) {
if (read_replicas.size() > replication_factor + 1) {
return seastar::format(
"everywhere_replication_strategy: the number of replicas for everywhere_replication_strategy is {}, "
"cannot be higher than replication factor {} + 1 during the 'read from new replicas' stage of a topology change",
read_replicas.size(), replication_factor);
}
} else if (read_replicas.size() > replication_factor) {
if (read_replicas.size() > replication_factor) {
return seastar::format("everywhere_replication_strategy: the number of replicas for everywhere_replication_strategy is {}, cannot be higher than replication factor {}", read_replicas.size(), replication_factor);
}
return {};

View File

@@ -261,7 +261,7 @@ static collection_mutation serialize_collection_mutation(
writev(v.serialize());
}
return collection_mutation(type, std::move(ret));
return collection_mutation(type, ret);
}
collection_mutation collection_mutation_description::serialize(const abstract_type& type) const {

View File

@@ -103,7 +103,7 @@ future<std::optional<tasks::task_status>> node_ops_virtual_task::get_status(task
.entity = stats.entity,
.progress_units = "",
.progress = tasks::task_manager::task::progress{},
.children = co_await get_children(get_module(), id, _ss.get_token_metadata_ptr())
.children = co_await get_children(get_module(), id, std::bind_front(&gms::gossiper::is_alive, &_ss.gossiper()))
};
}

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:088a9d7e165d33436eb3029ab092582cbae61f0e17486c226d8947ff44658c78
size 6535832
oid sha256:52c9772c9ac334650d8b179b591c47769ee38d34fad784b61c682e11c03f2506
size 6530196

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5f0c0709f9724cd3a545ebcc50ed587f28b2424d55e2334ac2db5d917903bcaf
size 6536892
oid sha256:d1a869ebfe4e90d9681499246eb86bb032ae402c350357e19d97b989037a5bd3
size 6528308

View File

@@ -1021,8 +1021,8 @@ void reader_concurrency_semaphore::signal(const resources& r) noexcept {
on_internal_error_noexcept(rcslog,
format("reader_concurrency_semaphore::signal(): semaphore {} detected resource leak, available {} exceeds initial {}", _name,
_resources, _initial_resources));
_resources.count = std::min(_resources.count, _initial_resources.count);
_resources.memory = std::min(_resources.memory, _initial_resources.memory);
_resources.count = std::max(_resources.count, _initial_resources.count);
_resources.memory = std::max(_resources.memory, _initial_resources.memory);
}
maybe_wake_execution_loop();
}

View File

@@ -432,9 +432,7 @@ public:
// refresh_mutation_source must be called when there are changes to data source
// structures but logical state of data is not changed (e.g. when state for a
// new tablet replica is allocated).
virtual void update_effective_replication_map(const locator::effective_replication_map_ptr& old_erm,
const locator::effective_replication_map& erm,
noncopyable_function<void()> refresh_mutation_source) = 0;
virtual void update_effective_replication_map(const locator::effective_replication_map& erm, noncopyable_function<void()> refresh_mutation_source) = 0;
virtual compaction_group& compaction_group_for_token(dht::token token) const = 0;
virtual compaction_group& compaction_group_for_key(partition_key_view key, const schema_ptr& s) const = 0;
@@ -444,7 +442,7 @@ public:
virtual storage_group& storage_group_for_token(dht::token) const = 0;
virtual utils::chunked_vector<storage_group_ptr> storage_groups_for_token_range(dht::token_range tr) const = 0;
virtual locator::combined_load_stats table_load_stats() const = 0;
virtual locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const = 0;
virtual bool all_storage_groups_split() = 0;
virtual future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) = 0;
virtual future<> maybe_split_compaction_group_of(size_t idx) = 0;

View File

@@ -1697,7 +1697,7 @@ static db::rate_limiter::can_proceed account_singular_ranges_to_rate_limit(
if (!range.is_singular()) {
continue;
}
auto token = dht::token::to_int64(range.start()->value().token());
auto token = dht::token::to_int64(ranges.front().start()->value().token());
if (limiter.account_operation(read_label, token, table_limit, rate_limit_info) == db::rate_limiter::can_proceed::no) {
// Don't return immediately - account all ranges first
ret = can_proceed::no;

View File

@@ -1129,7 +1129,9 @@ public:
return _stats;
}
locator::combined_load_stats table_load_stats() const;
// The tablet filter is used to not double account migrating tablets, so it's important that
// only one of pending or leaving replica is accounted based on current migration stage.
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const;
const db::view::stats& get_view_stats() const {
return _view_stats;

View File

@@ -711,9 +711,7 @@ public:
return make_ready_future<>();
}
void update_effective_replication_map(const locator::effective_replication_map_ptr& old_erm,
const locator::effective_replication_map& erm,
noncopyable_function<void()> refresh_mutation_source) override {}
void update_effective_replication_map(const locator::effective_replication_map& erm, noncopyable_function<void()> refresh_mutation_source) override {}
compaction_group& compaction_group_for_token(dht::token token) const override {
return get_compaction_group();
@@ -736,7 +734,7 @@ public:
return *_single_sg;
}
locator::combined_load_stats table_load_stats() const override {
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)>) const override {
return locator::combined_load_stats{
.table_ls = locator::table_load_stats{
.size_in_bytes = _single_sg->live_disk_space_used(),
@@ -759,11 +757,6 @@ public:
}
};
struct background_merge_guard {
compaction::compaction_reenabler compaction_guard;
locator::effective_replication_map_ptr erm_guard;
};
class tablet_storage_group_manager final : public storage_group_manager {
replica::table& _t;
locator::host_id _my_host_id;
@@ -784,7 +777,7 @@ class tablet_storage_group_manager final : public storage_group_manager {
utils::phased_barrier _merge_fiber_barrier;
std::optional<utils::phased_barrier::operation> _pending_merge_fiber_work;
// Holds compaction reenabler which disables compaction temporarily during tablet merge
std::vector<background_merge_guard> _compaction_reenablers_for_merging;
std::vector<compaction::compaction_reenabler> _compaction_reenablers_for_merging;
private:
const schema_ptr& schema() const {
return _t.schema();
@@ -808,8 +801,7 @@ private:
// Called when coordinator executes tablet merge. Tablet ids X and X+1 are merged into
// the new tablet id (X >> 1). In practice, that means storage groups for X and X+1
// are merged into a new storage group with id (X >> 1).
void handle_tablet_merge_completion(locator::effective_replication_map_ptr old_erm,
const locator::tablet_map& old_tmap, const locator::tablet_map& new_tmap);
void handle_tablet_merge_completion(const locator::tablet_map& old_tmap, const locator::tablet_map& new_tmap);
// When merge completes, compaction groups of sibling tablets are added to same storage
// group, but they're not merged yet into one, since the merge completion handler happens
@@ -903,9 +895,7 @@ public:
std::exchange(_stop_fut, make_ready_future())).discard_result();
}
void update_effective_replication_map(const locator::effective_replication_map_ptr& old_erm,
const locator::effective_replication_map& erm,
noncopyable_function<void()> refresh_mutation_source) override;
void update_effective_replication_map(const locator::effective_replication_map& erm, noncopyable_function<void()> refresh_mutation_source) override;
compaction_group& compaction_group_for_token(dht::token token) const override;
utils::chunked_vector<storage_group_ptr> storage_groups_for_token_range(dht::token_range tr) const override;
@@ -919,7 +909,7 @@ public:
return storage_group_for_id(storage_group_of(token).first);
}
locator::combined_load_stats table_load_stats() const override;
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const override;
bool all_storage_groups_split() override;
future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) override;
future<> maybe_split_compaction_group_of(size_t idx) override;
@@ -2943,108 +2933,17 @@ void table::on_flush_timer() {
});
}
// The following functions return true if we should return the tablet size of a tablet in
// migration depending on its transition stage and whether it is a leaving or pending replica
// Returns true when the leaving replica of a migrating tablet should still
// report the tablet's size for the given transition stage. The leaving
// replica keeps reporting a size until the cleanup stage, at which point its
// copy of the data is removed and the size would no longer be meaningful.
bool has_size_on_leaving (locator::tablet_transition_stage stage) {
    switch (stage) {
    // In all of these stages the leaving replica still holds the tablet's
    // data, so its reported size is valid.
    case locator::tablet_transition_stage::allow_write_both_read_old: [[fallthrough]];
    case locator::tablet_transition_stage::write_both_read_old: [[fallthrough]];
    case locator::tablet_transition_stage::streaming: [[fallthrough]];
    case locator::tablet_transition_stage::write_both_read_new: [[fallthrough]];
    case locator::tablet_transition_stage::use_new: [[fallthrough]];
    case locator::tablet_transition_stage::cleanup_target: [[fallthrough]];
    case locator::tablet_transition_stage::revert_migration: [[fallthrough]];
    case locator::tablet_transition_stage::rebuild_repair: [[fallthrough]];
    case locator::tablet_transition_stage::repair: [[fallthrough]];
    case locator::tablet_transition_stage::end_repair:
        return true;
    // From cleanup onwards the leaving replica's data is being (or has been)
    // removed, so its size must not be reported.
    case locator::tablet_transition_stage::cleanup: [[fallthrough]];
    case locator::tablet_transition_stage::end_migration:
        return false;
    }
}
// Returns true when the pending replica of a migrating tablet should report
// the tablet's size for the given transition stage. The pending replica only
// has a meaningful size once streaming has populated it with the tablet's
// data, i.e. from write_both_read_new onwards.
bool has_size_on_pending (locator::tablet_transition_stage stage) {
    switch (stage) {
    // Before (and during) streaming the pending replica does not yet hold the
    // full data, so its size would under-report the tablet and is skipped.
    case locator::tablet_transition_stage::allow_write_both_read_old: [[fallthrough]];
    case locator::tablet_transition_stage::write_both_read_old: [[fallthrough]];
    case locator::tablet_transition_stage::streaming: [[fallthrough]];
    case locator::tablet_transition_stage::cleanup_target: [[fallthrough]];
    case locator::tablet_transition_stage::revert_migration: [[fallthrough]];
    case locator::tablet_transition_stage::rebuild_repair:
        return false;
    // After streaming completes the pending replica holds the tablet's data
    // and its size is valid to report.
    case locator::tablet_transition_stage::write_both_read_new: [[fallthrough]];
    case locator::tablet_transition_stage::use_new: [[fallthrough]];
    case locator::tablet_transition_stage::cleanup: [[fallthrough]];
    case locator::tablet_transition_stage::end_migration: [[fallthrough]];
    case locator::tablet_transition_stage::repair: [[fallthrough]];
    case locator::tablet_transition_stage::end_repair:
        return true;
    }
}
locator::combined_load_stats tablet_storage_group_manager::table_load_stats() const {
locator::combined_load_stats tablet_storage_group_manager::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const {
locator::table_load_stats table_stats;
table_stats.split_ready_seq_number = _split_ready_seq_number;
locator::tablet_load_stats tablet_stats;
for_each_storage_group([&] (size_t id, storage_group& sg) {
auto tid = locator::tablet_id(id);
locator::global_tablet_id gid { _t.schema()->id(), tid };
locator::tablet_replica me { _my_host_id, this_shard_id() };
const uint64_t tablet_size = sg.live_disk_space_used();
auto transition = _tablet_map->get_tablet_transition_info(tid);
auto& info = _tablet_map->get_tablet_info(tid);
bool is_pending = transition && transition->pending_replica == me;
bool is_leaving = transition && locator::get_leaving_replica(info, *transition) == me;
// It's important to tackle the anomaly in reported size, since both leaving and
// pending replicas could otherwise be accounted during tablet migration.
// If transition hasn't reached write_both_read_new stage, then leaving replicas are accounted.
// Otherwise, pending replicas are accounted.
// This helps to reduce the discrepancy window.
auto table_size_filter = [&] () {
// if tablet is not in transit, it's filtered in.
if (!transition) {
return true;
}
auto s = transition->reads; // read selector
return (!is_pending && !is_leaving)
|| (is_leaving && s == locator::read_replica_set_selector::previous)
|| (is_pending && s == locator::read_replica_set_selector::next);
};
// When a tablet is in migration, we want to send its size during any migration stage when
// we still know the tablet's size. This way the balancer will have better information about
// tablet sizes, and we reduce the chance that the node will be ignored during balancing
// due to missing tablet size. On the leaving replica we include tablets until the use_new
// stage (inclusive), and on the pending we include tablets after the streaming stage.
// There is an overlap in tablet sizes (we report sizes on both the leaving and pending
// replicas for some stages), but that should not be a problem.
auto tablet_size_filter = [&] () {
// if tablet is not in transit, it's filtered in.
if (!transition) {
return true;
}
if (is_leaving) {
return has_size_on_leaving(transition->stage);
} else if (is_pending) {
return has_size_on_pending(transition->stage);
}
return true;
};
if (table_size_filter()) {
locator::global_tablet_id gid { _t.schema()->id(), locator::tablet_id(id) };
if (tablet_filter(*_tablet_map, gid)) {
const uint64_t tablet_size = sg.live_disk_space_used();
table_stats.size_in_bytes += tablet_size;
}
if (tablet_size_filter()) {
const dht::token_range trange = _tablet_map->get_token_range(gid.tablet);
// Make sure the token range is in the form (a, b]
SCYLLA_ASSERT(!trange.start()->is_inclusive() && trange.end()->is_inclusive());
@@ -3057,8 +2956,8 @@ locator::combined_load_stats tablet_storage_group_manager::table_load_stats() co
};
}
locator::combined_load_stats table::table_load_stats() const {
return _sg_manager->table_load_stats();
locator::combined_load_stats table::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const {
return _sg_manager->table_load_stats(std::move(tablet_filter));
}
void tablet_storage_group_manager::handle_tablet_split_completion(const locator::tablet_map& old_tmap, const locator::tablet_map& new_tmap) {
@@ -3170,9 +3069,7 @@ future<> tablet_storage_group_manager::merge_completion_fiber() {
}
}
void tablet_storage_group_manager::handle_tablet_merge_completion(locator::effective_replication_map_ptr old_erm,
const locator::tablet_map& old_tmap,
const locator::tablet_map& new_tmap) {
void tablet_storage_group_manager::handle_tablet_merge_completion(const locator::tablet_map& old_tmap, const locator::tablet_map& new_tmap) {
auto table_id = schema()->id();
size_t old_tablet_count = old_tmap.tablet_count();
size_t new_tablet_count = new_tmap.tablet_count();
@@ -3196,7 +3093,7 @@ void tablet_storage_group_manager::handle_tablet_merge_completion(locator::effec
auto new_cg = make_lw_shared<compaction_group>(_t, new_tid, new_range, make_repair_sstable_classifier_func());
for (auto& view : new_cg->all_views()) {
auto cre = _t.get_compaction_manager().stop_and_disable_compaction_no_wait(*view, "tablet merging");
_compaction_reenablers_for_merging.push_back(background_merge_guard{std::move(cre), old_erm});
_compaction_reenablers_for_merging.push_back(std::move(cre));
}
auto new_sg = make_lw_shared<storage_group>(std::move(new_cg));
@@ -3229,11 +3126,7 @@ void tablet_storage_group_manager::handle_tablet_merge_completion(locator::effec
_merge_completion_event.signal();
}
void tablet_storage_group_manager::update_effective_replication_map(
const locator::effective_replication_map_ptr& old_erm,
const locator::effective_replication_map& erm,
noncopyable_function<void()> refresh_mutation_source)
{
void tablet_storage_group_manager::update_effective_replication_map(const locator::effective_replication_map& erm, noncopyable_function<void()> refresh_mutation_source) {
auto* new_tablet_map = &erm.get_token_metadata().tablets().get_tablet_map(schema()->id());
auto* old_tablet_map = std::exchange(_tablet_map, new_tablet_map);
@@ -3249,7 +3142,7 @@ void tablet_storage_group_manager::update_effective_replication_map(
if (utils::get_local_injector().is_enabled("tablet_force_tablet_count_decrease_once")) {
utils::get_local_injector().disable("tablet_force_tablet_count_decrease");
}
handle_tablet_merge_completion(old_erm, *old_tablet_map, *new_tablet_map);
handle_tablet_merge_completion(*old_tablet_map, *new_tablet_map);
}
// Allocate storage group if tablet is migrating in, or deallocate if it's migrating out.
@@ -3335,7 +3228,7 @@ void table::update_effective_replication_map(locator::effective_replication_map_
};
if (uses_tablets()) {
_sg_manager->update_effective_replication_map(old_erm, *_erm, refresh_mutation_source);
_sg_manager->update_effective_replication_map(*_erm, refresh_mutation_source);
}
if (old_erm) {
old_erm->invalidate();
@@ -3797,6 +3690,7 @@ future<> database::snapshot_table_on_all_shards(sharded<database>& sharded_db, c
tlogger.debug("Taking snapshot of {}.{}: name={}", s->ks_name(), s->cf_name(), name);
std::vector<snapshot_sstable_set> sstable_sets(smp::count);
std::vector<int64_t> tablet_counts(smp::count);
co_await writer->init();
co_await smp::invoke_on_all([&] -> future<> {
@@ -3804,6 +3698,7 @@ future<> database::snapshot_table_on_all_shards(sharded<database>& sharded_db, c
auto [tables, permit] = co_await t.snapshot_sstables();
auto sstables_metadata = co_await t.get_sstables_manager().take_snapshot(std::move(tables), name);
sstable_sets[this_shard_id()] = make_foreign(std::make_unique<utils::chunked_vector<sstables::sstable_snapshot_metadata>>(std::move(sstables_metadata)));
tablet_counts[this_shard_id()] = t.calculate_tablet_count();
});
co_await writer->sync();
@@ -3817,13 +3712,12 @@ future<> database::snapshot_table_on_all_shards(sharded<database>& sharded_db, c
});
tlogger.debug("snapshot {}: seal_snapshot", name);
const auto& topology = sharded_db.local().get_token_metadata().get_topology();
std::optional<int64_t> tablet_count;
std::optional<int64_t> min_tablet_count;
if (t.uses_tablets()) {
auto erm = t.get_effective_replication_map();
auto& tm = erm->get_token_metadata().tablets().get_tablet_map(s->id());
tablet_count = tm.tablet_count();
SCYLLA_ASSERT(!tablet_counts.empty());
min_tablet_count = *std::ranges::min_element(tablet_counts);
}
co_await write_manifest(topology, *writer, std::move(sstable_sets), name, std::move(opts), s, tablet_count).handle_exception([&] (std::exception_ptr ptr) {
co_await write_manifest(topology, *writer, std::move(sstable_sets), name, std::move(opts), s, min_tablet_count).handle_exception([&] (std::exception_ptr ptr) {
tlogger.error("Failed to seal snapshot in {}: {}.", name, ptr);
ex = std::move(ptr);
});
@@ -3881,7 +3775,6 @@ future<std::unordered_map<sstring, table::snapshot_details>> table::get_snapshot
}
auto lister = directory_lister(snapshots_dir, lister::dir_entry_types::of<directory_entry_type::directory>());
auto close_lister = deferred_close(lister);
while (auto de = lister.get().get()) {
auto snapshot_name = de->name;
all_snapshots.emplace(snapshot_name, snapshot_details());
@@ -3889,9 +3782,6 @@ future<std::unordered_map<sstring, table::snapshot_details>> table::get_snapshot
auto& sd = all_snapshots.at(snapshot_name);
sd.total += details.total;
sd.live += details.live;
utils::get_local_injector().inject("get_snapshot_details", [&] (auto& handler) -> future<> {
throw std::runtime_error("Injected exception in get_snapshot_details");
}).get();
}
}
return all_snapshots;
@@ -3911,66 +3801,53 @@ future<table::snapshot_details> table::get_snapshot_details(fs::path snapshot_di
}
auto lister = directory_lister(snapshot_directory, snapshot_dir, lister::dir_entry_types::of<directory_entry_type::regular>());
std::exception_ptr ex;
try {
while (auto de = co_await lister.get()) {
const auto& name = de->name;
future<stat_data> (&file_stat)(file& directory, std::string_view name, follow_symlink) noexcept = seastar::file_stat;
auto sd = co_await io_check(file_stat, snapshot_directory, name, follow_symlink::no);
auto size = sd.allocated_size;
while (auto de = co_await lister.get()) {
const auto& name = de->name;
future<stat_data> (&file_stat)(file& directory, std::string_view name, follow_symlink) noexcept = seastar::file_stat;
auto sd = co_await io_check(file_stat, snapshot_directory, name, follow_symlink::no);
auto size = sd.allocated_size;
utils::get_local_injector().inject("per-snapshot-get_snapshot_details", [&] (auto& handler) -> future<> {
throw std::runtime_error("Injected exception in per-snapshot-get_snapshot_details");
}).get();
// The manifest and schema.cql files are the only files expected to be in this directory not belonging to the SSTable.
//
// All the others should just generate an exception: there is something wrong, so don't blindly
// add it to the size.
if (name != "manifest.json" && name != "schema.cql") {
details.total += size;
if (sd.number_of_links == 1) {
// File exists only in the snapshot directory.
details.live += size;
continue;
}
// If the number of links is greater than 1, it is still possible that the file is linked to another snapshot
// So check the datadir for the file too.
} else {
// The manifest and schema.cql files are the only files expected to be in this directory not belonging to the SSTable.
//
// All the others should just generate an exception: there is something wrong, so don't blindly
// add it to the size.
if (name != "manifest.json" && name != "schema.cql") {
details.total += size;
if (sd.number_of_links == 1) {
// File exists only in the snapshot directory.
details.live += size;
continue;
}
auto exists_in_dir = [&] (file& dir, const fs::path& path, std::string_view name) -> future<bool> {
try {
// File exists in the main SSTable directory. Snapshots are not contributing to size
auto psd = co_await io_check(file_stat, dir, name, follow_symlink::no);
// File in main SSTable directory must be hardlinked to the file in the snapshot dir with the same name.
if (psd.device_id != sd.device_id || psd.inode_number != sd.inode_number) {
dblog.warn("[{} device_id={} inode_number={} size={}] is not the same file as [{} device_id={} inode_number={} size={}]",
(path / name).native(), psd.device_id, psd.inode_number, psd.size,
(snapshot_dir / name).native(), sd.device_id, sd.inode_number, sd.size);
co_return false;
}
co_return true;
} catch (std::system_error& e) {
if (e.code() != std::error_code(ENOENT, std::system_category())) {
throw;
}
co_return false;
}
};
// Check staging dir first, as files might be moved from there to the datadir concurrently to this check
if ((!staging_dir || !co_await exists_in_dir(staging_directory, *staging_dir, name)) &&
!co_await exists_in_dir(data_directory, datadir, name)) {
details.live += size;
}
// If the number of links is greater than 1, it is still possible that the file is linked to another snapshot
// So check the datadir for the file too.
} else {
continue;
}
auto exists_in_dir = [&] (file& dir, const fs::path& path, std::string_view name) -> future<bool> {
try {
// File exists in the main SSTable directory. Snapshots are not contributing to size
auto psd = co_await io_check(file_stat, dir, name, follow_symlink::no);
// File in main SSTable directory must be hardlinked to the file in the snapshot dir with the same name.
if (psd.device_id != sd.device_id || psd.inode_number != sd.inode_number) {
dblog.warn("[{} device_id={} inode_number={} size={}] is not the same file as [{} device_id={} inode_number={} size={}]",
(path / name).native(), psd.device_id, psd.inode_number, psd.size,
(snapshot_dir / name).native(), sd.device_id, sd.inode_number, sd.size);
co_return false;
}
co_return true;
} catch (std::system_error& e) {
if (e.code() != std::error_code(ENOENT, std::system_category())) {
throw;
}
co_return false;
}
};
// Check staging dir first, as files might be moved from there to the datadir concurrently to this check
if ((!staging_dir || !co_await exists_in_dir(staging_directory, *staging_dir, name)) &&
!co_await exists_in_dir(data_directory, datadir, name)) {
details.live += size;
}
} catch (...) {
ex = std::current_exception();
}
co_await lister.close();
if (ex) {
co_await coroutine::return_exception_ptr(std::move(ex));
}
co_return details;

View File

@@ -263,9 +263,8 @@ public:
// Enables the dedicated schema commitlog for this table by forwarding to
// the builder's static properties (see _static_props).
void enable_schema_commitlog() {
_static_props.enable_schema_commitlog();
}
void set_is_group0_table() {
_static_props.is_group0_table = true;
enable_schema_commitlog();
void set_is_group0_table(bool enabled = true) {
_static_props.is_group0_table = enabled;
}
class default_names {

View File

@@ -454,7 +454,7 @@ static future<cql3::untyped_result_set> do_execute_cql_with_timeout(sstring req,
auto ps_ptr = qp.get_prepared(cache_key);
if (!ps_ptr) {
const auto msg_ptr = co_await qp.prepare(req, qs, cql3::internal_dialect());
ps_ptr = msg_ptr->get_prepared();
ps_ptr = std::move(msg_ptr->get_prepared());
if (!ps_ptr) {
on_internal_error(paxos_state::logger, "prepared statement is null");
}

View File

@@ -948,10 +948,6 @@ future<> service_level_controller::migrate_to_v2(size_t nodes_count, db::system_
qs,
{},
cql3::query_processor::cache_internal::no);
if (rows->empty()) {
co_return;
}
auto col_names = schema->all_columns() | std::views::transform([] (const auto& col) {return col.name_as_cql_string(); }) | std::ranges::to<std::vector<sstring>>();
auto col_names_str = fmt::to_string(fmt::join(col_names, ", "));

View File

@@ -350,10 +350,6 @@ static void ensure_group0_schema(const group0_command& cmd, const replica::datab
if (!schema->static_props().is_group0_table) {
on_internal_error(slogger, fmt::format("ensure_group0_schema: schema is not group0: {}", schema->cf_name()));
}
if (!schema->static_props().use_schema_commitlog) {
on_internal_error(slogger, fmt::format("ensure_group0_schema: group0 table {} does not use schema commitlog", schema->cf_name()));
}
}
};

View File

@@ -559,7 +559,6 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
group0_id = g0_info.group0_id;
raft::server_address my_addr{my_id, {}};
bool starting_server_as_follower = false;
if (server == nullptr) {
// This is the first time discovery is run. Create and start a Raft server for group 0 on this node.
raft::configuration initial_configuration;
@@ -587,7 +586,6 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
// trigger an empty snapshot transfer.
nontrivial_snapshot = true;
} else {
starting_server_as_follower = true;
co_await handshaker->pre_server_start(g0_info);
}
@@ -616,9 +614,7 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
}
SCYLLA_ASSERT(server);
co_await utils::get_local_injector().inject("join_group0_pause_before_config_check",
utils::wait_for_message(std::chrono::minutes{5}));
if (!starting_server_as_follower && server->get_configuration().contains(my_id)) {
if (server->get_configuration().contains(my_id)) {
// True if we started a new group or completed a configuration change initiated earlier.
group0_log.info("server {} already in group 0 (id {}) as {}", my_id, group0_id,
server->get_configuration().can_vote(my_id)? "voter" : "non-voter");

View File

@@ -6156,57 +6156,6 @@ future<> storage_service::snitch_reconfigured() {
}
}
// Does the local part of global_token_metadata_barrier(), without a raft group0 barrier.
// Bounces to shard 0 first (the topology state machine version is read there), then on
// every shard waits for stale (non-latest) token metadata versions to be released and
// for closing topology sessions to drain.
future<> storage_service::local_topology_barrier() {
// The topology version must be sampled on shard 0, so redirect and recurse.
if (this_shard_id() != 0) {
co_await container().invoke_on(0, [] (storage_service& ss) {
return ss.local_topology_barrier();
});
co_return;
}
auto version = _topology_state_machine._topology.version;
// Test-only error-injection hooks.
utils::get_local_injector().inject("raft_topology_barrier_and_drain_fail_before", [] {
throw std::runtime_error("raft_topology_barrier_and_drain_fail_before injected exception");
});
co_await utils::get_local_injector().inject("pause_before_barrier_and_drain", utils::wait_for_message(std::chrono::minutes(5)));
// Sanity check: while in the double-write state, every transitioning node's IP
// should be resolvable; log (but do not fail) if one is missing.
if (_topology_state_machine._topology.tstate == topology::transition_state::write_both_read_old) {
for (auto& n : _topology_state_machine._topology.transition_nodes) {
if (!_address_map.find(locator::host_id{n.first.uuid()})) {
rtlogger.error("The topology transition is in a double write state but the IP of the node in transition is not known");
break;
}
}
}
// Run the actual barrier on all shards against the version captured above.
co_await container().invoke_on_all([version] (storage_service& ss) -> future<> {
const auto current_version = ss._shared_token_metadata.get()->get_version();
rtlogger.info("Got raft_topology_cmd::barrier_and_drain, version {}, current version {}",
version, current_version);
// This shouldn't happen under normal operation, it's only plausible
// if the topology change coordinator has
// moved to another node and managed to update the topology
// parallel to this method. The previous coordinator
// should be inactive now, so it won't observe this
// exception. By returning exception we aim
// to reveal any other conditions where this may arise.
if (current_version != version) {
co_await coroutine::return_exception(std::runtime_error(
::format("raft topology: command::barrier_and_drain, the version has changed, "
"version {}, current_version {}, the topology change coordinator "
" had probably migrated to another node",
version, current_version)));
}
co_await ss._shared_token_metadata.stale_versions_in_use();
co_await get_topology_session_manager().drain_closing_sessions();
rtlogger.info("raft_topology_cmd::barrier_and_drain done");
});
}
future<raft_topology_cmd_result> storage_service::raft_topology_cmd_handler(raft::term_t term, uint64_t cmd_index, const raft_topology_cmd& cmd) {
raft_topology_cmd_result result;
rtlogger.info("topology cmd rpc {} is called index={}", cmd.cmd, cmd_index);
@@ -6234,6 +6183,12 @@ future<raft_topology_cmd_result> storage_service::raft_topology_cmd_handler(raft
state.last_index = cmd_index;
}
// We capture the topology version right after the checks
// above, before any yields. This is crucial since _topology_state_machine._topology
// might be altered concurrently while this method is running,
// which can cause the fence command to apply an invalid fence version.
const auto version = _topology_state_machine._topology.version;
switch (cmd.cmd) {
case raft_topology_cmd::command::barrier: {
utils::get_local_injector().inject("raft_topology_barrier_fail",
@@ -6272,7 +6227,43 @@ future<raft_topology_cmd_result> storage_service::raft_topology_cmd_handler(raft
}
break;
case raft_topology_cmd::command::barrier_and_drain: {
co_await local_topology_barrier();
utils::get_local_injector().inject("raft_topology_barrier_and_drain_fail_before", [] {
throw std::runtime_error("raft_topology_barrier_and_drain_fail_before injected exception");
});
co_await utils::get_local_injector().inject("pause_before_barrier_and_drain", utils::wait_for_message(std::chrono::minutes(5)));
if (_topology_state_machine._topology.tstate == topology::transition_state::write_both_read_old) {
for (auto& n : _topology_state_machine._topology.transition_nodes) {
if (!_address_map.find(locator::host_id{n.first.uuid()})) {
rtlogger.error("The topology transition is in a double write state but the IP of the node in transition is not known");
break;
}
}
}
co_await container().invoke_on_all([version] (storage_service& ss) -> future<> {
const auto current_version = ss._shared_token_metadata.get()->get_version();
rtlogger.info("Got raft_topology_cmd::barrier_and_drain, version {}, current version {}",
version, current_version);
// This shouldn't happen under normal operation, it's only plausible
// if the topology change coordinator has
// moved to another node and managed to update the topology
// parallel to this method. The previous coordinator
// should be inactive now, so it won't observe this
// exception. By returning exception we aim
// to reveal any other conditions where this may arise.
if (current_version != version) {
co_await coroutine::return_exception(std::runtime_error(
::format("raft topology: command::barrier_and_drain, the version has changed, "
"version {}, current_version {}, the topology change coordinator "
" had probably migrated to another node",
version, current_version)));
}
co_await ss._shared_token_metadata.stale_versions_in_use();
co_await get_topology_session_manager().drain_closing_sessions();
rtlogger.info("raft_topology_cmd::barrier_and_drain done");
});
co_await utils::get_local_injector().inject("raft_topology_barrier_and_drain_fail", [this] (auto& handler) -> future<> {
auto ks = handler.get("keyspace");
@@ -7368,8 +7359,34 @@ future<locator::load_stats> storage_service::load_stats_for_tablet_based_tables(
if (!table) {
continue;
}
auto erm = table->get_effective_replication_map();
auto& token_metadata = erm->get_token_metadata();
auto me = locator::tablet_replica { token_metadata.get_my_id(), this_shard_id() };
locator::combined_load_stats combined_ls { table->table_load_stats() };
// It's important to tackle the anomaly in reported size, since both leaving and
// pending replicas could otherwise be accounted during tablet migration.
// If transition hasn't reached cleanup stage, then leaving replicas are accounted.
// If transition is past cleanup stage, then pending replicas are accounted.
// This helps to reduce the discrepancy window.
auto tablet_filter = [&me] (const locator::tablet_map& tmap, locator::global_tablet_id id) {
auto transition = tmap.get_tablet_transition_info(id.tablet);
auto& info = tmap.get_tablet_info(id.tablet);
// if tablet is not in transit, it's filtered in.
if (!transition) {
return true;
}
bool is_pending = transition->pending_replica == me;
bool is_leaving = locator::get_leaving_replica(info, *transition) == me;
auto s = transition->reads; // read selector
return (!is_pending && !is_leaving)
|| (is_leaving && s == locator::read_replica_set_selector::previous)
|| (is_pending && s == locator::read_replica_set_selector::next);
};
locator::combined_load_stats combined_ls { table->table_load_stats(tablet_filter) };
load_stats.tables.emplace(id, std::move(combined_ls.table_ls));
tablet_sizes_per_shard[this_shard_id()].size += load_stats.tablet_stats[this_host].add_tablet_sizes(combined_ls.tablet_ls);

View File

@@ -944,9 +944,6 @@ public:
future<bool> ongoing_rf_change(const group0_guard& guard, sstring ks) const;
future<> raft_initialize_discovery_leader(const join_node_request_params& params);
future<> initialize_done_topology_upgrade_state();
// Does the local part of global_token_metadata_barrier(), without a raft group0 barrier.
// In particular, waits for non-latest local effective replication maps (erms) to be released.
future<> storage_service::local_topology_barrier();
private:
// State machine that is responsible for topology change
topology_state_machine& _topology_state_machine;

View File

@@ -21,6 +21,7 @@ namespace service {
// Aggregated result of a tablet virtual-task status query.
struct status_helper {
// Status to report back to the task manager.
tasks::task_status status;
// Tablets belonging to the queried task (populated for repair and migration tasks).
utils::chunked_vector<locator::tablet_id> tablets;
// Pending replica of the tablet; set for migration tasks, disengaged otherwise.
std::optional<locator::tablet_replica> pending_replica;
};
@@ -147,40 +148,18 @@ future<std::optional<tasks::task_status>> tablet_virtual_task::wait(tasks::task_
}
tasks::tmlogger.info("tablet_virtual_task: wait until tablet operation is finished");
co_await utils::get_local_injector().inject("tablet_virtual_task_wait", utils::wait_for_message(60s));
while (true) {
co_await _ss._topology_state_machine.event.wait([&] {
if (!_ss.get_token_metadata().tablets().has_tablet_map(table)) {
return true;
}
auto& tmap = _ss.get_token_metadata().tablets().get_tablet_map(table);
if (is_resize_task(task_type)) { // Resize task.
return tmap.resize_task_info().tablet_task_id.uuid() != id.uuid();
} else if (tablet_id_opt.has_value()) { // Migration task.
return tmap.get_tablet_info(tablet_id_opt.value()).migration_task_info.tablet_task_id.uuid() != id.uuid();
} else { // Repair task.
return true;
}
});
if (!is_repair_task(task_type)) {
break;
co_await _ss._topology_state_machine.event.wait([&] {
auto& tmap = _ss.get_token_metadata().tablets().get_tablet_map(table);
if (is_resize_task(task_type)) { // Resize task.
return tmap.resize_task_info().tablet_task_id.uuid() != id.uuid();
} else if (tablet_id_opt.has_value()) { // Migration task.
return tmap.get_tablet_info(tablet_id_opt.value()).migration_task_info.tablet_task_id.uuid() != id.uuid();
} else { // Repair task.
return std::all_of(res->tablets.begin(), res->tablets.end(), [&] (const locator::tablet_id& tablet) {
return tmap.get_tablet_info(tablet).repair_task_info.tablet_task_id.uuid() != id.uuid();
});
}
auto tmptr = _ss.get_token_metadata_ptr();
if (!_ss.get_token_metadata().tablets().has_tablet_map(table)) {
break;
}
auto& tmap = tmptr->tablets().get_tablet_map(table);
bool repair_still_running = false;
co_await tmap.for_each_tablet([&] (locator::tablet_id tid, const locator::tablet_info& info) {
repair_still_running = repair_still_running || (info.repair_task_info.is_valid() && info.repair_task_info.tablet_task_id.uuid() == id.uuid());
return make_ready_future();
});
if (!repair_still_running) {
break;
}
}
});
res->status.state = tasks::task_manager::task_state::done; // Failed repair task is retried.
if (is_migration_task(task_type)) {
@@ -190,9 +169,9 @@ future<std::optional<tasks::task_status>> tablet_virtual_task::wait(tasks::task_
} else if (is_resize_task(task_type)) {
auto new_tablet_count = _ss.get_token_metadata().tablets().get_tablet_map(table).tablet_count();
res->status.state = new_tablet_count == tablet_count ? tasks::task_manager::task_state::suspended : tasks::task_manager::task_state::done;
res->status.children = task_type == locator::tablet_task_type::split ? co_await get_children(get_module(), id, _ss.get_token_metadata_ptr()) : utils::chunked_vector<tasks::task_identity>{};
res->status.children = task_type == locator::tablet_task_type::split ? co_await get_children(get_module(), id, std::bind_front(&gms::gossiper::is_alive, &_ss.gossiper())) : utils::chunked_vector<tasks::task_identity>{};
} else {
res->status.children = co_await get_children(get_module(), id, _ss.get_token_metadata_ptr());
res->status.children = co_await get_children(get_module(), id, std::bind_front(&gms::gossiper::is_alive, &_ss.gossiper()));
}
res->status.end_time = db_clock::now(); // FIXME: Get precise end time.
co_return res->status;
@@ -278,7 +257,6 @@ future<std::optional<status_helper>> tablet_virtual_task::get_status_helper(task
auto& tmap = tmptr->tablets().get_tablet_map(table);
bool repair_task_finished = false;
bool repair_task_pending = false;
bool no_tablets_processed = true;
if (is_repair_task(task_type)) {
auto progress = co_await _ss._repair.local().get_tablet_repair_task_progress(id);
if (progress) {
@@ -295,37 +273,37 @@ future<std::optional<status_helper>> tablet_virtual_task::get_status_helper(task
auto& task_info = info.repair_task_info;
if (task_info.tablet_task_id.uuid() == id.uuid()) {
update_status(task_info, res.status, sched_nr);
no_tablets_processed = false;
res.tablets.push_back(tid);
}
return make_ready_future();
});
res.status.children = co_await get_children(get_module(), id, _ss.get_token_metadata_ptr());
res.status.children = co_await get_children(get_module(), id, std::bind_front(&gms::gossiper::is_alive, &_ss.gossiper()));
} else if (is_migration_task(task_type)) { // Migration task.
auto tablet_id = hint.get_tablet_id();
res.pending_replica = tmap.get_tablet_transition_info(tablet_id)->pending_replica;
auto& task_info = tmap.get_tablet_info(tablet_id).migration_task_info;
if (task_info.tablet_task_id.uuid() == id.uuid()) {
update_status(task_info, res.status, sched_nr);
no_tablets_processed = false;
res.tablets.push_back(tablet_id);
}
} else { // Resize task.
auto& task_info = tmap.resize_task_info();
if (task_info.tablet_task_id.uuid() == id.uuid()) {
update_status(task_info, res.status, sched_nr);
res.status.state = tasks::task_manager::task_state::running;
res.status.children = task_type == locator::tablet_task_type::split ? co_await get_children(get_module(), id, _ss.get_token_metadata_ptr()) : utils::chunked_vector<tasks::task_identity>{};
res.status.children = task_type == locator::tablet_task_type::split ? co_await get_children(get_module(), id, std::bind_front(&gms::gossiper::is_alive, &_ss.gossiper())) : utils::chunked_vector<tasks::task_identity>{};
co_return res;
}
}
if (!no_tablets_processed) {
if (!res.tablets.empty()) {
res.status.state = sched_nr == 0 ? tasks::task_manager::task_state::created : tasks::task_manager::task_state::running;
co_return res;
}
if (repair_task_pending) {
// When repair_task_pending is true, the res.tablets will be empty iff the request is aborted by user.
res.status.state = no_tablets_processed ? tasks::task_manager::task_state::failed : tasks::task_manager::task_state::running;
res.status.state = res.tablets.empty() ? tasks::task_manager::task_state::failed : tasks::task_manager::task_state::running;
co_return res;
}
if (repair_task_finished) {

View File

@@ -2193,19 +2193,6 @@ class topology_coordinator : public endpoint_lifecycle_subscriber
_tablet_allocator.set_load_stats(reconciled_stats);
}
}
// Wait for the background storage group merge to finish before releasing the state machine.
// Background merge holds the old erm, so a successful barrier joins with it.
// This guarantees that the background merge doesn't run concurrently with the next merge.
// Replica-side storage group merge takes compaction locks on the tablet's main compaction group, released
// by the background merge. If the next merge starts before the background merge finishes, it can cause a deadlock.
// The background merge fiber will try to stop a compaction group which is locked, and the lock is held
// by the background merge fiber.
tm = nullptr;
if (!guard) {
guard = co_await start_operation();
}
co_await global_tablet_token_metadata_barrier(std::move(guard));
}
future<> handle_truncate_table(group0_guard guard) {

View File

@@ -201,47 +201,95 @@ public:
virtual future<std::optional<entry_info>> next_entry() = 0;
};
// Promoted index information produced by the parser.
// A plain trivially-movable aggregate so it can be stored in LSA-managed vectors.
struct parsed_promoted_index_entry {
// Deletion time of the partition this index entry covers.
deletion_time del_time;
// Start offset of the promoted index data — presumably within the index file; TODO confirm.
uint64_t promoted_index_start;
// Size in bytes of the promoted index data.
uint32_t promoted_index_size;
// Number of promoted index blocks.
uint32_t num_blocks;
};
// Allocated inside LSA.
class promoted_index {
deletion_time _del_time;
uint64_t _promoted_index_start;
uint32_t _promoted_index_size;
uint32_t _num_blocks;
public:
promoted_index(const schema& s,
deletion_time del_time,
uint64_t promoted_index_start,
uint32_t promoted_index_size,
uint32_t num_blocks)
: _del_time{del_time}
, _promoted_index_start(promoted_index_start)
, _promoted_index_size(promoted_index_size)
, _num_blocks(num_blocks)
{ }
using promoted_index = parsed_promoted_index_entry;
[[nodiscard]] deletion_time get_deletion_time() const { return _del_time; }
[[nodiscard]] uint32_t get_promoted_index_size() const { return _promoted_index_size; }
// Call under allocating_section.
// For sstable versions >= mc the returned cursor will be of type `bsearch_clustered_cursor`.
std::unique_ptr<clustered_index_cursor> make_cursor(shared_sstable,
reader_permit,
tracing::trace_state_ptr,
file_input_stream_options,
use_caching);
};
// A partition index element.
// Allocated inside LSA.
struct [[gnu::packed]] index_entry {
mutable int64_t raw_token;
uint64_t data_file_offset;
uint32_t key_offset;
class index_entry {
private:
managed_bytes _key;
mutable std::optional<dht::token> _token;
uint64_t _position;
managed_ref<promoted_index> _index;
uint64_t position() const { return data_file_offset; }
dht::raw_token token() const { return dht::raw_token(raw_token); }
public:
key_view get_key() const {
return key_view{_key};
}
// May allocate so must be called under allocating_section.
decorated_key_view get_decorated_key(const schema& s) const {
if (!_token) {
_token.emplace(s.get_partitioner().get_token(get_key()));
}
return decorated_key_view(*_token, get_key());
}
uint64_t position() const { return _position; };
std::optional<deletion_time> get_deletion_time() const {
if (_index) {
return _index->get_deletion_time();
}
return {};
}
index_entry(managed_bytes&& key, uint64_t position, managed_ref<promoted_index>&& index)
: _key(std::move(key))
, _position(position)
, _index(std::move(index))
{}
index_entry(index_entry&&) = default;
index_entry& operator=(index_entry&&) = default;
// Can be nullptr
const managed_ref<promoted_index>& get_promoted_index() const { return _index; }
managed_ref<promoted_index>& get_promoted_index() { return _index; }
uint32_t get_promoted_index_size() const { return _index ? _index->get_promoted_index_size() : 0; }
size_t external_memory_usage() const {
return _key.external_memory_usage() + _index.external_memory_usage();
}
};
// Required for optimized LSA migration of storage of managed_vector.
static_assert(std::is_trivially_move_assignable_v<index_entry>);
static_assert(std::is_trivially_move_assignable_v<parsed_promoted_index_entry>);
// A partition index page.
//
// Allocated in the standard allocator space but with an LSA allocator as the current allocator.
// So the shallow part is in the standard allocator but all indirect objects are inside LSA.
class partition_index_page {
public:
lsa::chunked_managed_vector<index_entry> _entries;
managed_bytes _key_storage;
// Stores promoted index information of index entries.
// The i-th element corresponds to the i-th entry in _entries.
// Can be smaller than _entries. If _entries[i] doesn't have a matching element in _promoted_indexes then
// that entry doesn't have a promoted index.
// Kept separately to avoid paying for storage cost in pages where no entry has a promoted index,
// which is typical in workloads with small partitions.
lsa::chunked_managed_vector<promoted_index> _promoted_indexes;
lsa::chunked_managed_vector<managed_ref<index_entry>> _entries;
public:
partition_index_page() = default;
partition_index_page(partition_index_page&&) noexcept = default;
@@ -250,68 +298,15 @@ public:
bool empty() const { return _entries.empty(); }
size_t size() const { return _entries.size(); }
stop_iteration clear_gently() {
// Vectors have trivial storage, so are fast to destroy.
return stop_iteration::yes;
}
void clear_one_entry() {
_entries.pop_back();
}
bool has_promoted_index(size_t i) const {
return i < _promoted_indexes.size() && _promoted_indexes[i].promoted_index_size > 0;
}
/// Get promoted index for the i-th entry.
/// Call only when has_promoted_index(i) is true.
const promoted_index& get_promoted_index(size_t i) const {
return _promoted_indexes[i];
}
/// Get promoted index for the i-th entry.
/// Call only when has_promoted_index(i) is true.
promoted_index& get_promoted_index(size_t i) {
return _promoted_indexes[i];
}
/// Get promoted index size for the i-th entry.
uint32_t get_promoted_index_size(size_t i) const {
return has_promoted_index(i) ? get_promoted_index(i).promoted_index_size : 0;
}
/// Get deletion_time for partition represented by the i-th entry.
/// Returns disengaged optional if the entry doesn't have a promoted index, so we don't know the deletion_time.
/// It has to be read from the data file.
std::optional<deletion_time> get_deletion_time(size_t i) const {
if (has_promoted_index(i)) {
return get_promoted_index(i).del_time;
}
return {};
}
key_view get_key(size_t i) const {
auto start = _entries[i].key_offset;
auto end = i + 1 < _entries.size() ? _entries[i + 1].key_offset : _key_storage.size();
auto v = managed_bytes_view(_key_storage).prefix(end);
v.remove_prefix(start);
return key_view(v);
}
decorated_key_view get_decorated_key(const schema& s, size_t i) const {
auto key = get_key(i);
auto t = _entries[i].token();
if (!t) {
t = dht::raw_token(s.get_partitioner().get_token(key));
_entries[i].raw_token = t.value;
}
return decorated_key_view(dht::token(t), key);
}
size_t external_memory_usage() const {
size_t size = _entries.external_memory_usage();
size += _promoted_indexes.external_memory_usage();
size += _key_storage.external_memory_usage();
for (auto&& e : _entries) {
size += sizeof(index_entry) + e->external_memory_usage();
}
return size;
}
};

View File

@@ -25,6 +25,14 @@ namespace sstables {
extern seastar::logger sstlog;
extern thread_local mc::cached_promoted_index::metrics promoted_index_cache_metrics;
// Promoted index information produced by the parser.
// A plain trivially-movable aggregate so it can be stored in LSA-managed vectors.
struct parsed_promoted_index_entry {
// Deletion time of the partition this index entry covers.
deletion_time del_time;
// Start offset of the promoted index data — presumably within the index file; TODO confirm.
uint64_t promoted_index_start;
// Size in bytes of the promoted index data.
uint32_t promoted_index_size;
// Number of promoted index blocks.
uint32_t num_blocks;
};
// Partition index entry information produced by the parser.
struct parsed_partition_index_entry {
temporary_buffer<char> key;
@@ -45,10 +53,9 @@ class index_consumer {
schema_ptr _s;
logalloc::allocating_section _alloc_section;
logalloc::region& _region;
utils::chunked_vector<parsed_partition_index_entry> _parsed_entries;
size_t _max_promoted_index_entry_plus_one = 0; // Highest index +1 in _parsed_entries which has a promoted index.
size_t _key_storage_size = 0;
public:
index_list indexes;
index_consumer(logalloc::region& r, schema_ptr s)
: _s(s)
, _alloc_section(abstract_formatter([s] (fmt::format_context& ctx) {
@@ -57,63 +64,36 @@ public:
, _region(r)
{ }
void consume_entry(parsed_partition_index_entry&& e) {
_key_storage_size += e.key.size();
_parsed_entries.emplace_back(std::move(e));
if (e.promoted_index) {
_max_promoted_index_entry_plus_one = std::max(_max_promoted_index_entry_plus_one, _parsed_entries.size());
}
~index_consumer() {
with_allocator(_region.allocator(), [&] {
indexes._entries.clear_and_release();
});
}
future<index_list> finalize() {
index_list result;
// In case of exception, need to deallocate under region allocator.
auto delete_result = seastar::defer([&] {
void consume_entry(parsed_partition_index_entry&& e) {
_alloc_section(_region, [&] {
with_allocator(_region.allocator(), [&] {
result._entries = {};
result._promoted_indexes = {};
result._key_storage = {};
managed_ref<promoted_index> pi;
if (e.promoted_index) {
pi = make_managed<promoted_index>(*_s,
e.promoted_index->del_time,
e.promoted_index->promoted_index_start,
e.promoted_index->promoted_index_size,
e.promoted_index->num_blocks);
}
auto key = managed_bytes(reinterpret_cast<const bytes::value_type*>(e.key.get()), e.key.size());
indexes._entries.emplace_back(make_managed<index_entry>(std::move(key), e.data_file_offset, std::move(pi)));
});
});
auto i = _parsed_entries.begin();
size_t key_offset = 0;
while (i != _parsed_entries.end()) {
_alloc_section(_region, [&] {
with_allocator(_region.allocator(), [&] {
result._entries.reserve(_parsed_entries.size());
result._promoted_indexes.resize(_max_promoted_index_entry_plus_one);
if (result._key_storage.empty()) {
result._key_storage = managed_bytes(managed_bytes::initialized_later(), _key_storage_size);
}
managed_bytes_mutable_view key_out(result._key_storage);
key_out.remove_prefix(key_offset);
while (i != _parsed_entries.end()) {
parsed_partition_index_entry& e = *i;
if (e.promoted_index) {
result._promoted_indexes[result._entries.size()] = *e.promoted_index;
}
write_fragmented(key_out, std::string_view(e.key.begin(), e.key.size()));
result._entries.emplace_back(index_entry{dht::raw_token().value, e.data_file_offset, key_offset});
++i;
key_offset += e.key.size();
if (need_preempt()) {
break;
}
}
});
});
co_await coroutine::maybe_yield();
}
delete_result.cancel();
_parsed_entries.clear();
co_return std::move(result);
}
void prepare(uint64_t size) {
_max_promoted_index_entry_plus_one = 0;
_key_storage_size = 0;
_parsed_entries.clear();
_parsed_entries.reserve(size);
_alloc_section = logalloc::allocating_section();
_alloc_section(_region, [&] {
with_allocator(_region.allocator(), [&] {
indexes._entries.reserve(size);
});
});
}
};
@@ -218,14 +198,10 @@ public:
switch (_state) {
// START comes first, to make the handling of the 0-quantity case simpler
state_START:
case state::START:
sstlog.trace("{}: pos {} state {} - data.size()={}", fmt::ptr(this), current_pos(), state::START, data.size());
_state = state::KEY_SIZE;
if (data.size() == 0) {
break;
}
[[fallthrough]];
break;
case state::KEY_SIZE:
sstlog.trace("{}: pos {} state {}", fmt::ptr(this), current_pos(), state::KEY_SIZE);
_entry_offset = current_pos();
@@ -251,16 +227,7 @@ public:
case state::PROMOTED_SIZE:
sstlog.trace("{}: pos {} state {}", fmt::ptr(this), current_pos(), state::PROMOTED_SIZE);
_position = this->_u64;
if (is_mc_format() && data.size() && *data.begin() == 0) { // promoted_index_size == 0
data.trim_front(1);
_consumer.consume_entry(parsed_partition_index_entry{
.key = std::move(_key),
.data_file_offset = _position,
.index_offset = _entry_offset,
.promoted_index = std::nullopt
});
goto state_START;
} else if (read_vint_or_uint32(data) != continuous_data_consumer::read_status::ready) {
if (read_vint_or_uint32(data) != continuous_data_consumer::read_status::ready) {
_state = state::PARTITION_HEADER_LENGTH_1;
break;
}
@@ -372,6 +339,33 @@ inline file make_tracked_index_file(sstable& sst, reader_permit permit, tracing:
return tracing::make_traced_file(std::move(f), std::move(trace_state), format("{}:", sst.index_filename()));
}
inline
std::unique_ptr<clustered_index_cursor> promoted_index::make_cursor(shared_sstable sst,
reader_permit permit,
tracing::trace_state_ptr trace_state,
file_input_stream_options options,
use_caching caching)
{
if (sst->get_version() >= sstable_version_types::mc) [[likely]] {
seastar::shared_ptr<cached_file> cached_file_ptr = caching
? sst->_cached_index_file
: seastar::make_shared<cached_file>(make_tracked_index_file(*sst, permit, trace_state, caching),
sst->manager().get_cache_tracker().get_index_cached_file_stats(),
sst->manager().get_cache_tracker().get_lru(),
sst->manager().get_cache_tracker().region(),
sst->_index_file_size);
return std::make_unique<mc::bsearch_clustered_cursor>(*sst->get_schema(),
_promoted_index_start, _promoted_index_size,
promoted_index_cache_metrics, permit,
sst->get_column_translation(), cached_file_ptr, _num_blocks, trace_state, sst->features());
}
auto file = make_tracked_index_file(*sst, permit, std::move(trace_state), caching);
auto promoted_index_stream = make_file_input_stream(std::move(file), _promoted_index_start, _promoted_index_size,options);
return std::make_unique<scanning_clustered_index_cursor>(*sst->get_schema(), permit,
std::move(promoted_index_stream), _promoted_index_size, _num_blocks, std::nullopt);
}
// Less-comparator for lookups in the partition index.
class index_comparator {
dht::ring_position_comparator_for_sstables _tri_cmp;
@@ -382,16 +376,26 @@ public:
return _tri_cmp(e.get_decorated_key(), rp) < 0;
}
bool operator()(const index_entry& e, dht::ring_position_view rp) const {
return _tri_cmp(e.get_decorated_key(_tri_cmp.s), rp) < 0;
}
bool operator()(const managed_ref<index_entry>& e, dht::ring_position_view rp) const {
return operator()(*e, rp);
}
bool operator()(dht::ring_position_view rp, const managed_ref<index_entry>& e) const {
return operator()(rp, *e);
}
bool operator()(dht::ring_position_view rp, const summary_entry& e) const {
return _tri_cmp(e.get_decorated_key(), rp) > 0;
}
};
inline
std::strong_ordering index_entry_tri_cmp(const schema& s, partition_index_page& page, size_t idx, dht::ring_position_view rp) {
dht::ring_position_comparator_for_sstables tri_cmp(s);
return tri_cmp(page.get_decorated_key(s, idx), rp);
}
bool operator()(dht::ring_position_view rp, const index_entry& e) const {
return _tri_cmp(e.get_decorated_key(_tri_cmp.s), rp) > 0;
}
};
// Contains information about index_reader position in the index file
struct index_bound {
@@ -533,7 +537,7 @@ private:
if (ex) {
return make_exception_future<index_list>(std::move(ex));
}
return bound.consumer->finalize();
return make_ready_future<index_list>(std::move(bound.consumer->indexes));
});
});
};
@@ -546,18 +550,17 @@ private:
if (bound.current_list->empty()) {
throw malformed_sstable_exception(format("missing index entry for summary index {} (bound {})", summary_idx, fmt::ptr(&bound)), _sstable->index_filename());
}
bound.data_file_position = bound.current_list->_entries[0].position();
bound.data_file_position = bound.current_list->_entries[0]->position();
bound.element = indexable_element::partition;
bound.end_open_marker.reset();
if (sstlog.is_enabled(seastar::log_level::trace)) {
sstlog.trace("index {} bound {}: page:", fmt::ptr(this), fmt::ptr(&bound));
logalloc::reclaim_lock rl(_region);
for (size_t i = 0; i < bound.current_list->_entries.size(); ++i) {
auto& e = bound.current_list->_entries[i];
for (auto&& e : bound.current_list->_entries) {
auto dk = dht::decorate_key(*_sstable->_schema,
bound.current_list->get_key(i).to_partition_key(*_sstable->_schema));
sstlog.trace(" {} -> {}", dk, e.position());
e->get_key().to_partition_key(*_sstable->_schema));
sstlog.trace(" {} -> {}", dk, e->position());
}
}
@@ -601,13 +604,7 @@ private:
// Valid if partition_data_ready(bound)
index_entry& current_partition_entry(index_bound& bound) {
parse_assert(bool(bound.current_list), _sstable->index_filename());
return bound.current_list->_entries[bound.current_index_idx];
}
// Valid if partition_data_ready(bound)
partition_index_page& current_page(index_bound& bound) {
parse_assert(bool(bound.current_list), _sstable->index_filename());
return *bound.current_list;
return *bound.current_list->_entries[bound.current_index_idx];
}
future<> advance_to_next_partition(index_bound& bound) {
@@ -620,7 +617,7 @@ private:
if (bound.current_index_idx + 1 < bound.current_list->size()) {
++bound.current_index_idx;
bound.current_pi_idx = 0;
bound.data_file_position = bound.current_list->_entries[bound.current_index_idx].position();
bound.data_file_position = bound.current_list->_entries[bound.current_index_idx]->position();
bound.element = indexable_element::partition;
bound.end_open_marker.reset();
return reset_clustered_cursor(bound);
@@ -683,13 +680,9 @@ private:
return advance_to_page(bound, summary_idx).then([this, &bound, pos, summary_idx] {
sstlog.trace("index {}: old page index = {}", fmt::ptr(this), bound.current_index_idx);
auto i = _alloc_section(_region, [&] {
auto& page = *bound.current_list;
auto& s = *_sstable->_schema;
auto r = std::views::iota(bound.current_index_idx, page._entries.size());
auto it = std::ranges::partition_point(r, [&] (int idx) {
return index_entry_tri_cmp(s, page, idx, pos) < 0;
});
return page._entries.begin() + bound.current_index_idx + std::ranges::distance(r.begin(), it);
auto& entries = bound.current_list->_entries;
return std::lower_bound(std::begin(entries) + bound.current_index_idx, std::end(entries), pos,
index_comparator(*_sstable->_schema));
});
// i is valid until next allocation point
auto& entries = bound.current_list->_entries;
@@ -704,7 +697,7 @@ private:
}
bound.current_index_idx = std::distance(std::begin(entries), i);
bound.current_pi_idx = 0;
bound.data_file_position = (*i).position();
bound.data_file_position = (*i)->position();
bound.element = indexable_element::partition;
bound.end_open_marker.reset();
sstlog.trace("index {}: new page index = {}, pos={}", fmt::ptr(this), bound.current_index_idx, bound.data_file_position);
@@ -807,34 +800,6 @@ public:
}
}
static
std::unique_ptr<clustered_index_cursor> make_cursor(const parsed_promoted_index_entry& pi,
shared_sstable sst,
reader_permit permit,
tracing::trace_state_ptr trace_state,
file_input_stream_options options,
use_caching caching)
{
if (sst->get_version() >= sstable_version_types::mc) [[likely]] {
seastar::shared_ptr<cached_file> cached_file_ptr = caching
? sst->_cached_index_file
: seastar::make_shared<cached_file>(make_tracked_index_file(*sst, permit, trace_state, caching),
sst->manager().get_cache_tracker().get_index_cached_file_stats(),
sst->manager().get_cache_tracker().get_lru(),
sst->manager().get_cache_tracker().region(),
sst->_index_file_size);
return std::make_unique<mc::bsearch_clustered_cursor>(*sst->get_schema(),
pi.promoted_index_start, pi.promoted_index_size,
promoted_index_cache_metrics, permit,
sst->get_column_translation(), cached_file_ptr, pi.num_blocks, trace_state, sst->features());
}
auto file = make_tracked_index_file(*sst, permit, std::move(trace_state), caching);
auto promoted_index_stream = make_file_input_stream(std::move(file), pi.promoted_index_start, pi.promoted_index_size,options);
return std::make_unique<scanning_clustered_index_cursor>(*sst->get_schema(), permit,
std::move(promoted_index_stream), pi.promoted_index_size, pi.num_blocks, std::nullopt);
}
// Ensures that partition_data_ready() returns true.
// Can be called only when !eof()
future<> read_partition_data() override {
@@ -870,10 +835,10 @@ public:
clustered_index_cursor* current_clustered_cursor(index_bound& bound) {
if (!bound.clustered_cursor) {
_alloc_section(_region, [&] {
partition_index_page& page = current_page(bound);
if (page.has_promoted_index(bound.current_index_idx)) {
promoted_index& pi = page.get_promoted_index(bound.current_index_idx);
bound.clustered_cursor = make_cursor(pi, _sstable, _permit, _trace_state,
index_entry& e = current_partition_entry(bound);
promoted_index* pi = e.get_promoted_index().get();
if (pi) {
bound.clustered_cursor = pi->make_cursor(_sstable, _permit, _trace_state,
get_file_input_stream_options(), _use_caching);
}
});
@@ -896,15 +861,15 @@ public:
// It may be unavailable for old sstables for which this information was not generated.
// Can be called only when partition_data_ready().
std::optional<sstables::deletion_time> partition_tombstone() override {
return current_page(_lower_bound).get_deletion_time(_lower_bound.current_index_idx);
return current_partition_entry(_lower_bound).get_deletion_time();
}
// Returns the key for current partition.
// Can be called only when partition_data_ready().
std::optional<partition_key> get_partition_key() override {
return _alloc_section(_region, [this] {
return current_page(_lower_bound).get_key(_lower_bound.current_index_idx)
.to_partition_key(*_sstable->_schema);
index_entry& e = current_partition_entry(_lower_bound);
return e.get_key().to_partition_key(*_sstable->_schema);
});
}
@@ -918,8 +883,8 @@ public:
// Returns the number of promoted index entries for the current partition.
// Can be called only when partition_data_ready().
uint64_t get_promoted_index_size() {
partition_index_page& page = current_page(_lower_bound);
return page.get_promoted_index_size(_lower_bound.current_index_idx);
index_entry& e = current_partition_entry(_lower_bound);
return e.get_promoted_index_size();
}
bool partition_data_ready() const override {
@@ -1010,9 +975,9 @@ public:
return make_ready_future<bool>(false);
}
return read_partition_data().then([this, key] {
index_comparator cmp(*_sstable->_schema);
bool found = _alloc_section(_region, [&] {
auto& page = current_page(_lower_bound);
return index_entry_tri_cmp(*_sstable->_schema, page, _lower_bound.current_index_idx, key) == 0;
return cmp(key, current_partition_entry(_lower_bound)) == 0;
});
return make_ready_future<bool>(found);
});

View File

@@ -257,11 +257,14 @@ public:
while (partial_page || i != _cache.end()) {
if (partial_page) {
auto preempted = with_allocator(_region.allocator(), [&] {
while (partial_page->clear_gently() != stop_iteration::yes) {
return true;
while (!partial_page->empty()) {
partial_page->clear_one_entry();
if (need_preempt()) {
return true;
}
}
partial_page.reset();
return need_preempt();
return false;
});
if (preempted) {
auto key = (i != _cache.end()) ? std::optional(i->key()) : std::nullopt;

View File

@@ -1094,6 +1094,7 @@ public:
friend class mc::writer;
friend class index_reader;
friend class promoted_index;
friend class sstables_manager;
template <typename DataConsumeRowsContext>
friend future<std::unique_ptr<DataConsumeRowsContext>>

View File

@@ -436,10 +436,7 @@ tablet_stream_files(netw::messaging_service& ms, std::list<stream_blob_info> sou
stream_options.buffer_size = file_stream_buffer_size;
stream_options.read_ahead = file_stream_read_ahead;
for (auto&& source_info : sources) {
// Keep stream_blob_info alive only at duration of streaming. Allowing the file descriptor
// of the sstable component to be released right after it has been streamed.
auto info = std::exchange(source_info, {});
for (auto& info : sources) {
auto& filename = info.filename;
std::optional<input_stream<char>> fstream;
bool fstream_closed = false;
@@ -620,7 +617,6 @@ tablet_stream_files(netw::messaging_service& ms, std::list<stream_blob_info> sou
ops_id, filename, targets, total_size, get_bw(total_size, start_time));
}
}
co_await utils::get_local_injector().inject("tablet_stream_files_end_wait", utils::wait_for_message(std::chrono::seconds(60)));
if (error) {
blogger.warn("fstream[{}] Master failed sending files_nr={} files={} targets={} send_size={} bw={} error={}",
ops_id, sources.size(), sources, targets, ops_total_size, get_bw(ops_total_size, ops_start_time), error);
@@ -684,20 +680,15 @@ future<stream_files_response> tablet_stream_files_handler(replica::database& db,
if (files.empty()) {
co_return resp;
}
auto sstable_nr = sstables.size();
// Release reference to sstables to be streamed here. Since one sstable is streamed at a time,
// a sstable - that has been compacted - can have its space released from disk right after
// that sstable's content has been fully streamed.
sstables.clear();
blogger.debug("stream_sstables[{}] Started sending sstable_nr={} files_nr={} files={} range={}",
req.ops_id, sstable_nr, files.size(), files, req.range);
req.ops_id, sstables.size(), files.size(), files, req.range);
auto ops_start_time = std::chrono::steady_clock::now();
auto files_nr = files.size();
size_t stream_bytes = co_await tablet_stream_files(ms, std::move(files), req.targets, req.table, req.ops_id, req.topo_guard);
resp.stream_bytes = stream_bytes;
auto duration = std::chrono::steady_clock::now() - ops_start_time;
blogger.info("stream_sstables[{}] Finished sending sstable_nr={} files_nr={} range={} stream_bytes={} stream_time={} stream_bw={}",
req.ops_id, sstable_nr, files_nr, req.range, stream_bytes, duration, get_bw(stream_bytes, ops_start_time));
req.ops_id, sstables.size(), files_nr, req.range, stream_bytes, duration, get_bw(stream_bytes, ops_start_time));
co_return resp;
}

View File

@@ -75,7 +75,7 @@ future<bool> table_helper::try_prepare(bool fallback, cql3::query_processor& qp,
auto& stmt = fallback ? _insert_cql_fallback.value() : _insert_cql;
try {
shared_ptr<cql_transport::messages::result_message::prepared> msg_ptr = co_await qp.prepare(stmt, qs.get_client_state(), dialect);
_prepared_stmt = msg_ptr->get_prepared();
_prepared_stmt = std::move(msg_ptr->get_prepared());
shared_ptr<cql3::cql_statement> cql_stmt = _prepared_stmt->statement;
_insert_stmt = dynamic_pointer_cast<cql3::statements::modification_statement>(cql_stmt);
_is_fallback_stmt = fallback;

View File

@@ -400,7 +400,7 @@ task_manager::virtual_task::impl::impl(module_ptr module) noexcept
: _module(std::move(module))
{}
future<utils::chunked_vector<task_identity>> task_manager::virtual_task::impl::get_children(module_ptr module, task_id parent_id, locator::token_metadata_ptr tmptr) {
future<utils::chunked_vector<task_identity>> task_manager::virtual_task::impl::get_children(module_ptr module, task_id parent_id, std::function<bool(locator::host_id)> is_host_alive) {
auto ms = module->get_task_manager()._messaging;
if (!ms) {
auto ids = co_await module->get_task_manager().get_virtual_task_children(parent_id);
@@ -417,18 +417,19 @@ future<utils::chunked_vector<task_identity>> task_manager::virtual_task::impl::g
tmlogger.info("tasks_vt_get_children: waiting");
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::seconds{10});
});
co_return co_await map_reduce(nodes, [ms, parent_id] (auto host_id) -> future<utils::chunked_vector<task_identity>> {
return ser::tasks_rpc_verbs::send_tasks_get_children(ms, host_id, parent_id).then([host_id] (auto resp) {
return resp | std::views::transform([host_id] (auto id) {
return task_identity{
.host_id = host_id,
.task_id = id
};
}) | std::ranges::to<utils::chunked_vector<task_identity>>();
}).handle_exception_type([host_id, parent_id] (const rpc::closed_error& ex) {
tmlogger.warn("Failed to get children of virtual task with id={} from node {}: {}", parent_id, host_id, ex);
return utils::chunked_vector<task_identity>{};
});
co_return co_await map_reduce(nodes, [ms, parent_id, is_host_alive = std::move(is_host_alive)] (auto host_id) -> future<utils::chunked_vector<task_identity>> {
if (is_host_alive(host_id)) {
return ser::tasks_rpc_verbs::send_tasks_get_children(ms, host_id, parent_id).then([host_id] (auto resp) {
return resp | std::views::transform([host_id] (auto id) {
return task_identity{
.host_id = host_id,
.task_id = id
};
}) | std::ranges::to<utils::chunked_vector<task_identity>>();
});
} else {
return make_ready_future<utils::chunked_vector<task_identity>>();
}
}, utils::chunked_vector<task_identity>{}, [] (auto a, auto&& b) {
std::move(b.begin(), b.end(), std::back_inserter(a));
return a;

View File

@@ -19,7 +19,6 @@
#include "db_clock.hh"
#include "utils/log.hh"
#include "locator/host_id.hh"
#include "locator/token_metadata_fwd.hh"
#include "schema/schema_fwd.hh"
#include "tasks/types.hh"
#include "utils/chunked_vector.hh"
@@ -283,7 +282,7 @@ public:
impl& operator=(impl&&) = delete;
virtual ~impl() = default;
protected:
static future<utils::chunked_vector<task_identity>> get_children(module_ptr module, task_id parent_id, locator::token_metadata_ptr tmptr);
static future<utils::chunked_vector<task_identity>> get_children(module_ptr module, task_id parent_id, std::function<bool(locator::host_id)> is_host_alive);
public:
virtual task_group get_group() const noexcept = 0;
// Returns std::nullopt if an operation with task_id isn't tracked by this virtual_task.

View File

@@ -62,11 +62,7 @@ SEASTAR_TEST_CASE(test_index_doesnt_flood_cache_in_small_partition_workload) {
// cfg.db_config->index_cache_fraction.set(1.0);
return do_with_cql_env_thread([] (cql_test_env& e) {
// We disable compactions because they cause confusing cache mispopulations.
// We disable compression because the sstable writer targets a specific
// (*compressed* data file size : summary file size) ratio,
// so the number of keys per index page becomes hard to control,
// and might be arbitrarily large.
e.execute_cql("CREATE TABLE ks.t(pk blob PRIMARY KEY) WITH compaction = { 'class' : 'NullCompactionStrategy' } AND compression = {'sstable_compression': ''};").get();
e.execute_cql("CREATE TABLE ks.t(pk blob PRIMARY KEY) WITH compaction = { 'class' : 'NullCompactionStrategy' };").get();
auto insert_query = e.prepare("INSERT INTO ks.t(pk) VALUES (?)").get();
auto select_query = e.prepare("SELECT * FROM t WHERE pk = ?").get();
@@ -158,11 +154,7 @@ SEASTAR_TEST_CASE(test_index_is_cached_in_big_partition_workload) {
// cfg.db_config->index_cache_fraction.set(0.0);
return do_with_cql_env_thread([] (cql_test_env& e) {
// We disable compactions because they cause confusing cache mispopulations.
// We disable compression because the sstable writer targets a specific
// (*compressed* data file size : summary file size) ratio,
// so the number of keys per index page becomes hard to control,
// and might be arbitrarily large.
e.execute_cql("CREATE TABLE ks.t(pk bigint, ck bigint, v blob, primary key (pk, ck)) WITH compaction = { 'class' : 'NullCompactionStrategy' } AND compression = {'sstable_compression': ''};").get();
e.execute_cql("CREATE TABLE ks.t(pk bigint, ck bigint, v blob, primary key (pk, ck)) WITH compaction = { 'class' : 'NullCompactionStrategy' };").get();
auto insert_query = e.prepare("INSERT INTO ks.t(pk, ck, v) VALUES (?, ?, ?)").get();
auto select_query = e.prepare("SELECT * FROM t WHERE pk = ? AND ck = ?").get();

View File

@@ -1111,30 +1111,6 @@ SEASTAR_TEST_CASE(test_snapshot_ctl_true_snapshots_size) {
});
}
SEASTAR_TEST_CASE(test_snapshot_ctl_details_exception_handling) {
#ifndef SCYLLA_ENABLE_ERROR_INJECTION
testlog.debug("Skipping test as it depends on error injection. Please run in mode where it's enabled (debug,dev).\n");
return make_ready_future();
#endif
return do_with_some_data_in_thread({"cf"}, [] (cql_test_env& e) {
sharded<db::snapshot_ctl> sc;
sc.start(std::ref(e.db()), std::ref(e.get_task_manager()), std::ref(e.get_sstorage_manager()), db::snapshot_ctl::config{}).get();
auto stop_sc = deferred_stop(sc);
auto& cf = e.local_db().find_column_family("ks", "cf");
take_snapshot(e).get();
utils::get_local_injector().enable("get_snapshot_details", true);
BOOST_REQUIRE_THROW(cf.get_snapshot_details().get(), std::runtime_error);
utils::get_local_injector().enable("per-snapshot-get_snapshot_details", true);
BOOST_REQUIRE_THROW(cf.get_snapshot_details().get(), std::runtime_error);
auto details = cf.get_snapshot_details().get();
BOOST_REQUIRE_EQUAL(details.size(), 1);
});
}
// toppartitions_query caused a lw_shared_ptr to cross shards when moving results, #5104
SEASTAR_TEST_CASE(toppartitions_cross_shard_schema_ptr) {
return do_with_cql_env_and_compaction_groups([] (cql_test_env& e) {
@@ -1881,7 +1857,7 @@ SEASTAR_THREAD_TEST_CASE(test_tombstone_gc_state_snapshot) {
schema_builder::register_schema_initializer([] (schema_builder& builder) {
if (builder.ks_name() == "test" && builder.cf_name() == "table_gc_mode_group0") {
builder.set_is_group0_table();
builder.set_is_group0_table(true);
}
});
auto table_gc_mode_group0 = schema_builder("test", "table_gc_mode_group0")

View File

@@ -252,7 +252,7 @@ SEASTAR_TEST_CASE(test_group0_batch) {
// (group0 mutations are not allowed on non-group0 tables)
schema_builder::register_schema_initializer([](schema_builder& builder) {
if (builder.cf_name() == "test_group0_batch") {
builder.set_is_group0_table();
builder.set_is_group0_table(true);
}
});
@@ -345,29 +345,4 @@ SEASTAR_TEST_CASE(test_group0_batch) {
});
}
SEASTAR_TEST_CASE(test_group0_tables_use_schema_commitlog) {
return do_with_cql_env([] (cql_test_env& e) {
schema_builder::register_schema_initializer([](schema_builder& builder) {
if (builder.cf_name() == "test_group0_tables_use_schema_commitlog1") {
builder.set_is_group0_table();
}
});
auto test_group0_tables_use_schema_commitlog1 = schema_builder("test", "test_group0_tables_use_schema_commitlog1")
.with_column("pk", utf8_type, column_kind::partition_key)
.build();
auto test_group0_tables_use_schema_commitlog2 = schema_builder("test", "test_group0_tables_use_schema_commitlog2")
.with_column("pk", utf8_type, column_kind::partition_key)
.build();
BOOST_REQUIRE(test_group0_tables_use_schema_commitlog1->static_props().is_group0_table);
BOOST_REQUIRE(test_group0_tables_use_schema_commitlog1->static_props().use_schema_commitlog);
BOOST_REQUIRE(!test_group0_tables_use_schema_commitlog2->static_props().is_group0_table);
BOOST_REQUIRE(!test_group0_tables_use_schema_commitlog2->static_props().use_schema_commitlog);
return make_ready_future();
});
}
BOOST_AUTO_TEST_SUITE_END()

View File

@@ -1499,7 +1499,7 @@ SEASTAR_THREAD_TEST_CASE(tablets_simple_rack_aware_view_pairing_test) {
base_host,
base_erm,
view_erm,
true, // uses NTS
*ars_ptr,
base_token,
view_token,
use_tablets,

View File

@@ -719,7 +719,7 @@ SEASTAR_THREAD_TEST_CASE(test_dht_subtract_ranges) {
auto get_random_ranges = [&] (size_t max_count) {
auto count = tests::random::get_int<size_t>(1, max_count);
utils::chunked_vector<dht::partition_range> ranges;
dht::partition_range_vector ranges;
ranges.reserve(count);
for (size_t i = 0; i < count; i++) {

View File

@@ -20,24 +20,16 @@ static void add_entry(logalloc::region& r,
const schema& s,
partition_index_page& page,
const partition_key& key,
uint64_t position,
std::optional<parsed_promoted_index_entry> promoted_index = std::nullopt)
uint64_t position)
{
logalloc::allocating_section as;
as(r, [&] {
with_allocator(r.allocator(), [&] {
sstables::key sst_key = sstables::key::from_partition_key(s, key);
auto key_offset = page._key_storage.size();
auto old_storage = std::move(page._key_storage);
page._key_storage = managed_bytes(managed_bytes::initialized_later(), key_offset + sst_key.get_bytes().size());
auto out = managed_bytes_mutable_view(page._key_storage);
write_fragmented(out, managed_bytes_view(old_storage));
write_fragmented(out, single_fragmented_view(bytes_view(sst_key)));
page._entries.push_back(index_entry{dht::raw_token_opt()->value, position, key_offset});
if (promoted_index) {
page._promoted_indexes.resize(page._entries.size());
page._promoted_indexes[page._entries.size() - 1] = *promoted_index;
}
page._entries.push_back(make_managed<index_entry>(
managed_bytes(sst_key.get_bytes()),
position,
managed_ref<promoted_index>()));
});
});
}
@@ -62,10 +54,10 @@ static partition_index_page make_page0(logalloc::region& r, simple_schema& s) {
static void has_page0(partition_index_cache::entry_ptr ptr) {
BOOST_REQUIRE(!ptr->empty());
BOOST_REQUIRE_EQUAL(ptr->_entries.size(), 4);
BOOST_REQUIRE_EQUAL(ptr->_entries[0].position(), 0);
BOOST_REQUIRE_EQUAL(ptr->_entries[1].position(), 1);
BOOST_REQUIRE_EQUAL(ptr->_entries[2].position(), 2);
BOOST_REQUIRE_EQUAL(ptr->_entries[3].position(), 3);
BOOST_REQUIRE_EQUAL(ptr->_entries[0]->position(), 0);
BOOST_REQUIRE_EQUAL(ptr->_entries[1]->position(), 1);
BOOST_REQUIRE_EQUAL(ptr->_entries[2]->position(), 2);
BOOST_REQUIRE_EQUAL(ptr->_entries[3]->position(), 3);
};
SEASTAR_THREAD_TEST_CASE(test_caching) {
@@ -147,59 +139,6 @@ SEASTAR_THREAD_TEST_CASE(test_caching) {
}
}
SEASTAR_THREAD_TEST_CASE(test_sparse_promoted_index) {
::lru lru;
simple_schema s;
logalloc::region r;
partition_index_cache_stats stats;
partition_index_cache cache(lru, r, stats);
auto page0_loader = [&] (partition_index_cache::key_type k) -> future<partition_index_page> {
partition_index_page page;
auto destroy_page = defer([&] {
with_allocator(r.allocator(), [&] {
auto p = std::move(page);
});
});
add_entry(r, *s.schema(), page, s.make_pkey(0).key(), 0);
add_entry(r, *s.schema(), page, s.make_pkey(1).key(), 1, parsed_promoted_index_entry{
.promoted_index_start = 1,
.promoted_index_size = 10,
.num_blocks = 3
});
add_entry(r, *s.schema(), page, s.make_pkey(2).key(), 2);
add_entry(r, *s.schema(), page, s.make_pkey(3).key(), 3, parsed_promoted_index_entry{
.promoted_index_start = 2,
.promoted_index_size = 13,
.num_blocks = 1
});
add_entry(r, *s.schema(), page, s.make_pkey(4).key(), 4);
destroy_page.cancel();
co_return std::move(page);
};
auto page = cache.get_or_load(0, page0_loader).get();
BOOST_REQUIRE_EQUAL(page->has_promoted_index(0), false);
BOOST_REQUIRE_EQUAL(page->has_promoted_index(1), true);
BOOST_REQUIRE_EQUAL(page->has_promoted_index(2), false);
BOOST_REQUIRE_EQUAL(page->has_promoted_index(3), true);
BOOST_REQUIRE_EQUAL(page->has_promoted_index(4), false);
BOOST_REQUIRE_EQUAL(page->get_promoted_index(1).promoted_index_start, 1);
BOOST_REQUIRE_EQUAL(page->get_promoted_index(1).promoted_index_size, 10);
BOOST_REQUIRE_EQUAL(page->get_promoted_index(1).num_blocks, 3);
BOOST_REQUIRE_EQUAL(page->get_promoted_index(3).promoted_index_start, 2);
BOOST_REQUIRE_EQUAL(page->get_promoted_index(3).promoted_index_size, 13);
BOOST_REQUIRE_EQUAL(page->get_promoted_index(3).num_blocks, 1);
with_allocator(r.allocator(), [&] {
lru.evict_all();
});
}
template <typename T>
static future<> ignore_result(future<T>&& f) {
return f.then_wrapped([] (auto&& f) {

View File

@@ -1607,29 +1607,6 @@ future<> apply_resize_plan(token_metadata& tm, const migration_plan& plan) {
}
}
static
future<group0_guard> save_token_metadata(cql_test_env& e, group0_guard guard) {
auto& stm = e.local_db().get_shared_token_metadata();
auto tm = stm.get();
e.get_topology_state_machine().local()._topology.version = tm->get_version();
co_await save_tablet_metadata(e.local_db(), tm->tablets(), guard.write_timestamp());
utils::chunked_vector<frozen_mutation> muts;
muts.push_back(freeze(topology_mutation_builder(guard.write_timestamp())
.set_version(tm->get_version())
.build().to_mutation(db::system_keyspace::topology())));
co_await e.local_db().apply(muts, db::no_timeout);
co_await e.get_storage_service().local().update_tablet_metadata({});
// Need a new guard to make sure later changes use later timestamp.
// Also, so that the table layer processes the changes we persisted, which is important for splits.
// Before we can finalize a split, the storage group needs to process the split by creating split-ready compaction groups.
release_guard(std::move(guard));
abort_source as;
co_return co_await e.get_raft_group0_client().start_operation(as);
}
static
future<> handle_resize_finalize(cql_test_env& e, group0_guard& guard, const migration_plan& plan, shared_load_stats* load_stats) {
auto& talloc = e.get_tablet_allocator().local();
@@ -1649,14 +1626,19 @@ future<> handle_resize_finalize(cql_test_env& e, group0_guard& guard, const migr
co_await stm.mutate_token_metadata([table_id, &new_tmap, &changed] (token_metadata& tm) {
changed = true;
tm.tablets().set_tablet_map(table_id, std::move(new_tmap));
tm.set_version(tm.get_version() + 1);
return make_ready_future<>();
});
}
if (changed) {
// Need to reload on each resize because table object expects tablet count to change by a factor of 2.
guard = co_await save_token_metadata(e, std::move(guard));
co_await save_tablet_metadata(e.local_db(), stm.get()->tablets(), guard.write_timestamp());
co_await e.get_storage_service().local().update_tablet_metadata({});
// Need a new guard to make sure later changes use later timestamp.
release_guard(std::move(guard));
abort_source as;
guard = co_await e.get_raft_group0_client().start_operation(as);
if (load_stats) {
auto new_tm = stm.get();
@@ -1665,11 +1647,6 @@ future<> handle_resize_finalize(cql_test_env& e, group0_guard& guard, const migr
load_stats->stats = *reconciled_stats;
}
}
testlog.debug("Calling local_topology_barrier()");
old_tm = nullptr;
co_await e.get_storage_service().local().local_topology_barrier();
testlog.debug("Finished local_topology_barrier()");
}
}
@@ -1773,22 +1750,13 @@ void do_rebalance_tablets(cql_test_env& e,
}).get();
if (auto_split && load_stats) {
bool reload = false;
auto& tm = *stm.get();
for (const auto& [table, tmap]: tm.tablets().all_tables_ungrouped()) {
if (std::holds_alternative<resize_decision::split>(tmap->resize_decision().way)) {
if (load_stats->stats.tables[table].split_ready_seq_number != tmap->resize_decision().sequence_number) {
testlog.debug("set_split_ready_seq_number({}, {})", table, tmap->resize_decision().sequence_number);
load_stats->set_split_ready_seq_number(table, tmap->resize_decision().sequence_number);
reload = true;
}
testlog.debug("set_split_ready_seq_number({}, {})", table, tmap->resize_decision().sequence_number);
load_stats->set_split_ready_seq_number(table, tmap->resize_decision().sequence_number);
}
}
// Need to order split-ack before split finalization, storage_group assumes that.
if (reload) {
guard = save_token_metadata(e, std::move(guard)).get();
}
}
handle_resize_finalize(e, guard, plan, load_stats).get();

View File

@@ -331,28 +331,4 @@ SEASTAR_THREAD_TEST_CASE(test_stale_version_notification) {
std::cerr.rdbuf(oldCerr);
BOOST_TEST(my_stream.str().find("topology version 0 held for") != std::string::npos);
}
SEASTAR_THREAD_TEST_CASE(test_raw_token) {
    // Exercises dht::raw_token construction, truthiness and ordering,
    // both against other raw_token values and against dht::token.
    const auto tok_one = dht::token::from_int64(1);
    const auto tok_two = dht::token::from_int64(2);

    // An empty raw_token_opt is falsy until a value is assigned.
    dht::raw_token_opt maybe_raw;
    BOOST_REQUIRE(!maybe_raw);
    maybe_raw = dht::raw_token(tok_one);
    BOOST_REQUIRE(*maybe_raw == tok_one);

    // A default-constructed raw_token equals the minimum token and sorts
    // before every other kind of token.
    BOOST_REQUIRE(dht::raw_token() == dht::minimum_token());
    BOOST_REQUIRE(dht::raw_token() < dht::raw_token(dht::first_token()));
    BOOST_REQUIRE(dht::raw_token() < dht::first_token());
    BOOST_REQUIRE(dht::raw_token() < dht::maximum_token());

    // A raw_token wrapping a real token is truthy and compares consistently
    // with the token it wraps.
    const auto raw_one = dht::raw_token(tok_one);
    BOOST_REQUIRE(bool(raw_one));
    BOOST_REQUIRE(raw_one > dht::raw_token());
    BOOST_REQUIRE(raw_one > dht::minimum_token());
    BOOST_REQUIRE_EQUAL(raw_one, tok_one);
    BOOST_REQUIRE(raw_one == tok_one);
    BOOST_REQUIRE(raw_one < tok_two);
    BOOST_REQUIRE(raw_one < dht::maximum_token());
}
}

View File

@@ -3221,87 +3221,6 @@ SEASTAR_TEST_CASE(test_view_update_generating_writetime) {
});
}
// Usually if only an unselected column in the base table is modified, we expect an optimization that a view
// update is not done, but we had a bug (https://scylladb.atlassian.net/browse/SCYLLADB-808) where the existence
// of a collection selected in the view caused us to skip this optimization, even when it was not modified.
// This test reproduces this bug.
SEASTAR_TEST_CASE(test_view_update_unmodified_collection) {
    // In this test we verify that we correctly skip (or not) view updates to a view that selects
    // a collection column. We use two MVs, similarly to the test above.
    return do_with_cql_env_thread([] (cql_test_env& e) {
        auto f1 = e.local_view_builder().wait_until_built("ks", "mv1");
        auto f2 = e.local_view_builder().wait_until_built("ks", "mv2");
        e.execute_cql("CREATE TABLE t (k int, c int, a int, b list<int>, g int, primary key(k, c))").get();
        e.execute_cql("CREATE MATERIALIZED VIEW mv1 AS SELECT k,c,a,b FROM t "
                      "WHERE k IS NOT NULL AND c IS NOT NULL PRIMARY KEY (c, k)").get();
        e.execute_cql("CREATE MATERIALIZED VIEW mv2 AS SELECT k,c,a,b FROM t "
                      "WHERE k IS NOT NULL AND c IS NOT NULL AND a IS NOT NULL PRIMARY KEY (c, k, a)").get();
        f1.get();
        f2.get();
        // Number of view updates pushed by the base table "t", summed over all shards.
        auto total_t_view_updates = [&] {
            return e.db().map_reduce0([] (replica::database& local_db) {
                const db::view::stats& local_stats = local_db.find_column_family("ks", "t").get_view_stats();
                return local_stats.view_updates_pushed_local + local_stats.view_updates_pushed_remote;
            }, 0, std::plus<int64_t>()).get();
        };
        // Number of writes applied to mv1, summed over all shards.
        auto total_mv1_updates = [&] {
            return e.db().map_reduce0([] (replica::database& local_db) {
                return local_db.find_column_family("ks", "mv1").get_stats().writes.hist.count;
            }, 0, std::plus<int64_t>()).get();
        };
        // Number of writes applied to mv2, summed over all shards.
        auto total_mv2_updates = [&] {
            return e.db().map_reduce0([] (replica::database& local_db) {
                return local_db.find_column_family("ks", "mv2").get_stats().writes.hist.count;
            }, 0, std::plus<int64_t>()).get();
        };
        // Initial insert: both views get a new row (mv2 because 'a' is set),
        // so the base table pushes 2 view updates.
        e.execute_cql("INSERT INTO t (k, c, a) VALUES (1, 1, 1)").get();
        eventually([&] {
            const update_counter results{total_mv1_updates(), total_mv2_updates(), total_t_view_updates()};
            const update_counter expected{1, 1, 2};
            BOOST_REQUIRE_EQUAL(results, expected);
        });
        // We update an unselected column and the collection remains NULL, so we should generate an
        // update to the virtual column in mv1 but not to mv2.
        e.execute_cql("UPDATE t SET g=1 WHERE k=1 AND c=1;").get();
        eventually([&] {
            const update_counter results{total_mv1_updates(), total_mv2_updates(), total_t_view_updates()};
            const update_counter expected{2, 1, 3};
            BOOST_REQUIRE_EQUAL(results, expected);
        });
        // We update the collection with an initial value
        e.execute_cql("UPDATE t SET b=[1] WHERE k=1 AND c=1;").get();
        eventually([&] {
            const update_counter results{total_mv1_updates(), total_mv2_updates(), total_t_view_updates()};
            const update_counter expected{3, 2, 5};
            BOOST_REQUIRE_EQUAL(results, expected);
        });
        // We update an unselected column again with a non-NULL selected collection. Because the liveness of the updated column is unchanged
        // and no other selected column is updated (in particular, the collection column), we should generate no view updates.
        e.execute_cql("UPDATE t SET g=2 WHERE k=1 AND c=1;").get();
        eventually([&] {
            const update_counter results{total_mv1_updates(), total_mv2_updates(), total_t_view_updates()};
            const update_counter expected{3, 2, 5};
            BOOST_REQUIRE_EQUAL(results, expected);
        });
    });
}
SEASTAR_TEST_CASE(test_conflicting_batch) {
return do_with_cql_env_thread([] (cql_test_env& e) {

View File

@@ -114,6 +114,27 @@ async def test_service_levels_upgrade(request, manager: ManagerClient, build_mod
result_with_sl_v2 = await cql.run_async(f"SELECT service_level FROM system.service_levels_v2")
assert set([sl.service_level for sl in result_with_sl_v2]) == set(sls + [DRIVER_SL_NAME] + [sl_v2])
@pytest.mark.asyncio
async def test_service_levels_upgrade_with_empty_legacy_table(manager: ManagerClient):
    """Upgrade to raft topology with an empty legacy
    system_distributed.service_levels table and verify that the migration
    still marks service_level_version as "2".
    """
    config = {**auth_config, "force_gossip_topology_changes": True, "tablets_mode_for_new_keyspaces": "disabled"}
    # First node boots in gossip-topology mode; the rest join normally.
    servers = [await manager.server_add(config=config)]
    del config["force_gossip_topology_changes"]
    for _ in range(2):
        servers.append(await manager.server_add(config=config))

    cql = manager.get_cql()
    assert cql
    hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)

    # Sanity check: the legacy service levels table must be empty.
    legacy_rows = await cql.run_async("SELECT service_level FROM system_distributed.service_levels")
    assert list(legacy_rows) == []

    await manager.api.upgrade_to_raft_topology(hosts[0].address)
    await asyncio.gather(*(wait_until_topology_upgrade_finishes(manager, h.address, time.time() + 60) for h in hosts))

    # Even with nothing to migrate, the version marker must be written.
    version_rows = await cql.run_async("SELECT value FROM system.scylla_local WHERE key = 'service_level_version'")
    assert version_rows[0].value == "2"
@pytest.mark.asyncio
async def test_service_levels_work_during_recovery(manager: ManagerClient):
# FIXME: move this test to the Raft-based recovery procedure or remove it if unneeded.

View File

@@ -254,3 +254,27 @@ async def test_node_ops_task_wait(manager: ManagerClient):
await decommission_task
await waiting_task
@pytest.mark.asyncio
async def test_get_children(manager: ManagerClient):
    """Query a cluster task's status while a child node operation
    (decommission) completes concurrently under an injection pause."""
    module_name = "node_ops"
    task_client = TaskManagerClient(manager.api)
    servers = [await manager.server_add(cmdline=cmdline) for _ in range(2)]

    # Pause the tasks virtual-table get_children path on the first node.
    injection = "tasks_vt_get_children"
    injection_handler = await inject_error_one_shot(manager.api, servers[0].ip_addr, injection)

    log = await manager.server_open_log(servers[0].server_id)
    mark = await log.mark()

    # Pick the cluster-wide task whose status (and children) we will query.
    cluster_tasks = [task for task in await task_client.list_tasks(servers[0].ip_addr, module_name) if task.kind == "cluster"]
    bootstrap_task = cluster_tasks[0]

    async def _decommission():
        # Once the status query is parked on the injection, run a
        # decommission and then release the injection.
        await log.wait_for('tasks_vt_get_children: waiting', from_mark=mark)
        await manager.decommission_node(servers[1].server_id)
        await injection_handler.message()

    async def _get_status():
        await task_client.get_task_status(servers[0].ip_addr, bootstrap_task.task_id)

    await asyncio.gather(_decommission(), _get_status())

View File

@@ -12,11 +12,9 @@ import pytest
from test.pylib.internal_types import ServerInfo
from test.pylib.manager_client import ManagerClient
from test.pylib.repair import create_table_insert_data_for_repair, get_tablet_task_id
from test.pylib.rest_client import read_barrier
from test.pylib.tablets import get_all_tablet_replicas
from test.cluster.conftest import skip_mode
from test.cluster.util import create_new_test_keyspace, new_test_keyspace, get_topology_coordinator, find_server_by_host_id
from test.cluster.test_incremental_repair import trigger_tablet_merge
from test.cluster.util import create_new_test_keyspace, new_test_keyspace
from test.cluster.test_tablets2 import inject_error_on
from test.cluster.tasks.task_manager_client import TaskManagerClient
from test.cluster.tasks.task_manager_types import TaskStatus, TaskStats
@@ -153,45 +151,6 @@ async def test_tablet_repair_task_list(manager: ManagerClient):
await asyncio.gather(run_repair(0, "test"), run_repair(1, "test2"), run_repair(2, "test3"), check_repair_task_list(tm, servers, module_name, ks))
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
async def test_tablet_repair_wait(manager: ManagerClient):
    """Verify that waiting on a tablet repair task completes even when the
    tablets of the repaired table are merged while the wait is in flight.

    The repair is paused via repair_tablet_repair_task_impl_run, the waiter
    is parked on tablet_virtual_task_wait, and then the repair is resumed
    and a tablet merge is triggered before releasing the waiter.
    """
    module_name = "tablets"
    tm = TaskManagerClient(manager.api)
    stop_repair_injection = "repair_tablet_repair_task_impl_run"
    servers, cql, hosts, ks, table_id = await create_table_insert_data_for_repair(manager)
    assert module_name in await tm.list_modules(servers[0].ip_addr), "tablets module wasn't registered"

    # Pause the repair task right after it starts running, then kick off a
    # non-blocking repair and grab its task handle.
    await inject_error_on(manager, stop_repair_injection, servers)
    await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", "all", await_completion=False)
    repair_tasks = await wait_tasks_created(tm, servers[0], module_name, 1, "user_repair", keyspace=ks)
    task = repair_tasks[0]

    log = await manager.server_open_log(servers[0].server_id)
    mark = await log.mark()

    async def wait_for_task():
        # Park the virtual-task wait on an injection, then block until the
        # repair task reaches a final state.
        await enable_injection(manager, servers, "tablet_virtual_task_wait")
        # NOTE: the return value was previously bound to an unused local
        # (status_wait); we only care that the wait completes.
        await tm.wait_for_task(servers[0].ip_addr, task.task_id)

    async def merge_tablets():
        await log.wait_for('tablet_virtual_task: wait until tablet operation is finished', from_mark=mark)
        # Resume repair.
        await message_injection(manager, servers, stop_repair_injection)
        # Merge tablets.
        coord = await find_server_by_host_id(manager, servers, await get_topology_coordinator(manager))
        log2 = await manager.server_open_log(coord.server_id)
        await trigger_tablet_merge(manager, servers, [log2])
        await read_barrier(manager.api, servers[0].ip_addr)
        await message_injection(manager, servers, "tablet_virtual_task_wait")

    await asyncio.gather(wait_for_task(), merge_tablets())
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
async def test_tablet_repair_task_children(manager: ManagerClient):

View File

@@ -1,70 +0,0 @@
#
# Copyright (C) 2026-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#
import logging
import asyncio
import time
import pytest
from test.cluster.util import get_current_group0_config
from test.pylib.manager_client import ManagerClient
from test.pylib.rest_client import read_barrier
from test.pylib.util import wait_for
logger = logging.getLogger(__name__)
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
async def test_bootstrap_with_quick_group0_join(manager: ManagerClient):
    """Regression test for https://scylladb.atlassian.net/browse/SCYLLADB-959.
    The bug was that when the bootstrapping node joined group0 before reaching
    post_server_start, it skipped post_server_start and thus hung forever.
    The test simulates the scenario by starting the second node with the
    join_group0_pause_before_config_check injection. Without the fix, the
    startup times out.
    """
    logger.info("Adding first server")
    s1 = await manager.server_add()
    logger.info("Adding second server with join_group0_pause_before_config_check enabled")
    # Create the node without starting it so we can attach to its log before
    # the injection fires.
    s2 = await manager.server_add(start=False, config={
        'error_injections_at_startup': ['join_group0_pause_before_config_check']
    })
    logger.info(f"Starting {s2}")
    # Start s2 in the background; the injection pauses it mid-join.
    start_task = asyncio.create_task(manager.server_start(s2.server_id))
    s2_log = await manager.server_open_log(s2.server_id)
    await s2_log.wait_for("join_group0_pause_before_config_check: waiting for message", timeout=60)
    s1_host_id = await manager.get_host_id(s1.server_id)
    s2_host_id = await manager.get_host_id(s2.server_id)

    async def s2_in_group0_config_on_s1():
        # Returns True once s1's view of the group0 config contains s2;
        # returns None otherwise so wait_for() keeps polling.
        config = await get_current_group0_config(manager, s1)
        ids = {m[0] for m in config}
        assert s1_host_id in ids  # sanity check
        return True if s2_host_id in ids else None

    # Note: we would like to wait for s2 to see itself in the group0 config, but we can't execute
    # get_current_group0_config for s2, as s2 doesn't handle CQL requests at this point. As a workaround, we wait for s1
    # to see s2 and then perform a read barrier on s2.
    logger.info(f"Waiting for {s1} to see {s2} in the group0 config")
    await wait_for(s2_in_group0_config_on_s1, deadline=time.time() + 60, period=0.1)
    logger.info(f"Performing read barrier on {s2} to make sure it sees itself in the group0 config")
    await read_barrier(manager.api, s2.ip_addr)
    logger.info(f"Unblocking {s2}")
    await manager.api.message_injection(s2.ip_addr, 'join_group0_pause_before_config_check')
    logger.info(f"Waiting for {s2} to complete bootstrap")
    await asyncio.wait_for(start_task, timeout=60)

View File

@@ -433,8 +433,7 @@ async def test_non_existant_table_master_key(manager: ManagerClient, tmpdir):
async def test_system_auth_encryption(manager: ManagerClient, tmpdir):
cfg = {"authenticator": "org.apache.cassandra.auth.PasswordAuthenticator",
"authorizer": "org.apache.cassandra.auth.CassandraAuthorizer",
"commitlog_sync": "batch" }
"authorizer": "org.apache.cassandra.auth.CassandraAuthorizer"}
servers: list[ServerInfo] = await manager.servers_add(servers_num = 1, config=cfg,
driver_connect_opts={'auth_provider': PlainTextAuthProvider(username='cassandra', password='cassandra')})
@@ -451,14 +450,11 @@ async def test_system_auth_encryption(manager: ManagerClient, tmpdir):
file_paths = [f for f in file_paths if os.path.isfile(f) and not os.path.islink(f)]
for file_path in file_paths:
try:
with open(file_path, 'rb') as f:
data = f.read()
if pbytes in data:
pattern_found_counter += 1
logger.debug("Pattern '%s' found in %s", pattern, file_path)
except FileNotFoundError:
pass # assume just compacted away
with open(file_path, 'rb') as f:
data = f.read()
if pbytes in data:
pattern_found_counter += 1
logger.debug("Pattern '%s' found in %s", pattern, file_path)
if expect:
assert pattern_found_counter > 0
@@ -466,15 +462,15 @@ async def test_system_auth_encryption(manager: ManagerClient, tmpdir):
assert pattern_found_counter == 0
async def verify_system_info(expect: bool):
user = f"user_{str(uuid.uuid4())}".replace('-','_')
user = f"user_{str(uuid.uuid4())}"
pwd = f"pwd_{str(uuid.uuid4())}"
cql.execute(f"CREATE USER {user} WITH PASSWORD '{pwd}' NOSUPERUSER")
assert_one(cql, f"LIST ROLES of {user}", [user, False, True, {}])
logger.debug("Verify PART 1: check commitlogs -------------")
await grep_database_files(pwd, "commitlog", "**/*.log", False)
await grep_database_files(user, "commitlog", "**/*.log", expect)
grep_database_files(pwd, "commitlog", "**/*.log", expect)
grep_database_files(user, "commitlog", "**/*.log", True)
salted_hash = None
system_auth = None
@@ -491,38 +487,39 @@ async def test_system_auth_encryption(manager: ManagerClient, tmpdir):
assert salted_hash is not None
assert system_auth is not None
await grep_database_files(salted_hash, "commitlog", "**/*.log", expect)
grep_database_files(salted_hash, "commitlog", "**/*.log", expect)
rand_comment = f"comment_{str(uuid.uuid4())}"
async with await create_ks(manager) as ks:
async with new_test_table(manager, ks, "key text PRIMARY KEY, c1 text, c2 text") as table:
async with await new_test_table(cql, ks, "key text PRIMARY KEY, c1 text, c2 text") as table:
cql.execute(f"ALTER TABLE {table} WITH comment = '{rand_comment}'")
await grep_database_files(rand_comment, "commitlog/schema", "**/*.log", expect)
# Note: original test did greping in sstables. This does no longer work
# since all system tables are compressed, and thus binary greping will
# not work. We could do scylla sstable dump-data and grep in the json,
# but this is somewhat pointless as this would, if it handles it, just
# decrypt the info from the sstable, thus we can't really verify anything.
# We could maybe check that the expected system tables are in fact encrypted,
# though this is more a promise than guarantee... Also, the only tables
# encrypted are paxos and batchlog -> pointless
grep_database_files(rand_comment, "commitlog/schema", "**/*.log", expect)
nodetool.flush_all(cql)
await verify_system_info(True) # not encrypted
logger.debug("Verify PART 2: check sstable files -------------\n`system_info_encryption` won't encrypt sstable files on disk")
logger.debug("GREP_DB_FILES: Check PM key user in sstable file ....")
grep_database_files(user, f"data/{system_auth}/", "**/*-Data.db", expect=True)
logger.debug("GREP_DB_FILES: Check original password in commitlogs .... Original password should never be saved")
grep_database_files(pwd, f"data/{system_auth}/", "**/*-Data.db", expect=False)
logger.debug("GREP_DB_FILES: Check salted_hash of password in sstable file ....")
grep_database_files(salted_hash, f"data/{system_auth}/", "**/*-Data.db", expect=False)
logger.debug("GREP_DB_FILES: Check table comment in sstable file ....")
grep_database_files(rand_comment, "data/system_schema/", "**/*-Data.db", expect=True)
verify_system_info(True) # not encrypted
cfg = {"system_info_encryption": {
"enabled": True,
"key_provider": "LocalFileSystemKeyProviderFactory"},
"system_key_directory": os.path.join(tmpdir, "resources/system_keys")
"key_provider": "LocalFileSystemKeyProviderFactory"}
}
for server in servers:
await manager.server_update_config(server.server_id, config_options=cfg)
await manager.server_restart(server.server_id)
manager.server_update_config(server.server_id, config_options=cfg)
await manager.rolling_restart(servers)
await verify_system_info(False) # should not see stuff now
verify_system_info(False) # should not see stuff now
async def test_system_encryption_reboot(manager: ManagerClient, tmpdir):

View File

@@ -609,19 +609,14 @@ async def do_test_tablet_incremental_repair_merge_error(manager, error):
scylla_path = get_scylla_path(cql)
coord = await get_topology_coordinator(manager)
coord_serv = await find_server_by_host_id(manager, servers, coord)
coord_log = await manager.server_open_log(coord_serv.server_id)
# Trigger merge and error in merge
mark = await coord_log.mark()
await inject_error_on(manager, error, [coord_serv])
s1_mark = await logs[0].mark()
await inject_error_on(manager, error, servers[:1])
await inject_error_on(manager, "tablet_force_tablet_count_decrease", servers)
await inject_error_on(manager, "tablet_force_tablet_count_decrease_once", servers)
await coord_log.wait_for(f'Got {error}', from_mark=mark)
await logs[0].wait_for(f'Got {error}', from_mark=s1_mark)
await inject_error_off(manager, "tablet_force_tablet_count_decrease", servers)
await manager.server_stop(coord_serv.server_id)
await manager.server_start(coord_serv.server_id)
await manager.server_stop(servers[0].server_id)
await manager.server_start(servers[0].server_id)
for server in servers:
await manager.server_stop_gracefully(server.server_id)
@@ -867,6 +862,50 @@ async def test_repair_sigsegv_with_diff_shard_count(manager: ManagerClient, use_
logger.info("Starting vnode repair")
await manager.api.repair(servers[1].ip_addr, ks, "test")
# Reproducer for https://github.com/scylladb/scylladb/issues/27365
# Incremental repair vs tablet merge
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
async def test_tablet_incremental_repair_tablet_merge_compaction_group_gone(manager: ManagerClient):
    """Race an incremental repair's sstable preparation against a tablet
    merge that removes the compaction group, to catch a use-after-free.

    The merge fiber is paused, repair preparation is started and paused,
    the merge is then allowed to remove the compaction group, and finally
    the repair is resumed; it must still finish cleanly.
    """
    cmdline = ['--logger-log-level', 'repair=debug']
    servers, cql, hosts, ks, table_id, logs, _, _, _, _ = await preapre_cluster_for_incremental_repair(manager, cmdline=cmdline)
    coord = await get_topology_coordinator(manager)
    coord_serv = await find_server_by_host_id(manager, servers, coord)
    coord_log = await manager.server_open_log(coord_serv.server_id)

    # Trigger merge and wait until the merge fiber starts
    s1_mark = await coord_log.mark()
    await inject_error_on(manager, "merge_completion_fiber", servers)
    await inject_error_on(manager, "tablet_force_tablet_count_decrease_once", servers)
    await inject_error_on(manager, "tablet_force_tablet_count_decrease", servers)
    await coord_log.wait_for('Detected tablet merge for table', from_mark=s1_mark)
    await inject_error_off(manager, "tablet_force_tablet_count_decrease", servers)
    await coord_log.wait_for('merge_completion_fiber: waiting for message', from_mark=s1_mark)

    # Trigger repair and wait for the incremental repair preparation to start
    s1_mark = await coord_log.mark()
    await inject_error_on(manager, "wait_after_prepare_sstables_for_incremental_repair", servers)
    await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token=-1, await_completion=False, incremental_mode='incremental')
    # Wait for preparation to start.
    await coord_log.wait_for('Disabling compaction for range', from_mark=s1_mark)
    # Without the serialization, sleep to increase chances of preparation finishing before merge fiber.
    # With the serialization, preparation will wait for merge fiber to finish.
    await asyncio.sleep(0.1)

    # Continue to execute the merge fiber so that the compaction group is removed
    await inject_error_on(manager, "replica_merge_completion_wait", servers)
    for s in servers:
        await manager.api.message_injection(s.ip_addr, "merge_completion_fiber")
    await coord_log.wait_for('Merge completion fiber finished', from_mark=s1_mark)

    # Continue the repair to trigger use-after-free
    for s in servers:
        await manager.api.message_injection(s.ip_addr, "wait_after_prepare_sstables_for_incremental_repair")
    await coord_log.wait_for('Finished tablet repair', from_mark=s1_mark)
# Reproducer for https://github.com/scylladb/scylladb/issues/27365
# Incremental repair vs table drop
@pytest.mark.asyncio

View File

@@ -162,12 +162,7 @@ async def do_test_internode_compression_between_datacenters(manager: ManagerClie
await asyncio.gather(*[manager.server_stop(s.server_id) for s,_ in servers])
await asyncio.gather(*[p.stop() for p in proxies])
# these will all except, because we just stopped them above
for coro in proxy_futs:
try:
await coro
except:
pass
async def test_internode_compression_compress_packets_between_nodes(request, manager: ManagerClient) -> None:
def check_expected(msg_size, node1_proxy, node2_proxy, node3_proxy):

View File

@@ -1,65 +0,0 @@
#
# Copyright (C) 2026-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#
import asyncio
import pytest
from test.cluster.util import new_test_keyspace, new_test_table
from test.pylib.manager_client import ManagerClient
from test.pylib.rest_client import inject_error_one_shot
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode="release", reason="error injections are not supported in release mode")
async def test_prepare_fails_if_cached_statement_is_invalidated_mid_prepare(manager: ManagerClient):
server = await manager.server_add()
cql = manager.get_cql()
log = await manager.server_open_log(server.server_id)
async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1};") as ks:
async with new_test_table(manager, ks, "pk int PRIMARY KEY") as table:
query = f"SELECT * FROM {table} WHERE pk = ?"
loop = asyncio.get_running_loop()
await cql.run_async(f"INSERT INTO {table} (pk) VALUES (7)")
await cql.run_async(f"INSERT INTO {table} (pk) VALUES (8)")
handler = await inject_error_one_shot(manager.api, server.ip_addr, "query_processor_prepare_wait_after_cache_get")
mark = await log.mark()
prepare_future = loop.run_in_executor(None, lambda: cql.prepare(query))
await log.wait_for("query_processor_prepare_wait_after_cache_get: waiting for message", from_mark=mark, timeout=60)
# Trigger table schema update (metadata-only) to invalidate prepared statements while PREPARE is paused.
await cql.run_async(f"ALTER TABLE {table} WITH comment = 'invalidate-prepared-race'")
await handler.message()
done, _ = await asyncio.wait({prepare_future}, timeout=15)
if not done:
pytest.fail("Timed out waiting for PREPARE to complete after signaling injection")
result = done.pop().result()
print(f"PREPARE succeeded as expected: {result!r}")
rows = cql.execute(result, [7])
row = rows.one()
assert row is not None and row.pk == 7
# Invalidate prepared statements again, then execute the same prepared object.
# The driver should transparently re-prepare and re-request execution.
await cql.run_async(f"ALTER TABLE {table} WITH comment = 'invalidate-prepared-race-again'")
reprepare_handler = await inject_error_one_shot(manager.api, server.ip_addr, "query_processor_prepare_wait_after_cache_get")
reprepare_mark = await log.mark()
execute_future = loop.run_in_executor(None, lambda: cql.execute(result, [8]))
await log.wait_for("query_processor_prepare_wait_after_cache_get: waiting for message", from_mark=reprepare_mark, timeout=60)
await reprepare_handler.message()
execute_done, _ = await asyncio.wait({execute_future}, timeout=15)
if not execute_done:
pytest.fail("Timed out waiting for driver execute to finish after re-prepare signaling")
retried_rows = execute_done.pop().result()
retried_row = retried_rows.one()
assert retried_row is not None and retried_row.pk == 8

View File

@@ -16,10 +16,8 @@ import pytest
import socket
import ssl
import struct
import time
from test.pylib.manager_client import ManagerClient
from test.pylib.util import wait_for
logger = logging.getLogger(__name__)
@@ -271,28 +269,6 @@ async def send_cql_with_proxy_header_tls(
sock.close()
async def wait_for_results(cql, query: str, expected_count: int, timeout: float = 30.0, filter_fn=None):
    """
    Polls `query` until at least `expected_count` rows satisfy `filter_fn` (all rows if no filter is given).
    On timeout, logs the full result set from the last poll to aid debugging.
    """
    last_rows: list = []

    async def poll_once():
        # Re-run the query and remember the full result set for diagnostics.
        nonlocal last_rows
        last_rows = list(await cql.run_async(query))
        if filter_fn is None:
            matching = last_rows
        else:
            matching = filter_fn(last_rows)
        return matching if len(matching) >= expected_count else None

    try:
        return await wait_for(poll_once, time.time() + timeout, period=0.1)
    except Exception:
        # Dump the rows from the last poll so a timeout is debuggable.
        logger.error('Timed out waiting for %d matching rows in system.clients. Last poll returned %d total rows:\n%s',
                     expected_count, len(last_rows), '\n'.join(str(r) for r in last_rows))
        raise
# Shared server configuration for all tests
# We configure explicit SSL ports to keep the standard ports unencrypted
# so the Python driver can connect without TLS.
@@ -392,12 +368,9 @@ async def test_proxy_protocol_shard_aware(proxy_server):
await do_cql_handshake(reader, writer)
# Now query system.clients to verify shard assignments
rows = await wait_for_results(
cql,
'SELECT address, port, shard_id FROM system.clients',
expected_count=num_shards,
filter_fn=lambda all_rows: [r for r in all_rows if str(r.address) == fake_src_addr],
)
rows = list(cql.execute(
f"SELECT address, port, shard_id FROM system.clients WHERE address = '{fake_src_addr}' ALLOW FILTERING"
))
# Build a map of port -> shard_id from the results
port_to_shard = {row.port: row.shard_id for row in rows}
@@ -473,12 +446,9 @@ async def test_proxy_protocol_port_preserved_in_system_clients(proxy_server):
# Now query system.clients using the driver to see our connection
cql = manager.get_cql()
rows = await wait_for_results(
cql,
'SELECT address, port FROM system.clients',
expected_count=1,
filter_fn=lambda all_rows: [r for r in all_rows if str(r.address) == fake_src_addr],
)
rows = list(cql.execute(
f"SELECT address, port FROM system.clients WHERE address = '{fake_src_addr}' ALLOW FILTERING"
))
# We should find our connection with the fake source address and port
assert len(rows) > 0, f"Expected to find connection from {fake_src_addr} in system.clients"
@@ -599,12 +569,9 @@ async def test_proxy_protocol_ssl_shard_aware(proxy_server):
ssl_sock.recv(4096)
# Now query system.clients to verify shard assignments
rows = await wait_for_results(
cql,
'SELECT address, port, shard_id, ssl_enabled FROM system.clients',
expected_count=num_shards,
filter_fn=lambda all_rows: [r for r in all_rows if str(r.address) == fake_src_addr],
)
rows = list(cql.execute(
f"SELECT address, port, shard_id, ssl_enabled FROM system.clients WHERE address = '{fake_src_addr}' ALLOW FILTERING"
))
# Build a map of port -> (shard_id, ssl_enabled) from the results
port_to_info = {row.port: (row.shard_id, row.ssl_enabled) for row in rows}
@@ -689,12 +656,9 @@ async def test_proxy_protocol_ssl_port_preserved(proxy_server):
# Now query system.clients using the driver to see our connection
cql = manager.get_cql()
rows = await wait_for_results(
cql,
'SELECT address, port, ssl_enabled FROM system.clients',
expected_count=1,
filter_fn=lambda all_rows: [r for r in all_rows if str(r.address) == fake_src_addr],
)
rows = list(cql.execute(
f"SELECT address, port, ssl_enabled FROM system.clients WHERE address = '{fake_src_addr}' ALLOW FILTERING"
))
# We should find our connection
assert len(rows) > 0, f"Expected to find connection from {fake_src_addr} in system.clients"

View File

@@ -7,7 +7,6 @@ import logging
import pytest
import asyncio
from test.pylib.internal_types import ServerNum
from test.pylib.manager_client import ManagerClient
from test.cluster.conftest import skip_mode
from test.pylib.rest_client import inject_error_one_shot, InjectionHandler, read_barrier
@@ -21,20 +20,6 @@ def fixture_raft_op_timeout(build_mode):
return 10000 if build_mode == 'debug' else 1000
async def update_group0_raft_op_timeout(server_id: ServerNum, manager: ManagerClient, timeout: int) -> None:
    """Set group0_raft_op_timeout_in_ms on a server and, when it is running,
    wait until the updated configuration has actually been re-read."""
    logger.info(f"Updating group0_raft_op_timeout_in_ms on server {server_id} to {timeout}")
    running_ids = {srv.server_id for srv in await manager.running_servers()}
    if server_id not in running_ids:
        # A stopped node will pick the value up on its next start.
        await manager.server_update_config(server_id, 'group0_raft_op_timeout_in_ms', timeout)
        return
    # If the node is alive, server_update_config only sends the SIGHUP signal to the Scylla process, so awaiting it
    # doesn't guarantee that the new config file is active. Work around this by looking at the logs.
    log_file = await manager.server_open_log(server_id)
    mark = await log_file.mark()
    await manager.server_update_config(server_id, 'group0_raft_op_timeout_in_ms', timeout)
    await log_file.wait_for("completed re-reading configuration file", from_mark=mark, timeout=60)
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
@pytest.mark.skip_mode(mode='debug', reason='aarch64/debug is unpredictably slow', platform_key='aarch64')
@@ -57,6 +42,7 @@ async def test_cannot_add_new_node(manager: ManagerClient, raft_op_timeout: int)
config = {
'direct_failure_detector_ping_timeout_in_ms': 300,
'group0_raft_op_timeout_in_ms': raft_op_timeout,
'error_injections_at_startup': [
{
'name': 'raft-group-registry-fd-threshold-in-ms',
@@ -78,10 +64,6 @@ async def test_cannot_add_new_node(manager: ManagerClient, raft_op_timeout: int)
manager.server_stop_gracefully(servers[3].server_id),
manager.server_stop_gracefully(servers[4].server_id))
# Do it here to prevent unexpected timeouts before quorum loss.
await asyncio.gather(*(update_group0_raft_op_timeout(srv.server_id, manager, raft_op_timeout)
for srv in servers[:2]))
logger.info("starting a sixth node with no quorum")
await manager.server_add(expected_error="raft operation \\[read_barrier\\] timed out, there is no raft quorum",
timeout=60)
@@ -94,6 +76,7 @@ async def test_cannot_add_new_node(manager: ManagerClient, raft_op_timeout: int)
@pytest.mark.skip_mode(mode='debug', reason='aarch64/debug is unpredictably slow', platform_key='aarch64')
async def test_quorum_lost_during_node_join(manager: ManagerClient, raft_op_timeout: int) -> None:
config = {
'group0_raft_op_timeout_in_ms': raft_op_timeout,
'error_injections_at_startup': [
{
'name': 'raft-group-registry-fd-threshold-in-ms',
@@ -124,9 +107,6 @@ async def test_quorum_lost_during_node_join(manager: ManagerClient, raft_op_time
await asyncio.gather(manager.server_stop_gracefully(servers[1].server_id),
manager.server_stop_gracefully(servers[2].server_id))
# Do it here to prevent unexpected timeouts before quorum loss.
await update_group0_raft_op_timeout(servers[0].server_id, manager, raft_op_timeout)
logger.info("release join-node-before-add-entry injection")
await injection_handler.message()
@@ -146,6 +126,7 @@ async def test_quorum_lost_during_node_join_response_handler(manager: ManagerCli
logger.info("adding a fourth node")
servers += [await manager.server_add(config={
'group0_raft_op_timeout_in_ms': raft_op_timeout,
'error_injections_at_startup': [
{
'name': 'raft-group-registry-fd-threshold-in-ms',
@@ -172,9 +153,6 @@ async def test_quorum_lost_during_node_join_response_handler(manager: ManagerCli
await asyncio.gather(manager.server_stop_gracefully(servers[1].server_id),
manager.server_stop_gracefully(servers[2].server_id))
# Do it here to prevent unexpected timeouts before quorum loss.
await update_group0_raft_op_timeout(servers[3].server_id, manager, raft_op_timeout)
logger.info("release join-node-response_handler-before-read-barrier injection")
injection_handler = InjectionHandler(manager.api,
'join-node-response_handler-before-read-barrier',
@@ -191,6 +169,7 @@ async def test_quorum_lost_during_node_join_response_handler(manager: ManagerCli
async def test_cannot_run_operations(manager: ManagerClient, raft_op_timeout: int) -> None:
logger.info("starting a first node (the leader)")
servers = [await manager.server_add(config={
'group0_raft_op_timeout_in_ms': raft_op_timeout,
'error_injections_at_startup': [
{
'name': 'raft-group-registry-fd-threshold-in-ms',
@@ -210,9 +189,6 @@ async def test_cannot_run_operations(manager: ManagerClient, raft_op_timeout: in
await asyncio.gather(manager.server_stop_gracefully(servers[1].server_id),
manager.server_stop_gracefully(servers[2].server_id))
# Do it here to prevent unexpected timeouts before quorum loss.
await update_group0_raft_op_timeout(servers[0].server_id, manager, raft_op_timeout)
logger.info("attempting removenode for the second node")
await manager.remove_node(servers[0].server_id, servers[1].server_id,
expected_error="raft operation [read_barrier] timed out, there is no raft quorum",
@@ -256,7 +232,9 @@ async def test_can_restart(manager: ManagerClient, raft_op_timeout: int) -> None
await asyncio.gather(*(manager.server_stop(srv.server_id) for srv in servers))
# This ensures the read barriers below fail quickly without group 0 quorum.
await asyncio.gather(*(update_group0_raft_op_timeout(srv.server_id, manager, raft_op_timeout) for srv in servers))
logger.info(f"Decreasing group0_raft_op_timeout_in_ms on {servers}")
await asyncio.gather(*(manager.server_update_config(srv.server_id, 'group0_raft_op_timeout_in_ms', raft_op_timeout)
for srv in servers))
logger.info(f"Restarting {servers[:2]} with no group 0 quorum")
for idx, srv in enumerate(servers[:2]):
@@ -268,7 +246,8 @@ async def test_can_restart(manager: ManagerClient, raft_op_timeout: int) -> None
# Increase the timeout back to 300s to ensure the new group 0 leader is elected before the first read barrier below
# times out.
await asyncio.gather(*(update_group0_raft_op_timeout(srv.server_id, manager, 300000) for srv in servers))
await asyncio.gather(*(manager.server_update_config(srv.server_id, 'group0_raft_op_timeout_in_ms', 300000)
for srv in servers))
logger.info(f"Restarting {servers[2:]} with group 0 quorum")
for srv in servers[2:]:

View File

@@ -978,7 +978,7 @@ async def test_tablets_merge_waits_for_lwt(manager: ManagerClient):
await wait_for_tablet_count(manager, s0, ks, 'test', lambda c: c == 1, 1, timeout_s=15)
logger.info("Ensure the guard decided to retain the erm")
m, _ = await log0.wait_for("tablet_metadata_guard::check: retain the erm and abort the guard",
await log0.wait_for("tablet_metadata_guard::check: retain the erm and abort the guard",
from_mark=m, timeout=10)
tablets = await get_all_tablet_replicas(manager, s0, ks, 'test')
@@ -986,11 +986,7 @@ async def test_tablets_merge_waits_for_lwt(manager: ManagerClient):
tablet = tablets[0]
assert tablet.replicas == [(s0_host_id, 0)]
# Since merge now waits for erms before releasing the state machine,
# the migration initiated below will not start until paxos released the erm.
# The barrier which is blocked is the one in merge finalization.
# I keep the tablet movement as a guard against regressions in case the behavior changes.
m = await log0.mark()
migration_task = asyncio.create_task(manager.api.move_tablet(s0.ip_addr, ks, "test",
s0_host_id, 0,
s0_host_id, 1,

View File

@@ -441,6 +441,84 @@ async def test_tablet_split_merge_with_many_tables(build_mode: str, manager: Man
await check_logs("after merge completion")
# Reproduces use-after-free when migration right after merge, but concurrently to background
# merge completion handler.
# See: https://github.com/scylladb/scylladb/issues/24045
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
async def test_migration_running_concurrently_to_merge_completion_handling(manager: ManagerClient):
cmdline = []
# Size based balancing can attempt to migrate the merged tablet as soon as the merge is complete
# because of a lower transient effective_capacity on the node with the merged tablet.
# This migration will timeout on cleanup because the compaction group still has an active task,
# which is held by the merge_completion_fiber injection, so the tablet's compaction group gate
# can not be closed, resulting in cleanup getting stuck. We force capacity based balancing to
# avoid this problem.
cfg = {'force_capacity_based_balancing': True}
servers = [await manager.server_add(cmdline=cmdline, config=cfg)]
await manager.disable_tablet_balancing()
cql = manager.get_cql()
# Start from 2 tablets so a single merge cycle brings the table down to 1.
async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1} AND tablets = {'initial': 2}") as ks:
await cql.run_async(f"CREATE TABLE {ks}.test (pk int PRIMARY KEY, c int);")
tablet_count = await get_tablet_count(manager, servers[0], ks, 'test')
assert tablet_count == 2
old_tablet_count = tablet_count
keys = range(100)
await asyncio.gather(*[cql.run_async(f"INSERT INTO {ks}.test (pk, c) VALUES ({k}, {k});") for k in keys])
# Request the merge (2 -> 1 tablets).
await cql.run_async(f"ALTER KEYSPACE {ks} WITH tablets = {{'initial': 1}};")
s0_log = await manager.server_open_log(servers[0].server_id)
s0_mark = await s0_log.mark()
# Pause the background merge completion handling so a migration can race with it.
await manager.api.enable_injection(servers[0].ip_addr, "merge_completion_fiber", one_shot=True)
await manager.api.enable_injection(servers[0].ip_addr, "replica_merge_completion_wait", one_shot=True)
await manager.enable_tablet_balancing()
# Second node is the migration target.
servers.append(await manager.server_add(cmdline=cmdline, config=cfg))
s1_host_id = await manager.get_host_id(servers[1].server_id)
async def finished_merging():
# wait_for() helper: truthy when the tablet count dropped, None to keep polling.
tablet_count = await get_tablet_count(manager, servers[0], ks, 'test')
return tablet_count < old_tablet_count or None
await wait_for(finished_merging, time.time() + 120)
await manager.disable_tablet_balancing()
await manager.api.enable_injection(servers[0].ip_addr, "take_storage_snapshot", one_shot=True)
await s0_log.wait_for(f"merge_completion_fiber: waiting", from_mark=s0_mark)
tablet_count = await get_tablet_count(manager, servers[0], ks, 'test')
assert tablet_count == 1
tablet_token = 0 # Doesn't matter since there is one tablet
replica = await get_tablet_replica(manager, servers[0], ks, 'test', tablet_token)
s0_host_id = await manager.get_host_id(servers[0].server_id)
src_shard = replica[1]
dst_shard = src_shard
# Kick off migration of the merged tablet while the completion fiber is still parked.
migration = asyncio.create_task(manager.api.move_tablet(servers[0].ip_addr, ks, "test", replica[0], src_shard, s1_host_id, dst_shard, tablet_token))
await s0_log.wait_for(f"take_storage_snapshot: waiting", from_mark=s0_mark)
# Release the completion fiber so it runs concurrently with the in-flight migration.
await manager.api.message_injection(servers[0].ip_addr, "merge_completion_fiber")
await s0_log.wait_for(f"Merge completion fiber finished", from_mark=s0_mark)
await manager.api.message_injection(servers[0].ip_addr, "take_storage_snapshot")
await migration
# All rows written before the merge must survive merge + migration.
rows = await cql.run_async(f"SELECT * FROM {ks}.test;")
assert len(rows) == len(keys)
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
async def test_missing_data(manager: ManagerClient):
@@ -577,77 +655,3 @@ async def test_merge_with_drop(manager: ManagerClient):
await asyncio.sleep(0.1)
await manager.api.message_injection(server.ip_addr, "compaction_group_stop_wait")
await drop_table_fut
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
async def test_background_merge_deadlock(manager: ManagerClient):
"""
Reproducer for https://scylladb.atlassian.net/browse/SCYLLADB-928
Reproduces a deadlock in the background merge completion handler that can happen when multiple merges accumulate.
If we accumulate more than 1 merge cycle for the fiber, deadlock occurs due to compaction lock taken
on the main group (post-merge). The lock is held until compaction groups are processed by the background merge
fiber.
Example:
Initial state:
cg0: main,
cg1: main
cg2: main
cg3: main
After 1st merge:
cg0': main [locked], merging_groups=[cg0.main, cg1.main]
cg1': main [locked], merging_groups=[cg2.main, cg3.main]
After 2nd merge:
cg0'': main [locked], merging_groups=[cg0'.main [locked], cg0.main, cg1.main, cg1'.main [locked], cg2.main, cg3.main]
The test reproduces this by doing a tablet merge from 8 tablets to 1 (8 -> 4 -> 2 -> 1). The background merge fiber
is blocked until after the first merge (to 4), so that there is a higher chance of two merges queueing in the fiber.
If deadlock occurs, node shutdown will hang waiting for the background merge fiber. That's why the test
tries to stop the node at the end.
"""
cmdline = [
'--logger-log-level', 'load_balancer=debug',
'--logger-log-level', 'raft_topology=debug',
]
servers = [await manager.server_add(cmdline=cmdline)]
cql, _ = await manager.get_ready_cql(servers)
ks = await create_new_test_keyspace(cql, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}")
# Create a table which will go through 3 merge cycles.
await cql.run_async(f"CREATE TABLE {ks}.test (pk int PRIMARY KEY, c int) with tablets = {{'min_tablet_count': 8}};")
# Park the background merge completion fiber so merges can queue up behind it.
await manager.api.enable_injection(servers[0].ip_addr, "merge_completion_fiber", one_shot=True)
log = await manager.server_open_log(servers[0].server_id)
mark = await log.mark()
# Trigger tablet merging
await cql.run_async(f"ALTER TABLE {ks}.test WITH tablets = {{'min_tablet_count': 1}};")
async def produced_one_merge():
# wait_for() helper: truthy once the first merge cycle (8 -> 4) completed.
tablet_count = await get_tablet_count(manager, servers[0], ks, 'test')
return tablet_count == 4 or None
await wait_for(produced_one_merge, time.time() + 120)
mark, _ = await log.wait_for(f"merge_completion_fiber: waiting", from_mark=mark)
# Release the fiber only after the first merge, maximizing the chance that the
# remaining merge cycles queue together behind it.
await manager.api.message_injection(servers[0].ip_addr, "merge_completion_fiber")
mark, _ = await log.wait_for(f"merge_completion_fiber: message received", from_mark=mark)
async def finished_merge():
# wait_for() helper: truthy once all merges finished (single tablet left).
tablet_count = await get_tablet_count(manager, servers[0], ks, 'test')
return tablet_count == 1 or None
await wait_for(finished_merge, time.time() + 120)
# If the deadlock reproduced, this shutdown hangs on the background merge fiber.
await manager.server_stop(servers[0].server_id)

View File

@@ -94,8 +94,6 @@ async def test_remove_garbage_group0_members(manager: ManagerClient):
logging.info(f'stop {servers[1]}')
await manager.server_stop_gracefully(servers[1].server_id)
await wait_for_token_ring_and_group0_consistency(manager, time.time() + 60)
logging.info(f'removenode {servers[1]} using {servers[2]}')
await manager.remove_node(servers[2].server_id, servers[1].server_id)

View File

@@ -559,9 +559,6 @@ private:
cfg->ring_delay_ms.set(500);
cfg->shutdown_announce_in_ms.set(0);
cfg->broadcast_to_all_shards().get();
smp::invoke_on_all([&] {
sstables::global_cache_index_pages = cfg->cache_index_pages.operator utils::updateable_value<bool>();
}).get();
create_directories((data_dir_path + "/system").c_str());
create_directories(cfg->commitlog_directory().c_str());
create_directories(cfg->schema_commitlog_directory().c_str());

View File

@@ -449,68 +449,3 @@ def test_repair_incremenatal_repair(nodetool, mode):
Starting repair with task_id={id1} keyspace=ks table=table1
Repair with task_id={id1} finished
"""
def test_cluster_repair_table_dropped(nodetool):
# Whole-keyspace repair: when a table was dropped mid-operation (server answers
# 400 "Can't find a column family"), the repair skips that table, continues with
# the remaining ones, and only reports the tables it actually repaired.
id1 = "ef1b7a61-66c8-494c-bb03-6f65724e6eee"
res = nodetool("cluster", "repair", "ks", expected_requests=[
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
expected_request("GET", "/storage_service/keyspaces", params={"replication": "tablets"}, response=["ks"]),
expected_request("GET", "/column_family", response=[{"ks": "ks", "cf": "table1"}, {"ks": "ks", "cf": "table2"}]),
expected_request(
"POST",
"/storage_service/tablets/repair",
params={
"ks": "ks",
"table": "table1",
"tokens": "all"},
response={"message": "Can't find a column family table1 in keyspace ks", "code": 400}, response_status=400),
expected_request(
"POST",
"/storage_service/tablets/repair",
params={
"ks": "ks",
"table": "table2",
"tokens": "all"},
response={"tablet_task_id": id1}),
expected_request(
"GET",
f"/task_manager/wait_task/{id1}",
response={"state": "done"}),
])
# Only table2 shows up in the output; the dropped table1 is silently skipped.
assert _remove_log_timestamp(res.stdout) == f"""\
Starting repair with task_id={id1} keyspace=ks table=table2
Repair with task_id={id1} finished
"""
def test_cluster_repair_specified_table_dropped(nodetool):
# Unlike the whole-keyspace case, when the user names the tables explicitly a
# dropped table (400 "Can't find a column family") makes the command fail with
# an error, after the remaining requested tables are still attempted.
id1 = "ef1b7a61-66c8-494c-bb03-6f65724e6eee"
check_nodetool_fails_with_error_contains(
nodetool,
("cluster", "repair", "ks", "table1", "table2"),
{"expected_requests": [
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
expected_request("GET", "/storage_service/keyspaces", params={"replication": "tablets"}, response=["ks"]),
expected_request(
"POST",
"/storage_service/tablets/repair",
params={
"ks": "ks",
"table": "table1",
"tokens": "all"},
response={"message": "Can't find a column family table1 in keyspace ks", "code": 400}, response_status=400),
expected_request(
"POST",
"/storage_service/tablets/repair",
params={
"ks": "ks",
"table": "table2",
"tokens": "all"},
response={"tablet_task_id": id1}),
expected_request(
"GET",
f"/task_manager/wait_task/{id1}",
response={"state": "done"}),
]
},
[f"Can't find a column family table1 in keyspace ks"])

View File

@@ -10,7 +10,6 @@
#include <memory>
#include <signal.h>
#include <seastar/core/future.hh>
#include <seastar/core/sleep.hh>
#include <seastar/core/thread.hh>
#include <seastar/core/app-template.hh>
#include <seastar/http/client.hh>
@@ -79,23 +78,6 @@ static future<> make_request(http::experimental::client& cli, sstring operation,
});
}
static void wait_for_alternator(const test_config& c) {
for (int attempt = 0; attempt < 3000; ++attempt) {
try {
auto cli = get_client(c);
auto close = defer([&] { cli.close().get(); });
make_request(cli, "ListTables", "{}").get();
return;
} catch (...) {
}
seastar::sleep(std::chrono::milliseconds(100)).get();
if (attempt >= 100 && attempt % 10 == 0) {
std::cout << fmt::format("Retrying connect to alternator port (attempt {})", attempt + 1) << std::endl;
}
}
throw std::runtime_error("Timed out waiting for alternator port to become ready");
}
static void delete_alternator_table(http::experimental::client& cli) {
try {
make_request(cli, "DeleteTable", R"({"TableName": "workloads_test"})").get();
@@ -391,8 +373,6 @@ auto make_client_pool(const test_config& c) {
void workload_main(const test_config& c) {
std::cout << "Running test with config: " << c << std::endl;
wait_for_alternator(c);
auto cli = get_client(c);
auto finally = defer([&] {
delete_alternator_table(cli);

View File

@@ -330,13 +330,10 @@ int scylla_simple_query_main(int argc, char** argv) {
("counters", "test counters")
("tablets", "use tablets")
("initial-tablets", bpo::value<unsigned>()->default_value(128), "initial number of tablets")
("sstable-summary-ratio", bpo::value<double>(), "Generate summary entry, so that summary file size / data file size ~= this ratio")
("sstable-format", bpo::value<std::string>(), "SSTable format name to use")
("flush", "flush memtables before test")
("memtable-partitions", bpo::value<unsigned>(), "apply this number of partitions to memtable, then flush")
("json-result", bpo::value<std::string>(), "name of the json result file")
("enable-cache", bpo::value<bool>()->default_value(true), "enable row cache")
("enable-index-cache", bpo::value<bool>()->default_value(true), "enable partition index cache")
("stop-on-error", bpo::value<bool>()->default_value(true), "stop after encountering the first error")
("timeout", bpo::value<std::string>()->default_value(""), "use timeout")
("bypass-cache", "use bypass cache when querying")
@@ -360,19 +357,8 @@ int scylla_simple_query_main(int argc, char** argv) {
auto db_cfg = ::make_shared<db::config>(ext);
const auto enable_cache = app.configuration()["enable-cache"].as<bool>();
const auto enable_index_cache = app.configuration()["enable-index-cache"].as<bool>();
std::cout << "enable-cache=" << enable_cache << '\n';
std::cout << "enable-index-cache=" << enable_index_cache << '\n';
db_cfg->enable_cache(enable_cache);
db_cfg->cache_index_pages(enable_index_cache);
if (app.configuration().contains("sstable-summary-ratio")) {
db_cfg->sstable_summary_ratio(app.configuration()["sstable-summary-ratio"].as<double>());
}
std::cout << "sstable-summary-ratio=" << db_cfg->sstable_summary_ratio() << '\n';
if (app.configuration().contains("sstable-format")) {
db_cfg->sstable_format(app.configuration()["sstable-format"].as<std::string>());
}
std::cout << "sstable-format=" << db_cfg->sstable_format() << '\n';
cql_test_config cfg(db_cfg);
if (app.configuration().contains("tablets")) {
cfg.db_config->tablets_mode_for_new_keyspaces.set(db::tablets_mode_t::mode::enabled);

View File

@@ -15,8 +15,9 @@ from cassandra.cluster import ConsistencyLevel
from cassandra.query import SimpleStatement
from typing import Callable
from test.cluster.util import get_topology_coordinator, find_server_by_host_id, new_test_keyspace, new_test_table, reconnect_driver
from test.pylib.manager_client import ManagerClient, wait_for_cql_and_get_hosts
from test.cluster.conftest import skip_mode
from test.cluster.util import get_topology_coordinator, find_server_by_host_id, new_test_keyspace, new_test_table
from test.pylib.manager_client import ManagerClient
from test.pylib.tablets import get_tablet_count
from test.pylib.util import Host
from test.storage.conftest import space_limited_servers
@@ -80,7 +81,6 @@ async def test_user_writes_rejection(manager: ManagerClient, volumes_factory: Ca
logger.info("Create a big file on the target node to reach critical disk utilization level")
disk_info = psutil.disk_usage(workdir)
with random_content_file(workdir, int(disk_info.total*0.85) - disk_info.used):
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("database - Set critical disk utilization mode: true", from_mark=mark)
@@ -91,9 +91,8 @@ async def test_user_writes_rejection(manager: ManagerClient, volumes_factory: Ca
logger.info("Restart the node")
mark = await log.mark()
await manager.server_restart(servers[0].server_id)
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
cql = await reconnect_driver(manager)
await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
await manager.driver_connect()
cql = manager.get_cql()
for _ in range(2):
mark, _ = await log.wait_for("database - Set critical disk utilization mode: true", from_mark=mark)
@@ -105,7 +104,6 @@ async def test_user_writes_rejection(manager: ManagerClient, volumes_factory: Ca
await validate_data_existence(cql, hosts[1:], [hosts[0]], cf, 1)
logger.info("With blob file removed, wait for DB to drop below the critical disk utilization level")
mark, _ = await log.wait_for("Dropped below the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("database - Set critical disk utilization mode: false", from_mark=mark)
@@ -114,7 +112,7 @@ async def test_user_writes_rejection(manager: ManagerClient, volumes_factory: Ca
@pytest.mark.asyncio
async def test_autotoggle_compaction(manager: ManagerClient, volumes_factory: Callable) -> None:
async def test_autotoogle_compaction(manager: ManagerClient, volumes_factory: Callable) -> None:
cmdline = [*global_cmdline,
"--logger-log-level", "compaction=debug"]
async with space_limited_servers(manager, volumes_factory, ["100M"]*3, cmdline=cmdline) as servers:
@@ -138,20 +136,15 @@ async def test_autotoggle_compaction(manager: ManagerClient, volumes_factory: Ca
logger.info("Create a big file on the target node to reach critical disk utilization level")
disk_info = psutil.disk_usage(workdir)
with random_content_file(workdir, int(disk_info.total*0.85) - disk_info.used):
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("compaction_manager - Drained", from_mark=mark)
logger.info("Restart the node")
mark = await log.mark()
await manager.server_restart(servers[0].server_id)
await reconnect_driver(manager)
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("compaction_manager - Drained", from_mark=mark)
logger.info("With blob file removed, wait for DB to drop below the critical disk utilization level")
mark, _ = await log.wait_for("Dropped below the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("compaction_manager - Enabled", from_mark=mark)
@@ -242,8 +235,7 @@ async def test_reject_split_compaction(manager: ManagerClient, volumes_factory:
logger.info("Create a big file on the target node to reach critical disk utilization level")
disk_info = psutil.disk_usage(workdir)
with random_content_file(workdir, int(disk_info.total*0.85) - disk_info.used):
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
await log.wait_for(f"Split task .* for table {cf} .* stopped, reason: Compaction for {cf} was stopped due to: drain", from_mark=mark)
await log.wait_for(f"Split task .* for table {cf} .* stopped, reason: Compaction for {cf} was stopped due to: drain")
@pytest.mark.asyncio
@@ -268,7 +260,6 @@ async def test_split_compaction_not_triggered(manager: ManagerClient, volumes_fa
logger.info("Create a big file on the target node to reach critical disk utilization level")
disk_info = psutil.disk_usage(workdir)
with random_content_file(workdir, int(disk_info.total*0.85) - disk_info.used):
s1_mark, _ = await s1_log.wait_for("Reached the critical disk utilization level", from_mark=s1_mark)
for _ in range(2):
s1_mark, _ = await s1_log.wait_for("compaction_manager - Drained", from_mark=s1_mark)
@@ -303,13 +294,10 @@ async def test_tablet_repair(manager: ManagerClient, volumes_factory: Callable)
await manager.server_stop_gracefully(servers[0].server_id)
await manager.server_wipe_sstables(servers[0].server_id, ks, table)
await manager.server_start(servers[0].server_id)
cql = await reconnect_driver(manager)
await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
logger.info("Create a big file on the target node to reach critical disk utilization level")
disk_info = psutil.disk_usage(workdir)
with random_content_file(workdir, int(disk_info.total*0.85) - disk_info.used):
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("repair - Drained", from_mark=mark)
@@ -340,18 +328,16 @@ async def test_tablet_repair(manager: ManagerClient, volumes_factory: Callable)
logger.info("Restart the node")
mark = await log.mark()
await manager.server_restart(servers[0].server_id, wait_others=2)
await reconnect_driver(manager)
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
await manager.driver_connect()
for _ in range(2):
mark, _ = await log.wait_for("repair - Drained", from_mark=mark)
logger.info("With blob file removed, wait for the tablet repair to succeed")
mark, _ = await log.wait_for("Dropped below the critical disk utilization level", from_mark=mark)
await manager.api.wait_task(servers[0].ip_addr, task_id)
@pytest.mark.asyncio
async def test_autotoggle_reject_incoming_migrations(manager: ManagerClient, volumes_factory: Callable) -> None:
async def test_autotoogle_reject_incoming_migrations(manager: ManagerClient, volumes_factory: Callable) -> None:
cfg = {
'tablet_load_stats_refresh_interval_in_seconds': 1,
}
@@ -391,7 +377,6 @@ async def test_autotoggle_reject_incoming_migrations(manager: ManagerClient, vol
disk_info = psutil.disk_usage(workdir)
with random_content_file(workdir, int(disk_info.total*0.85) - disk_info.used):
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("database - Set critical disk utilization mode: true", from_mark=mark)
@@ -402,7 +387,6 @@ async def test_autotoggle_reject_incoming_migrations(manager: ManagerClient, vol
mark, _ = await log.wait_for("Streaming for tablet migration .* failed", from_mark=mark)
logger.info("With blob file removed, wait for DB to drop below the critical disk utilization level")
mark, _ = await log.wait_for("Dropped below the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("database - Set critical disk utilization mode: false", from_mark=mark)
@@ -451,7 +435,6 @@ async def test_node_restart_while_tablet_split(manager: ManagerClient, volumes_f
logger.info("Create a big file on the target node to reach critical disk utilization level")
disk_info = psutil.disk_usage(workdir)
with random_content_file(workdir, int(disk_info.total*0.85) - disk_info.used):
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("compaction_manager - Drained", from_mark=mark)
@@ -464,11 +447,7 @@ async def test_node_restart_while_tablet_split(manager: ManagerClient, volumes_f
await cql.run_async(f"ALTER TABLE {cf} WITH tablets = {{'min_tablet_count': 2}};")
await coord_log.wait_for(f"Generating resize decision for table {table_id} of type split")
mark = await log.mark()
await manager.server_restart(servers[0].server_id, wait_others=2)
cql = await reconnect_driver(manager)
await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
logger.info("Check if tablet split happened")
await assert_resize_task_info(table_id, lambda response: len(response) == 1 and response[0].resize_task_info is not None)
@@ -477,7 +456,6 @@ async def test_node_restart_while_tablet_split(manager: ManagerClient, volumes_f
assert await log.grep(f"compaction.*Split {cf}", from_mark=mark) == []
logger.info("With blob file removed, wait for DB to drop below the critical disk utilization level")
mark, _ = await log.wait_for("Dropped below the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("compaction_manager - Enabled", from_mark=mark)
mark, _ = await log.wait_for(f"Detected tablet split for table {cf}, increasing from 1 to 2 tablets", from_mark=mark)
@@ -543,7 +521,6 @@ async def test_repair_failure_on_split_rejection(manager: ManagerClient, volumes
logger.info("Create a big file on the target node to reach critical disk utilization level")
disk_info = psutil.disk_usage(workdir)
with random_content_file(workdir, int(disk_info.total*0.85) - disk_info.used):
mark, _ = await log.wait_for("Reached the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("compaction_manager - Drained", from_mark=mark)
@@ -556,100 +533,9 @@ async def test_repair_failure_on_split_rejection(manager: ManagerClient, volumes
assert await log.grep(f"compaction.*Split {cf}", from_mark=mark) == []
logger.info("With blob file removed, wait for DB to drop below the critical disk utilization level")
mark, _ = await log.wait_for("Dropped below the critical disk utilization level", from_mark=mark)
for _ in range(2):
mark, _ = await log.wait_for("compaction_manager - Enabled", from_mark=mark)
await repair_task
mark, _ = await log.wait_for(f"Detected tablet split for table {cf}", from_mark=mark)
# Since we create 20M volumes, we need to reduce the commitlog segment size
# otherwise we hit out of space.
# NOTE(review): critical-disk-utilization-level=1.0 presumably neutralizes the
# disk-space monitor's rejection behavior (matching the variable name) — confirm.
global_cmdline_with_disabled_monitor = [
"--disk-space-monitor-normal-polling-interval-in-seconds", "1",
"--critical-disk-utilization-level", "1.0",
"--commitlog-segment-size-in-mb", "2",
"--schema-commitlog-segment-size-in-mb", "4",
"--tablet-load-stats-refresh-interval-in-seconds", "1",
]
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
async def test_sstables_incrementally_released_during_streaming(manager: ManagerClient, volumes_factory: Callable) -> None:
"""
Test that source node will not run out of space if major compaction rewrites the sstables being streamed.
Expects the file streaming and major will both release sstables incrementally, reducing chances of 2x
space amplification.
Scenario:
- Create a 2-node cluster with limited disk space.
- Create a table with 2 tablets, one in each node
- Write 20% of node capacity to each tablet.
- Start decommissioning one node.
- During streaming, create a large file on the source node to push it over 85%
- Run major expecting the file streaming released the sstables incrementally. Had it not, source node runs out of space.
- Unblock streaming
- Verify that the decommission operation succeeds.
"""
cmdline = [*global_cmdline_with_disabled_monitor,
"--logger-log-level", "load_balancer=debug",
"--logger-log-level", "debug_error_injection=debug"
]
# the coordinator needs more space, so creating a 40M volume for it.
async with space_limited_servers(manager, volumes_factory, ["40M", "20M"], cmdline=cmdline,
property_file=[{"dc": "dc1", "rack": "r1"}]*2) as servers:
cql, _ = await manager.get_ready_cql(servers)
# servers[1] is the space-constrained node we will decommission.
workdir = await manager.server_get_workdir(servers[1].server_id)
log = await manager.server_open_log(servers[1].server_id)
async with new_test_keyspace(manager, f"WITH replication = {{'class': 'NetworkTopologyStrategy', 'dc1': ['{servers[1].rack}'] }}"
" AND tablets = {'initial': 2}") as ks:
await manager.disable_tablet_balancing()
# Needs 1mb fragments in order to stress incremental release in file streaming
extra_table_param = "WITH compaction = {'class' : 'IncrementalCompactionStrategy', 'sstable_size_in_mb' : '1'} and compression = {}"
async with new_test_table(manager, ks, "pk int PRIMARY KEY, t text", extra_table_param) as cf:
before_disk_info = psutil.disk_usage(workdir)
# About 4mb per tablet
await asyncio.gather(*[cql.run_async(query) for query in write_generator(cf, 8000)])
# split data into 1mb fragments
await manager.api.keyspace_flush(servers[1].ip_addr, ks)
await manager.api.keyspace_compaction(servers[1].ip_addr, ks)
after_disk_info = psutil.disk_usage(workdir)
percent_by_writes = after_disk_info.percent - before_disk_info.percent
logger.info(f"Percent taken by writes {percent_by_writes}")
# assert sstable data content account for more than 20% of node's storage.
assert percent_by_writes > 20
# We want to trap only migrations which happened during decommission
await manager.api.quiesce_topology(servers[0].ip_addr)
# Block tablet streaming right before it finishes, keeping it in-flight.
await manager.api.enable_injection(servers[1].ip_addr, "tablet_stream_files_end_wait", one_shot=True)
mark = await log.mark()
logger.info(f"Workdir {workdir}")
decomm_task = asyncio.create_task(manager.decommission_node(servers[1].server_id))
await manager.enable_tablet_balancing()
mark, _ = await log.wait_for("tablet_stream_files_end_wait: waiting", from_mark=mark)
disk_info = psutil.disk_usage(workdir)
# Fill the disk to ~85% while streaming is still paused.
with random_content_file(workdir, int(disk_info.total*0.85) - disk_info.used):
disk_info = psutil.disk_usage(workdir)
logger.info(f"Percent used before major {disk_info.percent}")
# Run major in order to try to reproduce 2x space amplification if files aren't released
# incrementally by streamer.
await manager.api.keyspace_compaction(servers[1].ip_addr, ks)
await asyncio.gather(*[cql.run_async(query) for query in write_generator(cf, 100)])
disk_info = psutil.disk_usage(workdir)
logger.info(f"Percent used after major {disk_info.percent}")
# Unblock streaming; decommission must now complete successfully.
await manager.api.message_injection(servers[1].ip_addr, "tablet_stream_files_end_wait")
await decomm_task

View File

@@ -1102,7 +1102,7 @@ SEASTAR_TEST_CASE(vector_store_client_https_wrong_hostname) {
}));
}
SEASTAR_TEST_CASE(vector_store_client_https_wrong_cacert_verification_error) {
SEASTAR_TEST_CASE(vector_store_client_https_different_ca_cert_verification_error) {
auto broken_cert = co_await seastar::make_tmp_file();
certificates certs;
auto server = co_await make_vs_mock_server(co_await make_server_credentials(certs));
@@ -1129,33 +1129,6 @@ SEASTAR_TEST_CASE(vector_store_client_https_wrong_cacert_verification_error) {
}));
}
// Verifies that an HTTPS request to the vector store fails cleanly when the
// configured truststore does not contain the server's CA. The endpoint host is
// used verbatim as its own DNS entry below — presumably an IP literal (per the
// test name); confirm against make_vs_mock_server(). The failure must surface
// from ann() as vector_store_client::service_unavailable, not as a crash/hang.
SEASTAR_TEST_CASE(vector_store_client_https_wrong_cacert_verification_error_host_is_ip) {
    // An empty temporary file is used as the truststore, so certificate
    // verification against the mock server's self-signed CA cannot succeed.
    auto broken_cert = co_await seastar::make_tmp_file();
    certificates certs;
    auto server = co_await make_vs_mock_server(co_await make_server_credentials(certs));
    auto cfg = make_config();
    cfg.db_config->vector_store_primary_uri.set(format("https://{}:{}", server->host(), server->port()));
    cfg.db_config->vector_store_encryption_options.set({{"truststore", broken_cert.get_path().string()}});
    co_await do_with_cql_env(
            [&](cql_test_env& env) -> future<> {
                auto as = abort_source_timeout();
                auto schema = co_await create_test_table(env, "ks", "idx");
                auto& vs = env.local_qp().vector_store_client();
                // Map the host to itself so name resolution succeeds and the
                // failure can only come from TLS verification.
                configure(vs).with_dns({{server->host(), std::vector<std::string>{server->host()}}});
                vs.start_background_tasks();
                auto keys = co_await vs.ann("ks", "idx", schema, std::vector<float>{0.1, 0.2, 0.3}, 2, rjson::empty_object(), as.reset());
                // The TLS failure is reported as an unavailable service, not an exception.
                BOOST_REQUIRE(!keys);
                BOOST_CHECK(std::holds_alternative<vector_store_client::service_unavailable>(keys.error()));
            },
            cfg)
            .finally(seastar::coroutine::lambda([&] -> future<> {
                co_await server->stop();
                co_await remove(broken_cert);
            }));
}
SEASTAR_TEST_CASE(vector_store_client_high_availability_unreachable) {
auto server = co_await make_vs_mock_server();
auto unreachable = co_await make_unreachable_socket();

View File

@@ -690,9 +690,6 @@ void cluster_repair_operation(scylla_rest_client& client, const bpo::variables_m
// will repair also their colocated tables.
continue;
}
if (tables.empty() && std::string(ex.what()).contains("Can't find a column family")) {
continue;
}
log("ERROR: Repair request for keyspace={} table={} failed with {}", keyspace, table, ex);
exit_code = EXIT_FAILURE;
}

View File

@@ -67,17 +67,14 @@ void result_message::visitor_base::visit(const result_message::exception& ex) {
ex.throw_me();
}
result_message::prepared::prepared(cql3::prepared_statements_cache::pinned_value_type prepared_entry, bool support_lwt_opt)
: _prepared_entry(std::move(prepared_entry))
result_message::prepared::prepared(cql3::statements::prepared_statement::checked_weak_ptr prepared, bool support_lwt_opt)
: _prepared(std::move(prepared))
, _metadata(
(*_prepared_entry)->bound_names,
(*_prepared_entry)->partition_key_bind_indices,
support_lwt_opt ? (*_prepared_entry)->statement->is_conditional() : false)
, _result_metadata{extract_result_metadata((*_prepared_entry)->statement)}
_prepared->bound_names,
_prepared->partition_key_bind_indices,
support_lwt_opt ? _prepared->statement->is_conditional() : false)
, _result_metadata{extract_result_metadata(_prepared->statement)}
{
for (const auto& w : (*_prepared_entry)->warnings){
add_warning(w);
}
}
::shared_ptr<const cql3::metadata> result_message::prepared::extract_result_metadata(::shared_ptr<cql3::cql_statement> statement) {

View File

@@ -13,7 +13,6 @@
#include <concepts>
#include "cql3/result_set.hh"
#include "cql3/prepared_statements_cache.hh"
#include "cql3/statements/prepared_statement.hh"
#include "cql3/query_options.hh"
@@ -31,14 +30,14 @@ namespace messages {
class result_message::prepared : public result_message {
private:
cql3::prepared_statements_cache::pinned_value_type _prepared_entry;
cql3::statements::prepared_statement::checked_weak_ptr _prepared;
cql3::prepared_metadata _metadata;
::shared_ptr<const cql3::metadata> _result_metadata;
protected:
prepared(cql3::prepared_statements_cache::pinned_value_type prepared_entry, bool support_lwt_opt);
prepared(cql3::statements::prepared_statement::checked_weak_ptr prepared, bool support_lwt_opt);
public:
cql3::statements::prepared_statement::checked_weak_ptr get_prepared() {
return (*_prepared_entry)->checked_weak_from_this();
cql3::statements::prepared_statement::checked_weak_ptr& get_prepared() {
return _prepared;
}
const cql3::prepared_metadata& metadata() const {
@@ -50,7 +49,7 @@ public:
}
cql3::cql_metadata_id_type get_metadata_id() const {
return (*_prepared_entry)->get_metadata_id();
return _prepared->get_metadata_id();
}
class cql;
@@ -167,8 +166,8 @@ std::ostream& operator<<(std::ostream& os, const result_message::set_keyspace& m
class result_message::prepared::cql : public result_message::prepared {
bytes _id;
public:
cql(const bytes& id, cql3::prepared_statements_cache::pinned_value_type prepared_entry, bool support_lwt_opt)
: result_message::prepared(std::move(prepared_entry), support_lwt_opt)
cql(const bytes& id, cql3::statements::prepared_statement::checked_weak_ptr p, bool support_lwt_opt)
: result_message::prepared(std::move(p), support_lwt_opt)
, _id{id}
{ }

View File

@@ -715,6 +715,15 @@ void write_collection_value(bytes_ostream& out, atomic_cell_value_view val) {
}
}
// Copies the bytes of `val` into `out`, spanning however many destination
// fragments are needed, and advances both views past the copied data.
// NOTE(review): assumes `out` has at least val.size() bytes remaining — if it
// runs out, current_fragment() would be empty and the loop would spin; confirm
// callers guarantee capacity.
void write_fragmented(managed_bytes_mutable_view& out, std::string_view val) {
    while (!val.empty()) {
        auto dst = out.current_fragment();
        const size_t chunk = std::min(val.size(), dst.size());
        memcpy(dst.data(), val.data(), chunk);
        val.remove_prefix(chunk);
        out.remove_prefix(chunk);
    }
}
template<std::integral T>
void write_simple(managed_bytes_mutable_view& out, std::type_identity_t<T> val) {
val = net::hton(val);

View File

@@ -566,16 +566,6 @@ inline managed_bytes::managed_bytes(const managed_bytes& o) {
}
}
// Copies `val` into the fragmented destination `out`, writing at most one
// destination fragment's worth per iteration; both views are advanced past
// the copied data.
// NOTE(review): assumes `out` has at least val.size() bytes remaining;
// otherwise current_fragment() would be empty and this would loop forever —
// confirm callers guarantee capacity.
inline
void write_fragmented(managed_bytes_mutable_view& out, std::string_view val) {
    while (val.size() > 0) {
        size_t current_n = std::min(val.size(), out.current_fragment().size());
        memcpy(out.current_fragment().data(), val.data(), current_n);
        val.remove_prefix(current_n);
        out.remove_prefix(current_n);
    }
}
template<>
struct appending_hash<managed_bytes_view> {
template<Hasher Hasher>

View File

@@ -10,7 +10,6 @@
#include <array>
#include <type_traits>
#include <algorithm>
#include "utils/allocation_strategy.hh"
@@ -28,8 +27,10 @@ private:
T _data[0];
external(external&& other) noexcept : _backref(other._backref) {
std::uninitialized_move(other._data, other._data + other._backref->_size, _data);
std::destroy(other._data, other._data + other._backref->_size);
for (unsigned i = 0; i < _backref->size(); i++) {
new (_data + i) T(std::move(other._data[i]));
other._data[i].~T();
}
_backref->_data = _data;
}
size_t storage_size() const noexcept {

View File

@@ -21,7 +21,6 @@
#include <chrono>
#include <fmt/format.h>
#include <netinet/tcp.h>
#include <seastar/net/inet_address.hh>
using namespace seastar;
using namespace std::chrono_literals;
@@ -29,10 +28,6 @@ using namespace std::chrono_literals;
namespace vector_search {
namespace {
bool is_ip_address(const sstring& host) {
return net::inet_address::parse_numerical(host).has_value();
}
class client_connection_factory : public http::experimental::connection_factory {
client::endpoint_type _endpoint;
shared_ptr<tls::certificate_credentials> _creds;
@@ -60,11 +55,7 @@ private:
future<connected_socket> connect() {
auto addr = socket_address(_endpoint.ip, _endpoint.port);
if (_creds) {
tls::tls_options opts;
if (!is_ip_address(_endpoint.host)) {
opts.server_name = _endpoint.host;
}
auto socket = co_await tls::connect(_creds, addr, std::move(opts));
auto socket = co_await tls::connect(_creds, addr, tls::tls_options{.server_name = _endpoint.host});
// tls::connect() only performs the TCP handshake — the TLS handshake is deferred until the first I/O operation.
// Force the TLS handshake to happen here so that the connection timeout applies to it.
co_await tls::check_session_is_resumed(socket);
@@ -133,7 +124,7 @@ seastar::future<client::request_result> client::request(
co_return std::unexpected{aborted_error{}};
}
if (is_server_problem(err)) {
handle_server_unavailable(err);
handle_server_unavailable();
}
co_return std::unexpected{co_await map_err(err)};
}
@@ -174,9 +165,8 @@ seastar::future<> client::close() {
co_await _http_client.close();
}
void client::handle_server_unavailable(std::exception_ptr err) {
void client::handle_server_unavailable() {
if (!is_checking_status_in_progress()) {
_logger.warn("Request to vector store {} {}:{} failed: {}", _endpoint.host, _endpoint.ip, _endpoint.port, err);
_checking_status_future = run_checking_status();
}
}

View File

@@ -12,7 +12,6 @@
#include "utils/log.hh"
#include "utils/updateable_value.hh"
#include <chrono>
#include <exception>
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include <seastar/core/abort_source.hh>
@@ -61,7 +60,7 @@ private:
seastar::future<response> request_impl(seastar::httpd::operation_type method, seastar::sstring path, std::optional<seastar::sstring> content,
std::optional<seastar::http::reply::status_type>&& expected, seastar::abort_source& as);
seastar::future<bool> check_status();
void handle_server_unavailable(std::exception_ptr err);
void handle_server_unavailable();
seastar::future<> run_checking_status();
bool is_checking_status_in_progress() const;
std::chrono::milliseconds backoff_retry_max() const;

View File

@@ -18,6 +18,15 @@
static_assert(-1 == ~0, "Not a twos-complement architecture");
// Number of leading zero bits in `n`. Unlike count_leading_zeros(), this also
// accepts n == 0, for which it returns the full bit width of uint64_t (64).
static vint_size_type count_leading_zero_bits(uint64_t n) noexcept {
    constexpr auto full_width = std::numeric_limits<uint64_t>::digits;
    return n == 0 ? vint_size_type(full_width) : vint_size_type(count_leading_zeros(n));
}
static constexpr uint64_t encode_zigzag(int64_t n) noexcept {
// The right shift has to be arithmetic and not logical.
return (static_cast<uint64_t>(n) << 1) ^ static_cast<uint64_t>(n >> 63);
@@ -46,9 +55,16 @@ int64_t signed_vint::deserialize(bytes_view v) {
return decode_zigzag(un);
}
vint_size_type signed_vint::serialized_size_from_first_byte(bytes::value_type first_byte) {
return unsigned_vint::serialized_size_from_first_byte(first_byte);
}
// The number of additional bytes that we need to read.
static vint_size_type count_extra_bytes(int8_t first_byte) {
return std::countl_zero(static_cast<uint8_t>(~first_byte));
// Sign extension.
const int64_t v(first_byte);
return count_leading_zero_bits(static_cast<uint64_t>(~v)) - vint_size_type(64 - 8);
}
static void encode(uint64_t value, vint_size_type size, bytes::iterator out) {
@@ -123,3 +139,8 @@ uint64_t unsigned_vint::deserialize(bytes_view v) {
#endif
return result;
}
// Total encoded size (in bytes) of an unsigned vint, derived from its first
// byte alone: a clear high bit means a single-byte encoding; otherwise the
// leading one-bits encode how many extra bytes follow (see count_extra_bytes).
vint_size_type unsigned_vint::serialized_size_from_first_byte(bytes::value_type first_byte) {
    const auto signed_byte = static_cast<int8_t>(first_byte);
    if (signed_byte >= 0) {
        return 1;
    }
    return 1 + count_extra_bytes(signed_byte);
}

View File

@@ -35,7 +35,6 @@
#include "bytes.hh"
#include <cstdint>
#include <bit>
using vint_size_type = bytes::size_type;
@@ -50,9 +49,7 @@ struct unsigned_vint final {
static value_type deserialize(bytes_view v);
static vint_size_type serialized_size_from_first_byte(bytes::value_type first_byte) {
return 1 + std::countl_zero(static_cast<uint8_t>(~first_byte));
}
static vint_size_type serialized_size_from_first_byte(bytes::value_type first_byte);
};
struct signed_vint final {
@@ -64,7 +61,5 @@ struct signed_vint final {
static value_type deserialize(bytes_view v);
static vint_size_type serialized_size_from_first_byte(bytes::value_type first_byte) {
return unsigned_vint::serialized_size_from_first_byte(first_byte);
}
static vint_size_type serialized_size_from_first_byte(bytes::value_type first_byte);
};