repair: Reduce max row buf size when small table optimization is on

If small_table_optimization is on, a repair works on a whole table simultaneously. It may be distributed across the whole cluster and all nodes might participate in repair. On a repair master, the row buffer is copied for each repair peer. This means that the memory usage scales with the number of peers. In large clusters, repair with small_table_optimization leads to OOM. Divide the max_row_buf_size by the number of repair peers if small_table_optimization is on. Use max_row_buf_size to calculate the number of units taken from mem_sem. Fixes: https://github.com/scylladb/scylladb/issues/22244. Closes scylladb/scylladb#24868 (cherry picked from commit 17272c2f3b) Closes scylladb/scylladb#24904
Update pgo profiles - aarch64
2025-07-15 09:38:16 +03:00 · 2025-07-15 04:37:19 +03:00 · 2025-07-15 04:36:48 +03:00 · 2025-07-14 14:28:05 +03:00 · 2025-07-14 11:44:44 +02:00 · 2025-07-14 12:06:29 +03:00
537 changed files with 19109 additions and 9644 deletions
--- a/.github/workflows/make-pr-ready-for-review.yaml
+++ b/.github/workflows/make-pr-ready-for-review.yaml
@@ -0,0 +1,27 @@
+name: Mark PR as Ready When Conflicts Label is Removed
+
+on:
+  pull_request_target:
+    types:
+      - unlabeled
+
+env:
+  DEFAULT_BRANCH: 'master'
+
+jobs:
+  mark-ready:
+    if: github.event.label.name == 'conflicts'
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ github.repository }}
+          ref: ${{ env.DEFAULT_BRANCH }}
+          token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
+          fetch-depth: 1
+      - name: Mark pull request as ready for review
+        run:  gh pr ready "${{ github.event.pull_request.number }}"
+        env:
+          GITHUB_TOKEN: ${{ secrets.AUTO_BACKPORT_TOKEN }}
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "seastar"]
 	path = seastar
-	url = ../seastar
+	url = ../scylla-seastar
 	ignore = dirty
 [submodule "swagger-ui"]
 	path = swagger-ui
--- a/2
+++ b/2
@@ -78,7 +78,7 @@ fi

 # Default scylla product/version tags
 PRODUCT=scylla
-VERSION=2025.1.0-dev
+VERSION=2025.1.5

 if test -f version
 then
--- a/alternator/consumed_capacity.cc
+++ b/alternator/consumed_capacity.cc
@@ -24,7 +24,7 @@ static constexpr uint64_t KB = 1024ULL;
 static constexpr uint64_t RCU_BLOCK_SIZE_LENGTH = 4*KB;
 static constexpr uint64_t WCU_BLOCK_SIZE_LENGTH = 1*KB;

-static bool should_add_capacity(const rjson::value& request) {
+bool consumed_capacity_counter::should_add_capacity(const rjson::value& request) {
    const rjson::value* return_consumed = rjson::find(request, "ReturnConsumedCapacity");
    if (!return_consumed) {
        return false;
@@ -62,15 +62,22 @@ static uint64_t calculate_half_units(uint64_t unit_block_size, uint64_t total_by
 rcu_consumed_capacity_counter::rcu_consumed_capacity_counter(const rjson::value& request, bool is_quorum) :
        consumed_capacity_counter(should_add_capacity(request)),_is_quorum(is_quorum) {
 }
+uint64_t rcu_consumed_capacity_counter::get_half_units(uint64_t total_bytes, bool is_quorum) noexcept {
+    return calculate_half_units(RCU_BLOCK_SIZE_LENGTH, total_bytes, is_quorum);
+}

 uint64_t rcu_consumed_capacity_counter::get_half_units() const noexcept {
-    return calculate_half_units(RCU_BLOCK_SIZE_LENGTH, _total_bytes, _is_quorum);
+    return get_half_units(_total_bytes, _is_quorum);
 }

 uint64_t wcu_consumed_capacity_counter::get_half_units() const noexcept {
    return calculate_half_units(WCU_BLOCK_SIZE_LENGTH, _total_bytes, true);
 }

+uint64_t wcu_consumed_capacity_counter::get_units(uint64_t total_bytes) noexcept {
+    return calculate_half_units(WCU_BLOCK_SIZE_LENGTH, total_bytes, true) * HALF_UNIT_MULTIPLIER;
+}
+
 wcu_consumed_capacity_counter::wcu_consumed_capacity_counter(const rjson::value& request) :
        consumed_capacity_counter(should_add_capacity(request)) {
 }
--- a/alternator/consumed_capacity.hh
+++ b/alternator/consumed_capacity.hh
@@ -42,21 +42,25 @@ public:
     */
    virtual uint64_t get_half_units() const noexcept = 0;
    uint64_t _total_bytes = 0;
+    static bool should_add_capacity(const rjson::value& request);
 protected:
    bool _should_add_to_reponse = false;
 };

 class rcu_consumed_capacity_counter : public consumed_capacity_counter {
-    virtual uint64_t get_half_units() const noexcept;
    bool _is_quorum = false;
 public:
    rcu_consumed_capacity_counter(const rjson::value& request, bool is_quorum);
+    rcu_consumed_capacity_counter(): consumed_capacity_counter(false), _is_quorum(false){}
+    virtual uint64_t get_half_units() const noexcept;
+    static uint64_t get_half_units(uint64_t total_bytes, bool is_quorum) noexcept;
 };

 class wcu_consumed_capacity_counter : public consumed_capacity_counter {
    virtual uint64_t get_half_units() const noexcept;
 public:
    wcu_consumed_capacity_counter(const rjson::value& request);
+    static uint64_t get_units(uint64_t total_bytes) noexcept;
 };

 }
--- a/alternator/error.hh
+++ b/alternator/error.hh
@@ -88,6 +88,9 @@ public:
    static api_error table_not_found(std::string msg) {
        return api_error("TableNotFoundException", std::move(msg));
    }
+    static api_error limit_exceeded(std::string msg) {
+        return api_error("LimitExceededException", std::move(msg));
+    }
    static api_error internal(std::string msg) {
        return api_error("InternalServerError", std::move(msg), http::reply::status_type::internal_server_error);
    }
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
--- a/alternator/executor.hh
+++ b/alternator/executor.hh
@@ -241,7 +241,8 @@ public:
        const query::partition_slice&& slice,
        shared_ptr<cql3::selection::selection> selection,
        foreign_ptr<lw_shared_ptr<query::result>> query_result,
-        shared_ptr<const std::optional<attrs_to_get>> attrs_to_get);
+        shared_ptr<const std::optional<attrs_to_get>> attrs_to_get,
+        uint64_t& rcu_half_units);

    static void describe_single_item(const cql3::selection::selection&,
        const std::vector<managed_bytes_opt>&,
--- a/alternator/extract_from_attrs.hh
+++ b/alternator/extract_from_attrs.hh
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2024-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#pragma once
+
+#include <string>
+#include <string_view>
+
+#include "utils/rjson.hh"
+#include "serialization.hh"
+#include "column_computation.hh"
+#include "db/view/regular_column_transformation.hh"
+
+namespace alternator {
+
+// An implementation of a "column_computation" which extracts a specific
+// non-key attribute from the big map (":attrs") of all non-key attributes,
+// and deserializes it if it has the desired type. GSI will use this computed
+// column as a materialized-view key when the view key attribute isn't a
+// full-fledged CQL column but rather stored in ":attrs".
+class extract_from_attrs_column_computation : public regular_column_transformation {
+    // The name of the CQL column holding the attribute map. It is a
+    // constant defined in executor.cc (as ":attrs"), so doesn't need
+    // to be specified when constructing the column computation.
+    static const bytes MAP_NAME;
+    // The top-level attribute name to extract from the ":attrs" map.
+    bytes _attr_name;
+    // The type we expect for the value stored in the attribute. If the type
+    // matches the expected type, it is decoded (from the serialized format
+    // we store in the map's values) into the raw CQL type value that we use
+    // for keys, and returned by compute_value(). Only the types "S" (string),
+    // "B" (bytes) and "N" (number) are allowed as keys in DynamoDB, and
+    // therefore in desired_type.
+    alternator_type _desired_type;
+public:
+    virtual column_computation_ptr clone() const override;
+    // TYPE_NAME is a unique string that distinguishes this class from other
+    // column_computation subclasses. column_computation::deserialize() will
+    // construct an object of this subclass if it sees a "type" TYPE_NAME.
+    static inline const std::string TYPE_NAME = "alternator_extract_from_attrs";
+    // Serialize the *definition* of this column computation into a JSON
+    // string with a unique "type" string - TYPE_NAME - which then causes
+    // column_computation::deserialize() to create an object from this class.
+    virtual bytes serialize() const override;
+    // Construct this object based on the previous output of serialize().
+    // Calls on_internal_error() if the string doesn't match the output format
+    // of serialize(). "type" is not checked - column_computation::deserialize()
+    // won't call this constructor if "type" doesn't match.
+    extract_from_attrs_column_computation(const rjson::value &v);
+    extract_from_attrs_column_computation(bytes_view attr_name, alternator_type desired_type)
+        : _attr_name(attr_name), _desired_type(desired_type)
+        {}
+    // Implement regular_column_transformation's compute_value() that
+    // accepts the full row:
+    result compute_value(const schema& schema, const partition_key& key,
+        const db::view::clustering_or_static_row& row) const override;
+    // But do not implement column_computation's compute_value() that
+    // accepts only a partition key - that's not enough so our implementation
+    // of this function does on_internal_error().
+    bytes compute_value(const schema& schema, const partition_key& key) const override;
+    // This computed column does depend on a non-primary key column, so
+    // its result may change in the update and we need to compute it
+    // before and after the update.
+    virtual bool depends_on_non_primary_key_column() const override {
+        return true;
+    }
+};
+} // namespace alternator
--- a/alternator/serialization.cc
+++ b/alternator/serialization.cc
@@ -245,6 +245,27 @@ rjson::value deserialize_item(bytes_view bv) {
    return deserialized;
 }

+// This function takes a bytes_view created earlier by serialize_item(), and
+// if it has the type "expected_type", the function returns the value as a
+// raw Scylla type. If the type doesn't match, returns an unset optional.
+// This function only supports the key types S (string), B (bytes) and N
+// (number) - serialize_item() serializes those types as a single-byte type
+// followed by the serialized raw Scylla type, so all this function needs to
+// do is to remove the first byte. This makes this function much more
+// efficient than deserialize_item() above because it avoids transformation
+// to/from JSON.
+std::optional<bytes> serialized_value_if_type(bytes_view bv, alternator_type expected_type) {
+    if (bv.empty() || alternator_type(bv[0]) != expected_type) {
+        return std::nullopt;
+    }
+    // Currently, serialize_item()'s output for types in alternator_type (notably
+    // S, B and N) is nothing more than Scylla's raw format for these types
+    // preceded by a type byte. So we just need to skip that byte and we are
+    // left with exactly what we need to return.
+    bv.remove_prefix(1);
+    return bytes(bv);
+}
+
 std::string type_to_string(data_type type) {
    static thread_local std::unordered_map<data_type, std::string> types = {
        {utf8_type, "S"},
--- a/alternator/serialization.hh
+++ b/alternator/serialization.hh
@@ -43,6 +43,7 @@ type_representation represent_type(alternator_type atype);

 bytes serialize_item(const rjson::value& item);
 rjson::value deserialize_item(bytes_view bv);
+std::optional<bytes> serialized_value_if_type(bytes_view bv, alternator_type expected_type);

 std::string type_to_string(data_type type);

--- a/alternator/stats.cc
+++ b/alternator/stats.cc
@@ -94,16 +94,16 @@ stats::stats() : api_operations{} {
                    seastar::metrics::description("number of rows read during filtering operations")),
            seastar::metrics::make_total_operations("filtered_rows_matched_total", cql_stats.filtered_rows_matched_total,
                    seastar::metrics::description("number of rows read and matched during filtering operations")),
-            seastar::metrics::make_counter("rcu_total", rcu_total,
-                    seastar::metrics::description("total number of consumed read units, counted as half units")).set_skip_when_empty(),
+            seastar::metrics::make_counter("rcu_total", [this]{return 0.5 * rcu_half_units_total;},
+                    seastar::metrics::description("total number of consumed read units")).set_skip_when_empty(),
            seastar::metrics::make_counter("wcu_total", wcu_total[wcu_types::PUT_ITEM],
-                    seastar::metrics::description("total number of consumed write units, counted as half units"),{op("PutItem")}).set_skip_when_empty(),
+                    seastar::metrics::description("total number of consumed write units"),{op("PutItem")}).set_skip_when_empty(),
            seastar::metrics::make_counter("wcu_total", wcu_total[wcu_types::DELETE_ITEM],
-                    seastar::metrics::description("total number of consumed write units, counted as half units"),{op("DeleteItem")}).set_skip_when_empty(),
+                    seastar::metrics::description("total number of consumed write units"),{op("DeleteItem")}).set_skip_when_empty(),
            seastar::metrics::make_counter("wcu_total", wcu_total[wcu_types::UPDATE_ITEM],
-                    seastar::metrics::description("total number of consumed write units, counted as half units"),{op("UpdateItem")}).set_skip_when_empty(),
+                    seastar::metrics::description("total number of consumed write units"),{op("UpdateItem")}).set_skip_when_empty(),
            seastar::metrics::make_counter("wcu_total", wcu_total[wcu_types::INDEX],
-                    seastar::metrics::description("total number of consumed write units, counted as half units"),{op("Index")}).set_skip_when_empty(),
+                    seastar::metrics::description("total number of consumed write units"),{op("Index")}).set_skip_when_empty(),
            seastar::metrics::make_total_operations("filtered_rows_dropped_total", [this] { return cql_stats.filtered_rows_read_total - cql_stats.filtered_rows_matched_total; },
                    seastar::metrics::description("number of rows read and dropped during filtering operations")),
            seastar::metrics::make_counter("batch_item_count", seastar::metrics::description("The total number of items processed across all batches"),{op("BatchWriteItem")},
--- a/alternator/stats.hh
+++ b/alternator/stats.hh
@@ -84,7 +84,7 @@ public:
    uint64_t shard_bounce_for_lwt = 0;
    uint64_t requests_blocked_memory = 0;
    uint64_t requests_shed = 0;
-    uint64_t rcu_total = 0;
+    uint64_t rcu_half_units_total = 0;
    // wcu can results from put, update, delete and index
    // Index related will be done on top of the operation it comes with
    enum wcu_types {
--- a/alternator/streams.cc
+++ b/alternator/streams.cc
@@ -808,6 +808,9 @@ future<executor::request_return_type> executor::get_records(client_state& client
    if (limit < 1) {
        throw api_error::validation("Limit must be 1 or more");
    }
+    if (limit > 1000) {
+        throw api_error::validation("Limit must be less than or equal to 1000");
+    }

    auto db = _proxy.data_dictionary();
    schema_ptr schema, base;
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -2836,7 +2836,7 @@
               "nickname":"repair_tablet",
               "method":"POST",
               "summary":"Repair a tablet",
-               "type":"void",
+               "type":"tablet_repair_result",
               "produces":[
                  "application/json"
               ],
@@ -2864,6 +2864,30 @@
                     "allowMultiple":false,
                     "type":"string",
                     "paramType":"query"
+                  },
+                  {
+                     "name":"hosts_filter",
+                     "description":"Repair replicas listed in the comma-separated host_id list.",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"dcs_filter",
+                     "description":"Repair replicas listed in the comma-separated DC list",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"await_completion",
+                     "description":"Set true to wait for the repair to complete. Set false to skip waiting for the repair to complete. When the option is not provided, it defaults to false.",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
                  }
               ]
            }
@@ -3037,6 +3061,22 @@
               ]
            }
         ]
+      },
+      {
+         "path":"/storage_service/raft_topology/cmd_rpc_status",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get information about currently running topology cmd rpc",
+               "type":"string",
+               "nickname":"raft_topology_get_cmd_status",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
      }
   ],
   "models":{
@@ -3287,6 +3327,15 @@
                }
            }
        }
+      },
+      "tablet_repair_result":{
+        "id":"tablet_repair_result",
+        "description":"Tablet repair result",
+        "properties":{
+            "tablet_task_id":{
+                "type":"string"
+            }
+        }
      }
   }
 }
--- a/api/api-doc/task_manager.json
+++ b/api/api-doc/task_manager.json
@@ -253,6 +253,30 @@
               ]
            }
         ]
+      },
+      {
+         "path":"/task_manager/drain/{module}",
+         "operations":[
+            {
+               "method":"POST",
+               "summary":"Drain finished local tasks",
+               "type":"void",
+               "nickname":"drain_tasks",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"module",
+                     "description":"The module to drain",
+                     "required":true,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"path"
+                  }
+               ]
+            }
+         ]
      }
   ],
   "models":{
--- a/api/cql_server_test.cc
+++ b/api/cql_server_test.cc
@@ -6,6 +6,8 @@
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
 */

+#include "build_mode.hh"
+
 #ifndef SCYLLA_BUILD_MODE_RELEASE

 #include <seastar/core/coroutine.hh>
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -1475,6 +1475,16 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        co_return sstring(format("{}", ustate));
    });

+    ss::raft_topology_get_cmd_status.set(r, [&ss] (std::unique_ptr<http::request> req) -> future<json_return_type> {
+        const auto status = co_await ss.invoke_on(0, [] (auto& ss) {
+            return ss.get_topology_cmd_status();
+        });
+        if (status.active_dst.empty()) {
+            co_return sstring("none");
+        }
+        co_return sstring(fmt::format("{}[{}]: {}", status.current, status.index, fmt::join(status.active_dst, ",")));
+    });
+
    ss::move_tablet.set(r, [&ctx, &ss] (std::unique_ptr<http::request> req) -> future<json_return_type> {
        auto src_host_id = validate_host_id(req->get_query_param("src_host"));
        shard_id src_shard_id = validate_int(req->get_query_param("src_shard"));
@@ -1543,6 +1553,11 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        }
        auto ks = req->get_query_param("ks");
        auto table = req->get_query_param("table");
+        bool await_completion = false;
+        auto await = req->get_query_param("await_completion");
+        if (!await.empty()) {
+            await_completion = validate_bool(await);
+        }
        validate_table(ctx, ks, table);
        auto table_id = ctx.db.local().find_column_family(ks, table).schema()->id();
        std::variant<utils::chunked_vector<dht::token>, service::storage_service::all_tokens_tag> tokens_variant;
@@ -1551,8 +1566,22 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        } else {
            tokens_variant = tokens;
        }
+        auto hosts = req->get_query_param("hosts_filter");
+        auto dcs = req->get_query_param("dcs_filter");

-        auto res = co_await ss.local().add_repair_tablet_request(table_id, tokens_variant);
+        std::unordered_set<locator::host_id> hosts_filter;
+        if (!hosts.empty()) {
+            std::string delim = ",";
+            hosts_filter = std::ranges::views::split(hosts, delim) | std::views::transform([](auto&& h) {
+                try {
+                    return locator::host_id(utils::UUID(std::string_view{h}));
+                } catch (...) {
+                    throw httpd::bad_param_exception(fmt::format("Wrong host_id format {}", h));
+                }
+            }) | std::ranges::to<std::unordered_set>();
+        }
+        auto dcs_filter = locator::tablet_task_info::deserialize_repair_dcs_filter(dcs);
+        auto res = co_await ss.local().add_repair_tablet_request(table_id, tokens_variant, hosts_filter, dcs_filter, await_completion);
        co_return json::json_return_type(res);
    });

@@ -1653,6 +1682,7 @@ void unset_storage_service(http_context& ctx, routes& r) {
    ss::reload_raft_topology_state.unset(r);
    ss::upgrade_to_raft_topology.unset(r);
    ss::raft_topology_upgrade_status.unset(r);
+    ss::raft_topology_get_cmd_status.unset(r);
    ss::move_tablet.unset(r);
    ss::add_tablet_replica.unset(r);
    ss::del_tablet_replica.unset(r);
--- a/api/task_manager.cc
+++ b/api/task_manager.cc
@@ -232,6 +232,32 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
        uint32_t user_ttl = cfg.user_task_ttl_seconds();
        co_return json::json_return_type(user_ttl);
    });
+
+    tm::drain_tasks.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+        co_await tm.invoke_on_all([&req] (tasks::task_manager& tm) -> future<> {
+            tasks::task_manager::module_ptr module;
+            try {
+                module = tm.find_module(req->get_path_param("module"));
+            } catch (...) {
+                throw bad_param_exception(fmt::format("{}", std::current_exception()));
+            }
+
+            const auto& local_tasks = module->get_local_tasks();
+            std::vector<tasks::task_id> ids;
+            ids.reserve(local_tasks.size());
+            std::transform(begin(local_tasks), end(local_tasks), std::back_inserter(ids), [] (const auto& task) {
+                return task.second->is_complete() ? task.first : tasks::task_id::create_null_id();
+            });
+
+            for (auto&& id : ids) {
+                if (id) {
+                    module->unregister_task(id);
+                }
+                co_await maybe_yield();
+            }
+        });
+        co_return json_void();
+    });
 }

 void unset_task_manager(http_context& ctx, routes& r) {
@@ -243,6 +269,7 @@ void unset_task_manager(http_context& ctx, routes& r) {
    tm::get_task_status_recursively.unset(r);
    tm::get_and_update_ttl.unset(r);
    tm::get_ttl.unset(r);
+    tm::drain_tasks.unset(r);
 }

 }
--- a/api/task_manager_test.cc
+++ b/api/task_manager_test.cc
@@ -6,6 +6,9 @@
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
 */

+
+#include "build_mode.hh"
+
 #ifndef SCYLLA_BUILD_MODE_RELEASE

 #include <seastar/core/coroutine.hh>
--- a/api/token_metadata.cc
+++ b/api/token_metadata.cc
@@ -74,6 +74,9 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
    });

    ss::get_host_id_map.set(r, [&tm, &g](const_req req) {
+        if (!g.local().is_enabled()) {
+            throw std::runtime_error("The gossiper is not ready yet");
+        }
        std::vector<ss::mapper> res;
        auto map = tm.local().get()->get_host_ids() |
            std::views::transform([&g] (locator::host_id id) { return std::make_pair(g.local().get_address_map().get(id), id); }) |
--- a/audit/audit_syslog_storage_helper.cc
+++ b/audit/audit_syslog_storage_helper.cc
@@ -33,20 +33,6 @@ namespace audit {

 namespace {

-future<> syslog_send_helper(net::datagram_channel& sender,
-                            const socket_address& address,
-                            const sstring& msg) {
-    return sender.send(address, net::packet{msg.data(), msg.size()}).handle_exception([address](auto&& exception_ptr) {
-        auto error_msg = seastar::format(
-            "Syslog audit backend failed (sending a message to {} resulted in {}).",
-            address,
-            exception_ptr
-        );
-        logger.error("{}", error_msg);
-        throw audit_exception(std::move(error_msg));
-    });
-}
-
 static auto syslog_address_helper(const db::config& cfg)
 {
    return cfg.audit_unix_socket_path.is_set()
@@ -56,9 +42,26 @@ static auto syslog_address_helper(const db::config& cfg)

 }

+future<> audit_syslog_storage_helper::syslog_send_helper(const sstring& msg) {
+    try {
+        auto lock = co_await get_units(_semaphore, 1, std::chrono::hours(1));
+        co_await _sender.send(_syslog_address, net::packet{msg.data(), msg.size()});
+    }
+    catch (const std::exception& e) {
+        auto error_msg = seastar::format(
+            "Syslog audit backend failed (sending a message to {} resulted in {}).",
+            _syslog_address,
+            e
+        );
+        logger.error("{}", error_msg);
+        throw audit_exception(std::move(error_msg));
+    }
+}
+
 audit_syslog_storage_helper::audit_syslog_storage_helper(cql3::query_processor& qp, service::migration_manager&) :
    _syslog_address(syslog_address_helper(qp.db().get_config())),
-    _sender(make_unbound_datagram_channel(AF_UNIX)) {
+    _sender(make_unbound_datagram_channel(AF_UNIX)),
+    _semaphore(1) {
 }

 audit_syslog_storage_helper::~audit_syslog_storage_helper() {
@@ -73,10 +76,10 @@ audit_syslog_storage_helper::~audit_syslog_storage_helper() {
 */
 future<> audit_syslog_storage_helper::start(const db::config& cfg) {
    if (this_shard_id() != 0) {
-        return make_ready_future();
+        co_return;
    }

-    return syslog_send_helper(_sender, _syslog_address, "Initializing syslog audit backend.");
+    co_await syslog_send_helper("Initializing syslog audit backend.");
 }

 future<> audit_syslog_storage_helper::stop() {
@@ -106,7 +109,7 @@ future<> audit_syslog_storage_helper::write(const audit_info* audit_info,
                                    audit_info->table(),
                                    username);

-    return syslog_send_helper(_sender, _syslog_address, msg);
+    co_await syslog_send_helper(msg);
 }

 future<> audit_syslog_storage_helper::write_login(const sstring& username,
@@ -125,7 +128,7 @@ future<> audit_syslog_storage_helper::write_login(const sstring& username,
                                    username,
                                    (error ? "true" : "false"));

-    co_await syslog_send_helper(_sender, _syslog_address, msg.c_str());
+    co_await syslog_send_helper(msg.c_str());
 }

 using registry = class_registrator<storage_helper, audit_syslog_storage_helper, cql3::query_processor&, service::migration_manager&>;
--- a/audit/audit_syslog_storage_helper.hh
+++ b/audit/audit_syslog_storage_helper.hh
@@ -24,6 +24,9 @@ namespace audit {
 class audit_syslog_storage_helper : public storage_helper {
    socket_address _syslog_address;
    net::datagram_channel _sender;
+    seastar::semaphore _semaphore;
+
+    future<> syslog_send_helper(const sstring& msg);
 public:
    explicit audit_syslog_storage_helper(cql3::query_processor&, service::migration_manager&);
    virtual ~audit_syslog_storage_helper();
--- a/cache_mutation_reader.hh
+++ b/cache_mutation_reader.hh
@@ -123,6 +123,9 @@ class cache_mutation_reader final : public mutation_reader::impl {
    gc_clock::time_point _read_time;
    gc_clock::time_point _gc_before;

+    api::timestamp_type _max_purgeable_timestamp = api::missing_timestamp;
+    api::timestamp_type _max_purgeable_timestamp_shadowable = api::missing_timestamp;
+
    future<> do_fill_buffer();
    future<> ensure_underlying();
    void copy_from_cache_to_buffer();
@@ -207,6 +210,11 @@ class cache_mutation_reader final : public mutation_reader::impl {
        return gc_clock::time_point::min();
    }

+    bool can_gc(tombstone t, is_shadowable is) const {
+        const auto max_purgeable = is ? _max_purgeable_timestamp_shadowable : _max_purgeable_timestamp;
+        return t.timestamp < max_purgeable;
+    }
+
 public:
    cache_mutation_reader(schema_ptr s,
                               dht::decorated_key dk,
@@ -228,8 +236,19 @@ public:
        , _read_time(get_read_time())
        , _gc_before(get_gc_before(*_schema, dk, _read_time))
    {
-        clogger.trace("csm {}: table={}.{}, reversed={}, snap={}", fmt::ptr(this), _schema->ks_name(), _schema->cf_name(), _read_context.is_reversed(),
-                      fmt::ptr(&*_snp));
+        _max_purgeable_timestamp = ctx.get_max_purgeable(dk, is_shadowable::no);
+        _max_purgeable_timestamp_shadowable = ctx.get_max_purgeable(dk, is_shadowable::yes);
+
+        clogger.trace("csm {}: table={}.{}, dk={}, gc-before={}, max-purgeable-regular={}, max-purgeable-shadowable={}, reversed={}, snap={}",
+                fmt::ptr(this),
+                _schema->ks_name(),
+                _schema->cf_name(),
+                dk,
+                _gc_before,
+                _max_purgeable_timestamp,
+                _max_purgeable_timestamp_shadowable,
+                _read_context.is_reversed(),
+                fmt::ptr(&*_snp));
        push_mutation_fragment(*_schema, _permit, partition_start(std::move(dk), _snp->partition_tombstone()));
    }
    cache_mutation_reader(schema_ptr s,
@@ -787,12 +806,12 @@ void cache_mutation_reader::copy_from_cache_to_buffer() {
            t.apply(range_tomb);

            auto row_tomb_expired = [&](row_tombstone tomb) {
-                return (tomb && tomb.max_deletion_time() < _gc_before);
+                return (tomb && tomb.max_deletion_time() < _gc_before && can_gc(tomb.tomb(), tomb.is_shadowable()));
            };

            auto is_row_dead = [&](const deletable_row& row) {
                auto& m = row.marker();
-                return (!m.is_missing() && m.is_dead(_read_time) && m.deletion_time() < _gc_before);
+                return (!m.is_missing() && m.is_dead(_read_time) && m.deletion_time() < _gc_before && can_gc(tombstone(m.timestamp(), m.deletion_time()), is_shadowable::no));
            };

            if (row_tomb_expired(t) || is_row_dead(row)) {
@@ -800,9 +819,11 @@ void cache_mutation_reader::copy_from_cache_to_buffer() {

                _read_context.cache()._tracker.on_row_compacted();

+                auto mutation_can_gc = can_gc_fn([this] (tombstone t, is_shadowable is) { return can_gc(t, is); });
+
                with_allocator(_snp->region().allocator(), [&] {
                    deletable_row row_copy(row_schema, row);
-                    row_copy.compact_and_expire(row_schema, t.tomb(), _read_time, always_gc, _gc_before, nullptr);
+                    row_copy.compact_and_expire(row_schema, t.tomb(), _read_time, mutation_can_gc, _gc_before, nullptr);
                    std::swap(row, row_copy);
                });
                remove_row = row.empty();
--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -365,6 +365,9 @@ cdc::topology_description make_new_generation_description(
        const noncopyable_function<std::pair<size_t, uint8_t>(dht::token)>& get_sharding_info,
        const locator::token_metadata_ptr tmptr) {
    const auto tokens = get_tokens(bootstrap_tokens, tmptr);
+    if (tokens.empty()) {
+        on_internal_error(cdc_log, "Attempted to create a CDC generation from an empty list of tokens");
+    }

    utils::chunked_vector<token_range_description> vnode_descriptions;
    vnode_descriptions.reserve(tokens.size());
@@ -1112,7 +1115,9 @@ future<bool> generation_service::legacy_do_handle_cdc_generation(cdc::generation
    auto sys_dist_ks = get_sys_dist_ks();
    auto gen = co_await retrieve_generation_data(gen_id, _sys_ks.local(), *sys_dist_ks, { _token_metadata.get()->count_normal_token_owners() });
    if (!gen) {
-        throw std::runtime_error(fmt::format(
+        // This may happen during raft upgrade when a node gossips about a generation that
+        // was propagated through raft and we didn't apply it yet.
+        throw generation_handling_nonfatal_exception(fmt::format(
            "Could not find CDC generation {} in distributed system tables (current time: {}),"
            " even though some node gossiped about it.",
            gen_id, db_clock::now()));
--- a/cdc/metadata.cc
+++ b/cdc/metadata.cc
@@ -186,7 +186,7 @@ bool cdc::metadata::prepare(db_clock::time_point tp) {
    }

    auto ts = to_ts(tp);
-    auto emplaced = _gens.emplace(to_ts(tp), std::nullopt).second;
+    auto [it, emplaced] = _gens.emplace(to_ts(tp), std::nullopt);

    if (_last_stream_timestamp != api::missing_timestamp) {
        auto last_correct_gen = gen_used_at(_last_stream_timestamp);
@@ -201,5 +201,5 @@ bool cdc::metadata::prepare(db_clock::time_point tp) {
        }
    }

-    return emplaced;
+    return !it->second;
 }
--- a/compaction/compaction_manager.cc
+++ b/compaction/compaction_manager.cc
@@ -15,6 +15,7 @@
 #include "sstables/sstables_manager.hh"
 #include <memory>
 #include <fmt/ranges.h>
+#include <seastar/core/future.hh>
 #include <seastar/core/metrics.hh>
 #include <seastar/core/coroutine.hh>
 #include <seastar/coroutine/switch_to.hh>
@@ -503,7 +504,7 @@ public:

    virtual ~sstables_task_executor() = default;

-    virtual void release_resources() noexcept override;
+    virtual future<> release_resources() noexcept override;

    virtual future<tasks::task_manager::task::progress> get_progress() const override {
        return compaction_task_impl::get_progress(_compaction_data, _progress_monitor);
@@ -788,9 +789,10 @@ compaction::compaction_state::~compaction_state() {
    compaction_done.broken();
 }

-void sstables_task_executor::release_resources() noexcept {
+future<> sstables_task_executor::release_resources() noexcept {
    _cm._stats.pending_tasks -= _sstables.size() - (_state == state::pending);
    _sstables = {};
+    return make_ready_future();
 }

 future<compaction_manager::compaction_stats_opt> compaction_task_executor::run_compaction() noexcept {
@@ -1565,10 +1567,10 @@ public:
        , _can_purge(can_purge)
    {}

-    virtual void release_resources() noexcept override {
+    virtual future<> release_resources() noexcept override {
        _compacting.release_all();
        _owned_ranges_ptr = nullptr;
-        sstables_task_executor::release_resources();
+        co_await sstables_task_executor::release_resources();
    }

 protected:
@@ -1846,11 +1848,12 @@ public:

    virtual ~cleanup_sstables_compaction_task_executor() = default;

-    virtual void release_resources() noexcept override {
+    virtual future<> release_resources() noexcept override {
        _cm._stats.pending_tasks -= _pending_cleanup_jobs.size();
        _pending_cleanup_jobs = {};
        _compacting.release_all();
        _owned_ranges_ptr = nullptr;
+        return make_ready_future();
    }

    virtual future<tasks::task_manager::task::progress> get_progress() const override {
--- a/compound.hh
+++ b/compound.hh
@@ -255,6 +255,9 @@ public:
    // Returns true iff given prefix has no missing components
    bool is_full(managed_bytes_view v) const {
        SCYLLA_ASSERT(AllowPrefixes == allow_prefixes::yes);
+        if (_types.size() == 0) {
+            return v.empty();
+        }
        return std::distance(begin(v), end(v)) == (ssize_t)_types.size();
    }
    bool is_empty(managed_bytes_view v) const {
--- a/conf/scylla.yaml
+++ b/conf/scylla.yaml
@@ -677,7 +677,9 @@ maintenance_socket: ignore
 # Guardrail to enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE.
 # enable_create_table_with_compact_storage: false

-# Enable tablets for new keyspaces.
+# Control tablets for new keyspaces.
+# Can be set to: disabled|enabled|enforced
+#
 # When enabled, newly created keyspaces will have tablets enabled by default.
 # That can be explicitly disabled in the CREATE KEYSPACE query
 # by using the `tablets = {'enabled': false}` replication option.
@@ -686,6 +688,15 @@ maintenance_socket: ignore
 # unless tablets are explicitly enabled in the CREATE KEYSPACE query
 # by using the `tablets = {'enabled': true}` replication option.
 #
+# When set to `enforced`, newly created keyspaces will always have tablets enabled by default.
+# This prevents explicitly disabling tablets in the CREATE KEYSPACE query
+# using the `tablets = {'enabled': false}` replication option.
+# It also mandates a replication strategy supporting tablets, like
+# NetworkTopologyStrategy
+#
 # Note that creating keyspaces with tablets enabled or disabled is irreversible.
 # The `tablets` option cannot be changed using `ALTER KEYSPACE`.
-enable_tablets: true
+tablets_mode_for_new_keyspaces: enabled
+
+# Enforce RF-rack-valid keyspaces.
+rf_rack_valid_keyspaces: false
--- a/configure.py
+++ b/configure.py
@@ -813,6 +813,7 @@ scylla_core = (['message/messaging_service.cc',
                'utils/rjson.cc',
                'utils/human_readable.cc',
                'utils/histogram_metrics_helper.cc',
+                'utils/io-wrappers.cc',
                'utils/on_internal_error.cc',
                'utils/pretty_printers.cc',
                'utils/stream_compressor.cc',
@@ -1001,6 +1002,7 @@ scylla_core = (['message/messaging_service.cc',
                'db/extensions.cc',
                'db/heat_load_balance.cc',
                'db/large_data_handler.cc',
+                'db/corrupt_data_handler.cc',
                'db/marshal/type_parser.cc',
                'db/batchlog_manager.cc',
                'db/tags/utils.cc',
@@ -1099,7 +1101,7 @@ scylla_core = (['message/messaging_service.cc',
                'utils/lister.cc',
                'repair/repair.cc',
                'repair/row_level.cc',
-                'repair/table_check.cc',
+                'streaming/table_check.cc',
                'exceptions/exceptions.cc',
                'auth/allow_all_authenticator.cc',
                'auth/allow_all_authorizer.cc',
@@ -1321,6 +1323,7 @@ idls = ['idl/gossip_digest.idl.hh',
        'idl/replica_exception.idl.hh',
        'idl/per_partition_rate_limit_info.idl.hh',
        'idl/position_in_partition.idl.hh',
+        'idl/full_position.idl.hh',
        'idl/experimental/broadcast_tables_lang.idl.hh',
        'idl/storage_service.idl.hh',
        'idl/join_node.idl.hh',
@@ -1338,6 +1341,7 @@ scylla_tests_generic_dependencies = [
    'test/lib/test_utils.cc',
    'test/lib/tmpdir.cc',
    'test/lib/sstable_run_based_compaction_strategy_for_tests.cc',
+    'test/lib/eventually.cc',
 ]

 scylla_tests_dependencies = scylla_core + alternator + idls + scylla_tests_generic_dependencies + [
@@ -1379,6 +1383,7 @@ scylla_perfs = ['test/perf/perf_alternator.cc',
                'test/lib/key_utils.cc',
                'test/lib/random_schema.cc',
                'test/lib/data_model.cc',
+                'test/lib/eventually.cc',
                'seastar/tests/perf/linux_perf_event.cc']

 deps = {
@@ -1564,7 +1569,7 @@ deps['test/boost/linearizing_input_stream_test'] = [
    "test/boost/linearizing_input_stream_test.cc",
    "test/lib/log.cc",
 ]
-deps['test/boost/expr_test'] = ['test/boost/expr_test.cc', 'test/lib/expr_test_utils.cc'] + scylla_core
+deps['test/boost/expr_test'] = ['test/boost/expr_test.cc', 'test/lib/expr_test_utils.cc'] + scylla_core + alternator
 deps['test/boost/rate_limiter_test'] = ['test/boost/rate_limiter_test.cc', 'db/rate_limiter.cc']
 deps['test/boost/exceptions_optimized_test'] = ['test/boost/exceptions_optimized_test.cc', 'utils/exceptions.cc']
 deps['test/boost/exceptions_fallback_test'] = ['test/boost/exceptions_fallback_test.cc', 'utils/exceptions.cc']
@@ -1573,16 +1578,16 @@ deps['test/boost/duration_test'] += ['test/lib/exception_utils.cc']
 deps['test/boost/schema_loader_test'] += ['tools/schema_loader.cc', 'tools/read_mutation.cc']
 deps['test/boost/rust_test'] += ['rust/inc/src/lib.rs']

-deps['test/raft/replication_test'] = ['test/raft/replication_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
-deps['test/raft/raft_server_test'] = ['test/raft/raft_server_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
+deps['test/raft/replication_test'] = ['test/raft/replication_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc', 'test/lib/eventually.cc'] + scylla_raft_dependencies
+deps['test/raft/raft_server_test'] = ['test/raft/raft_server_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc', 'test/lib/eventually.cc'] + scylla_raft_dependencies
 deps['test/raft/randomized_nemesis_test'] = ['test/raft/randomized_nemesis_test.cc', 'direct_failure_detector/failure_detector.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
 deps['test/raft/failure_detector_test'] = ['test/raft/failure_detector_test.cc', 'direct_failure_detector/failure_detector.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
-deps['test/raft/many_test'] = ['test/raft/many_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
+deps['test/raft/many_test'] = ['test/raft/many_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc', 'test/lib/eventually.cc'] + scylla_raft_dependencies
 deps['test/raft/fsm_test'] =  ['test/raft/fsm_test.cc', 'test/raft/helpers.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
 deps['test/raft/etcd_test'] =  ['test/raft/etcd_test.cc', 'test/raft/helpers.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
 deps['test/raft/raft_sys_table_storage_test'] = ['test/raft/raft_sys_table_storage_test.cc'] + \
-    scylla_core + scylla_tests_generic_dependencies
-deps['test/boost/address_map_test'] = ['test/boost/address_map_test.cc'] + scylla_core
+    scylla_core + alternator + scylla_tests_generic_dependencies
+deps['test/boost/address_map_test'] = ['test/boost/address_map_test.cc'] + scylla_core + alternator
 deps['test/raft/discovery_test'] =  ['test/raft/discovery_test.cc',
                                     'test/raft/helpers.cc',
                                     'test/lib/log.cc',
--- a/cql3/Cql.g
+++ b/cql3/Cql.g
@@ -709,17 +709,23 @@ batchStatement returns [std::unique_ptr<cql3::statements::raw::batch_statement>
    : K_BEGIN
      ( K_UNLOGGED { type = btype::UNLOGGED; } | K_COUNTER { type = btype::COUNTER; } )?
      K_BATCH ( usingClause[attrs] )?
-          ( s=batchStatementObjective ';'? { statements.push_back(std::move(s)); } )*
+          ( s=batchStatementObjective ';'?
+              {
+                  auto&& stmt = *$s.statement;
+                  stmt->add_raw(sstring{$s.text});
+                  statements.push_back(std::move(stmt));
+              } )*
      K_APPLY K_BATCH
      {
          $expr = std::make_unique<cql3::statements::raw::batch_statement>(type, std::move(attrs), std::move(statements));
      }
    ;

-batchStatementObjective returns [std::unique_ptr<cql3::statements::raw::modification_statement> statement]
-    : i=insertStatement  { $statement = std::move(i); }
-    | u=updateStatement  { $statement = std::move(u); }
-    | d=deleteStatement  { $statement = std::move(d); }
+batchStatementObjective returns [::lw_shared_ptr<std::unique_ptr<cql3::statements::raw::modification_statement>> statement]
+    @init { using original_ret_type = std::unique_ptr<cql3::statements::raw::modification_statement>; }
+    : i=insertStatement  { $statement = make_lw_shared<original_ret_type>(std::move(i)); }
+    | u=updateStatement  { $statement = make_lw_shared<original_ret_type>(std::move(u)); }
+    | d=deleteStatement  { $statement = make_lw_shared<original_ret_type>(std::move(d)); }
    ;

 dropAggregateStatement returns [std::unique_ptr<cql3::statements::drop_aggregate_statement> expr]
--- a/cql3/statements/alter_keyspace_statement.cc
+++ b/cql3/statements/alter_keyspace_statement.cc
@@ -13,6 +13,7 @@
 #include <seastar/core/on_internal_error.hh>
 #include <stdexcept>
 #include "alter_keyspace_statement.hh"
+#include "locator/tablets.hh"
 #include "prepared_statement.hh"
 #include "service/migration_manager.hh"
 #include "service/storage_proxy.hh"
@@ -25,6 +26,9 @@
 #include "create_keyspace_statement.hh"
 #include "gms/feature_service.hh"
 #include "replica/database.hh"
+#include "db/config.hh"
+
+using namespace std::string_literals;

 static logging::logger mylogger("alter_keyspace");

@@ -193,9 +197,9 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
        event::schema_change::target_type target_type = event::schema_change::target_type::KEYSPACE;
        auto ks = qp.db().find_keyspace(_name);
        auto ks_md = ks.metadata();
-        const auto& tm = *qp.proxy().get_token_metadata_ptr();
+        const auto tmptr = qp.proxy().get_token_metadata_ptr();
        const auto& feat = qp.proxy().features();
-        auto ks_md_update = _attrs->as_ks_metadata_update(ks_md, tm, feat);
+        auto ks_md_update = _attrs->as_ks_metadata_update(ks_md, *tmptr, feat);
        std::vector<mutation> muts;
        std::vector<sstring> warnings;
        bool include_tablet_options = _attrs->get_map(_attrs->KW_TABLETS).has_value();
@@ -206,6 +210,25 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
        auto ts = mc.write_timestamp();
        auto global_request_id = mc.new_group0_state_id();

+        // #22688 - filter out any dc*:0 entries - consider these
+        // null and void (removed). Migration planning will treat it
+        // as dc*=0 still.
+        std::erase_if(ks_options, [](const auto& i) {
+            static constexpr std::string replication_prefix = ks_prop_defs::KW_REPLICATION + ":"s;
+            // In the flattened map, replication entries start with "replication:".
+            // Only valid options are replication_factor, class and per-dc rf:s. We want to
+            // filter out any dcN=0 entries.
+            auto& [key, val] = i;
+            if (key.starts_with(replication_prefix) && val == "0") {
+                std::string_view v(key);
+                v.remove_prefix(replication_prefix.size());
+                return v != ks_prop_defs::REPLICATION_FACTOR_KEY 
+                    && v != ks_prop_defs::REPLICATION_STRATEGY_CLASS_KEY
+                    ;
+            }
+            return false;
+        });
+
        // we only want to run the tablets path if there are actually any tablets changes, not only schema changes
        // TODO: the current `if (changes_tablets(qp))` is insufficient: someone may set the same RFs as before,
        //       and we'll unnecessarily trigger the processing path for ALTER tablets KS,
@@ -246,6 +269,36 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
            muts.insert(muts.begin(), schema_mutations.begin(), schema_mutations.end());
        }

+        // If `rf_rack_valid_keyspaces` is enabled, it's forbidden to perform a schema change that
+        // would lead to an RF-rack-invalid keyspace. Verify that this change does not.
+        // For more context, see: scylladb/scylladb#23071.
+        if (qp.db().get_config().rf_rack_valid_keyspaces()) {
+            auto rs = locator::abstract_replication_strategy::create_replication_strategy(
+                    ks_md_update->strategy_name(),
+                    locator::replication_strategy_params(ks_md_update->strategy_options(), ks_md_update->initial_tablets()));
+
+            try {
+                // There are two things to note here:
+                // 1. We hold a group0_guard, so it's correct to check this here.
+                //    The topology or schema cannot change while we're performing this query.
+                // 2. The replication strategy we use here does NOT represent the actual state
+                //    we will arrive at after applying the schema change. For instance, if the user
+                //    did not specify the RF for some of the DCs, it's equal to 0 in the replication
+                //    strategy we pass to this function, while in reality that means that the RF
+                //    will NOT change. That is not a problem:
+                //    - RF=0 is valid for all DCs, so it won't trigger an exception on its own,
+                //    - the keyspace must've been RF-rack-valid before this change. We check that
+                //      condition for all keyspaces at startup.
+                //    The second bullet point is not strictly true, because topology changes can
+                //    currently violate it (see scylladb/scylladb#23345), but we ignore that here.
+                locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
+            } catch (const std::exception& e) {
+                // There's no guarantee what the type of the exception will be, so we need to
+                // wrap it manually here in a type that can be passed to the user.
+                throw exceptions::invalid_request_exception(e.what());
+            }
+        }
+
        auto ret = ::make_shared<event::schema_change>(
                event::schema_change::change_type::UPDATED,
                target_type,
--- a/cql3/statements/alter_table_statement.cc
+++ b/cql3/statements/alter_table_statement.cc
@@ -276,7 +276,7 @@ void alter_table_statement::drop_column(const query_options& options, const sche
    }
 }

-std::pair<schema_builder, std::vector<view_ptr>> alter_table_statement::prepare_schema_update(data_dictionary::database db, const query_options& options) const {
+std::pair<schema_ptr, std::vector<view_ptr>> alter_table_statement::prepare_schema_update(data_dictionary::database db, const query_options& options) const {
    auto s = validation::validate_column_family(db, keyspace(), column_family());
    if (s->is_view()) {
        throw exceptions::invalid_request_exception("Cannot use ALTER TABLE on Materialized View");
@@ -369,41 +369,45 @@ std::pair<schema_builder, std::vector<view_ptr>> alter_table_statement::prepare_

            validate_column_rename(db, *s, *from, *to);
            cfm.rename_column(from->name(), to->name());
-
-            // If the view includes a renamed column, it must be renamed in
-            // the view table and the definition.
-            for (auto&& view : cf.views()) {
+        }
+        // New view schemas contain the new column names, so we need to base them on the
+        // new base schema.
+        schema_ptr new_base_schema = cfm.build();
+        // If the view includes a renamed column, it must be renamed in
+        // the view table and the definition.
+        for (auto&& view : cf.views()) {
+            schema_builder builder(view);
+            std::vector<std::pair<::shared_ptr<column_identifier>, ::shared_ptr<column_identifier>>> view_renames;
+            for (auto&& entry : _renames) {
+                auto from = entry.first->prepare_column_identifier(*s);
                if (view->get_column_definition(from->name())) {
-                    schema_builder builder(view);
-
                    auto view_from = entry.first->prepare_column_identifier(*view);
                    auto view_to = entry.second->prepare_column_identifier(*view);
                    validate_column_rename(db, *view, *view_from, *view_to);
                    builder.rename_column(view_from->name(), view_to->name());
-
-                    auto new_where = util::rename_column_in_where_clause(
-                            view->view_info()->where_clause(),
-                            column_identifier::raw(view_from->text(), true),
-                            column_identifier::raw(view_to->text(), true),
-                            cql3::dialect{});
-                    builder.with_view_info(view->view_info()->base_id(), view->view_info()->base_name(),
-                            view->view_info()->include_all_columns(), std::move(new_where));
-
-                    view_updates.push_back(view_ptr(builder.build()));
+                    view_renames.emplace_back(view_from, view_to);
                }
            }
+            if (!view_renames.empty()) {
+                auto new_where = util::rename_columns_in_where_clause(
+                        view->view_info()->where_clause(),
+                        view_renames,
+                        cql3::dialect{});
+                builder.with_view_info(new_base_schema, view->view_info()->include_all_columns(), std::move(new_where));
+                view_updates.push_back(view_ptr(builder.build()));
+            }
        }
-        break;
+        return make_pair(std::move(new_base_schema), std::move(view_updates));
    }

-    return make_pair(std::move(cfm), std::move(view_updates));
+    return make_pair(cfm.build(), std::move(view_updates));
 }

 future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>>
 alter_table_statement::prepare_schema_mutations(query_processor& qp, const query_options& options, api::timestamp_type ts) const {
  data_dictionary::database db = qp.db();
-  auto [cfm, view_updates] = prepare_schema_update(db, options);
-  auto m = co_await service::prepare_column_family_update_announcement(qp.proxy(), cfm.build(), std::move(view_updates), ts);
+  auto [s, view_updates] = prepare_schema_update(db, options);
+  auto m = co_await service::prepare_column_family_update_announcement(qp.proxy(), std::move(s), std::move(view_updates), ts);

  using namespace cql_transport;
  auto ret = ::make_shared<event::schema_change>(
--- a/cql3/statements/alter_table_statement.hh
+++ b/cql3/statements/alter_table_statement.hh
@@ -69,7 +69,7 @@ private:
    void add_column(const query_options& options, const schema& schema, data_dictionary::table cf, schema_builder& cfm, std::vector<view_ptr>& view_updates, const column_identifier& column_name, const cql3_type validator, const column_definition* def, bool is_static) const;
    void alter_column(const query_options& options, const schema& schema, data_dictionary::table cf, schema_builder& cfm, std::vector<view_ptr>& view_updates, const column_identifier& column_name, const cql3_type validator, const column_definition* def, bool is_static) const;
    void drop_column(const query_options& options, const schema& schema, data_dictionary::table cf, schema_builder& cfm, std::vector<view_ptr>& view_updates, const column_identifier& column_name, const cql3_type validator, const column_definition* def, bool is_static) const;
-    std::pair<schema_builder, std::vector<view_ptr>> prepare_schema_update(data_dictionary::database db, const query_options& options) const;
+    std::pair<schema_ptr, std::vector<view_ptr>> prepare_schema_update(data_dictionary::database db, const query_options& options) const;
 };

 class alter_table_statement::raw_statement : public raw::cf_statement {
--- a/cql3/statements/create_index_statement.cc
+++ b/cql3/statements/create_index_statement.cc
@@ -87,6 +87,9 @@ std::vector<::shared_ptr<index_target>> create_index_statement::validate_while_e
                "Secondary indexes are not supported on COMPACT STORAGE tables that have clustering columns");
    }

+    if (!db.features().views_with_tablets && db.find_keyspace(keyspace()).get_replication_strategy().uses_tablets()) {
+        throw exceptions::invalid_request_exception(format("Secondary indexes are not supported on base tables with tablets (keyspace '{}')", keyspace()));
+    }
    validate_for_local_index(*schema);

    std::vector<::shared_ptr<index_target>> targets;
--- a/cql3/statements/create_keyspace_statement.cc
+++ b/cql3/statements/create_keyspace_statement.cc
@@ -11,6 +11,8 @@
 #include <seastar/core/coroutine.hh>
 #include "cql3/statements/create_keyspace_statement.hh"
 #include "cql3/statements/ks_prop_defs.hh"
+#include "exceptions/exceptions.hh"
+#include "locator/tablets.hh"
 #include "prepared_statement.hh"
 #include "data_dictionary/data_dictionary.hh"
 #include "data_dictionary/keyspace_metadata.hh"
@@ -90,14 +92,14 @@ void create_keyspace_statement::validate(query_processor& qp, const service::cli

 future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>> create_keyspace_statement::prepare_schema_mutations(query_processor& qp, const query_options&, api::timestamp_type ts) const {
    using namespace cql_transport;
-    const auto& tm = *qp.proxy().get_token_metadata_ptr();
+    const auto tmptr = qp.proxy().get_token_metadata_ptr();
    const auto& feat = qp.proxy().features();
    const auto& cfg = qp.db().get_config();
    std::vector<mutation> m;
    std::vector<sstring> warnings;

    try {
-        auto ksm = _attrs->as_ks_metadata(_name, tm, feat, cfg);
+        auto ksm = _attrs->as_ks_metadata(_name, *tmptr, feat, cfg);
        m = service::prepare_new_keyspace_announcement(qp.db().real_database(), ksm, ts);
        // If the new keyspace uses tablets, as long as there are features
        // which aren't supported by tablets we want to warn the user that
@@ -111,10 +113,24 @@ future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector
        if (rs->uses_tablets()) {
            warnings.push_back(
                "Tables in this keyspace will be replicated using Tablets "
-                "and will not support CDC, LWT and counters features. "
-                "To use CDC, LWT or counters, drop this keyspace and re-create it "
-                "without tablets by adding AND TABLETS = {'enabled': false} "
-                "to the CREATE KEYSPACE statement.");
+                "and will not support Materialized Views, Secondary Indexes, CDC, LWT and counters features. "
+                "To use Materialized Views, Secondary Indexes, CDC, LWT or counters, drop this keyspace and re-create it "
+                "without tablets by adding AND TABLETS = {'enabled': false} to the CREATE KEYSPACE statement.");
+        }
+
+        // If `rf_rack_valid_keyspaces` is enabled, it's forbidden to create an RF-rack-invalid keyspace.
+        // Verify that it's RF-rack-valid.
+        // For more context, see: scylladb/scylladb#23071.
+        if (cfg.rf_rack_valid_keyspaces()) {
+            try {
+                // We hold a group0_guard, so it's correct to check this here.
+                // The topology or schema cannot change while we're performing this query.
+                locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
+            } catch (const std::exception& e) {
+                // There's no guarantee what the type of the exception will be, so we need to
+                // wrap it manually here in a type that can be passed to the user.
+                throw exceptions::invalid_request_exception(e.what());
+            }
        }
    } catch (const exceptions::already_exists_exception& e) {
        if (!_if_not_exists) {
@@ -217,9 +233,6 @@ std::vector<sstring> check_against_restricted_replication_strategies(
    // We ignore errors (non-number, negative number, etc.) here,
    // these are checked and reported elsewhere.
    for (auto opt : attrs.get_replication_options()) {
-        if (opt.first == sstring("initial_tablets")) {
-            continue;
-        }
        try {
            auto rf = std::stol(opt.second);
            if (rf > 0) {
--- a/cql3/statements/create_view_statement.cc
+++ b/cql3/statements/create_view_statement.cc
@@ -140,6 +140,9 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(

    schema_ptr schema = validation::validate_column_family(db, _base_name.get_keyspace(), _base_name.get_column_family());

+    if (!db.features().views_with_tablets && db.find_keyspace(keyspace()).get_replication_strategy().uses_tablets()) {
+        throw exceptions::invalid_request_exception(format("Materialized views are not supported on base tables with tablets"));
+    }
    if (schema->is_counter()) {
        throw exceptions::invalid_request_exception(format("Materialized views are not supported on counter tables"));
    }
@@ -364,7 +367,7 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(
    }

    auto where_clause_text = util::relations_to_where_clause(_where_clause);
-    builder.with_view_info(schema->id(), schema->cf_name(), included.empty(), std::move(where_clause_text));
+    builder.with_view_info(schema, included.empty(), std::move(where_clause_text));

    return std::make_pair(view_ptr(builder.build()), std::move(warnings));
 }
--- a/cql3/statements/ks_prop_defs.cc
+++ b/cql3/statements/ks_prop_defs.cc
@@ -70,6 +70,16 @@ static std::map<sstring, sstring> prepare_options(
        }
    }

+    // #22688 / #20039 - the check for illegal, empty options was moved here
+    // (after the expansion above). We want to be able to remove DCs once their
+    // RF is 0, in which case the options actually serialized in the resulting
+    // mutations may, in extreme cases, be empty; hence this check cannot live in
+    // verify_options. We only want to apply this constraint to the input
+    // provided by the user.
+    if (options.empty() && !tm.get_topology().get_datacenters().empty()) {
+        throw exceptions::configuration_exception("Configuration for at least one datacenter must be present");
+    }
+
    return options;
 }

@@ -140,7 +150,7 @@ data_dictionary::storage_options ks_prop_defs::get_storage_options() const {
    return opts;
 }

-std::optional<unsigned> ks_prop_defs::get_initial_tablets(std::optional<unsigned> default_value) const {
+std::optional<unsigned> ks_prop_defs::get_initial_tablets(std::optional<unsigned> default_value, bool enforce_tablets) const {
    auto tablets_options = get_map(KW_TABLETS);
    if (!tablets_options) {
        return default_value;
@@ -155,6 +165,9 @@ std::optional<unsigned> ks_prop_defs::get_initial_tablets(std::optional<unsigned
        if (enabled == "true") {
            // nothing
        } else if (enabled == "false") {
+            if (enforce_tablets) {
+                throw exceptions::configuration_exception("Cannot disable tablets for keyspace since tablets are enforced using the `tablets_mode_for_new_keyspaces: enforced` config option.");
+            }
            return std::nullopt;
        } else {
            throw exceptions::configuration_exception(sstring("Tablets enabled value must be true or false; found: ") + enabled);
@@ -189,8 +202,10 @@ bool ks_prop_defs::get_durable_writes() const {
 lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata(sstring ks_name, const locator::token_metadata& tm, const gms::feature_service& feat, const db::config& cfg) {
    auto sc = get_replication_strategy_class().value();
    // if tablets options have not been specified, but tablets are globally enabled, set the value to 0 for N.T.S. only
-    auto enable_tablets = feat.tablets && cfg.enable_tablets();
-    auto initial_tablets = get_initial_tablets(enable_tablets && locator::abstract_replication_strategy::to_qualified_class_name(sc) == "org.apache.cassandra.locator.NetworkTopologyStrategy" ? std::optional<unsigned>(0) : std::nullopt);
+    auto enable_tablets = feat.tablets && cfg.enable_tablets_by_default();
+    std::optional<unsigned> default_initial_tablets = enable_tablets && locator::abstract_replication_strategy::to_qualified_class_name(sc) == "org.apache.cassandra.locator.NetworkTopologyStrategy"
+            ? std::optional<unsigned>(0) : std::nullopt;
+    auto initial_tablets = get_initial_tablets(default_initial_tablets, cfg.enforce_tablets());
    auto options = prepare_options(sc, tm, get_replication_options());
    return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc,
            std::move(options), initial_tablets, get_boolean(KW_DURABLE_WRITES, true), get_storage_options());
--- a/cql3/statements/ks_prop_defs.hh
+++ b/cql3/statements/ks_prop_defs.hh
@@ -60,7 +60,7 @@ public:
    void validate();
    std::map<sstring, sstring> get_replication_options() const;
    std::optional<sstring> get_replication_strategy_class() const;
-    std::optional<unsigned> get_initial_tablets(std::optional<unsigned> default_value) const;
+    std::optional<unsigned> get_initial_tablets(std::optional<unsigned> default_value, bool enforce_tablets = false) const;
    data_dictionary::storage_options get_storage_options() const;
    bool get_durable_writes() const;
    lw_shared_ptr<data_dictionary::keyspace_metadata> as_ks_metadata(sstring ks_name, const locator::token_metadata&, const gms::feature_service&, const db::config&);
--- a/cql3/util.cc
+++ b/cql3/util.cc
@@ -144,26 +144,29 @@ expr::expression where_clause_to_relations(const std::string_view& where_clause,
    return do_with_parser(where_clause, d, std::mem_fn(&cql3_parser::CqlParser::whereClause));
 }

-sstring rename_column_in_where_clause(const std::string_view& where_clause, column_identifier::raw from, column_identifier::raw to, dialect d) {
+sstring rename_columns_in_where_clause(const std::string_view& where_clause, std::vector<std::pair<::shared_ptr<column_identifier>, ::shared_ptr<column_identifier>>> renames, dialect d) {
    std::vector<expr::expression> relations = boolean_factors(where_clause_to_relations(where_clause, d));
    std::vector<expr::expression> new_relations;
    new_relations.reserve(relations.size());

    for (const expr::expression& old_relation : relations) {
-        expr::expression new_relation = expr::search_and_replace(old_relation,
-            [&](const expr::expression& e) -> std::optional<expr::expression> {
-                if (auto ident = expr::as_if<expr::unresolved_identifier>(&e)) {
-                    if (*ident->ident == from) {
-                        return expr::unresolved_identifier{
-                            ::make_shared<column_identifier::raw>(to)
-                        };
+        new_relations.emplace_back(
+            expr::search_and_replace(old_relation,
+                [&](const expr::expression& e) -> std::optional<expr::expression> {
+                    for (const auto& [view_from, view_to] : renames) {
+                        if (auto ident = expr::as_if<expr::unresolved_identifier>(&e)) {
+                            auto from = column_identifier::raw(view_from->text(), true);
+                            if (*ident->ident == from) {
+                                return expr::unresolved_identifier{
+                                    ::make_shared<column_identifier::raw>(view_to->text(), true)
+                                };
+                            }
+                        }
                    }
+                    return std::nullopt;
                }
-                return std::nullopt;
-            }
+            )
        );
-
-        new_relations.emplace_back(std::move(new_relation));
    }

    return relations_to_where_clause(expr::conjunction{std::move(new_relations)});
--- a/cql3/util.hh
+++ b/cql3/util.hh
@@ -40,7 +40,7 @@ sstring relations_to_where_clause(const expr::expression& e);

 expr::expression where_clause_to_relations(const std::string_view& where_clause, dialect d);

-sstring rename_column_in_where_clause(const std::string_view& where_clause, column_identifier::raw from, column_identifier::raw to, dialect d);
+sstring rename_columns_in_where_clause(const std::string_view& where_clause, std::vector<std::pair<::shared_ptr<column_identifier>, ::shared_ptr<column_identifier>>> renames, dialect d);

 /// build a CQL "select" statement with the desired parameters.
 /// If select_all_columns==true, all columns are selected and the value of
--- a/db/CMakeLists.txt
+++ b/db/CMakeLists.txt
@@ -27,6 +27,7 @@ target_sources(db
    extensions.cc
    heat_load_balance.cc
    large_data_handler.cc
+    corrupt_data_handler.cc
    marshal/type_parser.cc
    batchlog_manager.cc
    tags/utils.cc
--- a/db/batchlog_manager.cc
+++ b/db/batchlog_manager.cc
@@ -38,7 +38,7 @@

 static logging::logger blogger("batchlog_manager");

-const uint32_t db::batchlog_manager::replay_interval;
+const std::chrono::seconds db::batchlog_manager::replay_interval;
 const uint32_t db::batchlog_manager::page_size;

 db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, batchlog_manager_config config)
@@ -117,7 +117,8 @@ future<> db::batchlog_manager::batchlog_replay_loop() {
        } catch (...) {
            blogger.error("Exception in batch replay: {}", std::current_exception());
        }
-        delay = std::chrono::milliseconds(replay_interval);
+        delay = utils::get_local_injector().is_enabled("short_batchlog_manager_replay_interval") ?
+                std::chrono::seconds(1) : replay_interval;
    }
 }

@@ -133,6 +134,8 @@ future<> db::batchlog_manager::drain() {
        _sem.broken();
    }

+    co_await _qp.proxy().abort_batch_writes();
+
    co_await std::move(_loop_done);
    blogger.info("Drained");
 }
@@ -174,6 +177,11 @@ future<> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cle
            return make_ready_future<stop_iteration>(stop_iteration::no);
        }

+        if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
+            blogger.debug("Skipping batch replay due to skip_batch_replay injection");
+            return make_ready_future<stop_iteration>(stop_iteration::no);
+        }
+
        // check version of serialization format
        if (!row.has("version")) {
            blogger.warn("Skipping logged batch because of unknown version");
@@ -243,7 +251,8 @@ future<> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cle
                // send to partially or wholly fail in actually sending stuff. Since we don't
                // have hints (yet), send with CL=ALL, and hope we can re-do this soon.
                // See below, we use retry on write failure.
-                return _qp.proxy().mutate(mutations, db::consistency_level::ALL, db::no_timeout, nullptr, empty_service_permit(), db::allow_per_partition_rate_limit::no);
+                auto timeout = db::timeout_clock::now() + write_timeout;
+                return _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
            });
        }).then_wrapped([this, id](future<> batch_result) {
            try {
--- a/db/batchlog_manager.hh
+++ b/db/batchlog_manager.hh
@@ -43,8 +43,9 @@ public:
    using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;

 private:
-    static constexpr uint32_t replay_interval = 60 * 1000; // milliseconds
+    static constexpr std::chrono::seconds replay_interval = std::chrono::seconds(60);
    static constexpr uint32_t page_size = 128; // same as HHOM, for now, w/out using any heuristics. TODO: set based on avg batch size.
+    static constexpr std::chrono::seconds write_timeout = std::chrono::seconds(300);

    using clock_type = lowres_clock;

--- a/db/config.cc
+++ b/db/config.cc
@@ -238,6 +238,13 @@ const config_type& config_type_for<enum_option<db::tri_mode_restriction_t>>() {
    return ct;
 }

+template <>
+const config_type& config_type_for<enum_option<db::tablets_mode_t>>() {
+    static config_type ct(
+        "tablets mode", printable_to_json<enum_option<db::tablets_mode_t>>);
+    return ct;
+}
+
 template <>
 const config_type& config_type_for<db::config::hinted_handoff_enabled_type>() {
    static config_type ct("hinted handoff enabled", hinted_handoff_enabled_to_json);
@@ -372,6 +379,23 @@ public:
    }
 };

+template <>
+class convert<enum_option<db::tablets_mode_t>> {
+public:
+    static bool decode(const Node& node, enum_option<db::tablets_mode_t>& rhs) {
+        std::string name;
+        if (!convert<std::string>::decode(node, name)) {
+            return false;
+        }
+        try {
+            std::istringstream(name) >> rhs;
+        } catch (boost::program_options::invalid_option_value&) {
+            return false;
+        }
+        return true;
+    }
+};
+
 template<>
 struct convert<db::config::error_injection_at_startup> {
    static bool decode(const Node& node, db::config::error_injection_at_startup& rhs) {
@@ -536,6 +560,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
        "The directory where the schema commit log is stored. This is a special commitlog instance used for schema and system tables. For optimal write performance, it is recommended the commit log be on a separate disk partition (ideally, a separate physical device) from the data file directories.")
    , data_file_directories(this, "data_file_directories", "datadir", value_status::Used, { },
        "The directory location where table data (SSTables) is stored.")
+    , data_file_capacity(this, "data_file_capacity", liveness::LiveUpdate, value_status::Used, 0,
+        "Total capacity in bytes for storing data files. Used by tablet load balancer to compute storage utilization."
+        " If not set, will use file system's capacity.")
    , hints_directory(this, "hints_directory", value_status::Used, "",
        "The directory where hints files are stored if hinted handoff is enabled.")
    , view_hints_directory(this, "view_hints_directory", value_status::Used, "",
@@ -1201,7 +1228,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
            "Start serializing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
    , reader_concurrency_semaphore_kill_limit_multiplier(this, "reader_concurrency_semaphore_kill_limit_multiplier", liveness::LiveUpdate, value_status::Used, 4,
            "Start killing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
-    , reader_concurrency_semaphore_cpu_concurrency(this, "reader_concurrency_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 1,
+    , reader_concurrency_semaphore_cpu_concurrency(this, "reader_concurrency_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 2,
            "Admit new reads while there are less than this number of requests that need CPU.")
    , view_update_reader_concurrency_semaphore_serialize_limit_multiplier(this, "view_update_reader_concurrency_semaphore_serialize_limit_multiplier", liveness::LiveUpdate, value_status::Used, 2,
            "Start serializing view update reads after their collective memory consumption goes above $normal_limit * $multiplier.")
@@ -1354,7 +1381,11 @@ db::config::config(std::shared_ptr<db::extensions> exts)

    , error_injections_at_startup(this, "error_injections_at_startup", error_injection_value_status, {}, "List of error injections that should be enabled on startup.")
    , topology_barrier_stall_detector_threshold_seconds(this, "topology_barrier_stall_detector_threshold_seconds", value_status::Used, 2, "Report sites blocking topology barrier if it takes longer than this.")
-    , enable_tablets(this, "enable_tablets", value_status::Used, false, "Enable tablets for newly created keyspaces.")
+    , enable_tablets(this, "enable_tablets", value_status::Used, false, "Enable tablets for newly created keyspaces. (deprecated)")
+    , tablets_mode_for_new_keyspaces(this, "tablets_mode_for_new_keyspaces", value_status::Used, tablets_mode_t::mode::unset, "Control tablets for new keyspaces.  Can be set to the following values:\n"
+            "\tdisabled: New keyspaces use vnodes by default, unless enabled by the tablets={'enabled':true} option\n"
+            "\tenabled:  New keyspaces use tablets by default, unless disabled by the tablets={'enabled':false} option\n"
+            "\tenforced: New keyspaces must use tablets. Tablets cannot be disabled using the CREATE KEYSPACE option")
    , view_flow_control_delay_limit_in_ms(this, "view_flow_control_delay_limit_in_ms", liveness::LiveUpdate, value_status::Used, 1000,
        "The maximal amount of time that materialized-view update flow control may delay responses "
        "to try to slow down the client and prevent buildup of unfinished view updates. "
@@ -1364,6 +1395,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , disk_space_monitor_high_polling_interval_in_seconds(this, "disk_space_monitor_high_polling_interval_in_seconds", value_status::Used, 1, "Disk-space polling interval at or above polling threshold")
    , disk_space_monitor_polling_interval_threshold(this, "disk_space_monitor_polling_interval_threshold", value_status::Used, 0.9, "Disk-space polling threshold. Polling interval is increased when disk utilization is greater than or equal to this threshold")
    , enable_create_table_with_compact_storage(this, "enable_create_table_with_compact_storage", liveness::LiveUpdate, value_status::Used, false, "Enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE.  This feature will eventually be removed in a future version.")
+    , rf_rack_valid_keyspaces(this, "rf_rack_valid_keyspaces", liveness::MustRestart, value_status::Used, false,
+        "Enforce RF-rack-valid keyspaces. Additionally, if there are existing RF-rack-invalid "
+        "keyspaces, attempting to start a node with this option ON will fail.")
    , default_log_level(this, "default_log_level", value_status::Used, seastar::log_level::info, "Default log level for log messages")
    , logger_log_level(this, "logger_log_level", value_status::Used, {}, "Map of logger name to log level. Valid log levels are 'error', 'warn', 'info', 'debug' and 'trace'")
    , log_to_stdout(this, "log_to_stdout", value_status::Used, true, "Send log output to stdout")
@@ -1579,6 +1613,16 @@ std::unordered_map<sstring, db::tri_mode_restriction_t::mode> db::tri_mode_restr
            {"warn", db::tri_mode_restriction_t::mode::WARN}};
 }

+std::unordered_map<sstring, db::tablets_mode_t::mode> db::tablets_mode_t::map() {
+    return {{"disabled", db::tablets_mode_t::mode::disabled},
+            {"0", db::tablets_mode_t::mode::disabled},
+            {"enabled", db::tablets_mode_t::mode::enabled},
+            {"1", db::tablets_mode_t::mode::enabled},
+            {"enforced", db::tablets_mode_t::mode::enforced},
+            {"2", db::tablets_mode_t::mode::enforced}
+            };
+}
+
 template struct utils::config_file::named_value<seastar::log_level>;

 namespace utils {
--- a/db/config.hh
+++ b/db/config.hh
@@ -130,6 +130,20 @@ struct replication_strategy_restriction_t {

 constexpr unsigned default_murmur3_partitioner_ignore_msb_bits = 12;

+struct tablets_mode_t {
+    // The `unset` mode is used internally for backward compatibility
+    // with the legacy `enable_tablets` option.
+    // It is defined as -1 as existing test code associates the value
+    // 0 with `false` and 1 with `true` when read from system.config.
+    enum class mode : int8_t {
+        unset = -1,
+        disabled = 0,
+        enabled = 1,
+        enforced = 2
+    };
+    static std::unordered_map<sstring, mode> map(); // for enum_option<>
+};
+
 class config final : public utils::config_file {
 public:
    config();
@@ -183,6 +197,7 @@ public:
    named_value<sstring> commitlog_directory;
    named_value<sstring> schema_commitlog_directory;
    named_value<string_list> data_file_directories;
+    named_value<uint64_t> data_file_capacity;
    named_value<sstring> hints_directory;
    named_value<sstring> view_hints_directory;
    named_value<sstring> saved_caches_directory;
@@ -527,6 +542,23 @@ public:
    named_value<std::vector<error_injection_at_startup>> error_injections_at_startup;
    named_value<double> topology_barrier_stall_detector_threshold_seconds;
    named_value<bool> enable_tablets;
+    named_value<enum_option<tablets_mode_t>> tablets_mode_for_new_keyspaces;
+
+    bool enable_tablets_by_default() const noexcept {
+        switch (tablets_mode_for_new_keyspaces()) {
+        case tablets_mode_t::mode::unset:
+            return enable_tablets();
+        case tablets_mode_t::mode::disabled:
+            return false;
+        case tablets_mode_t::mode::enabled:
+        case tablets_mode_t::mode::enforced:
+            return true;
+        }
+    }
+    bool enforce_tablets() const noexcept {
+        return tablets_mode_for_new_keyspaces() == tablets_mode_t::mode::enforced;
+    }
+
    named_value<uint32_t> view_flow_control_delay_limit_in_ms;

    named_value<int> disk_space_monitor_normal_polling_interval_in_seconds;
@@ -535,6 +567,8 @@ public:

    named_value<bool> enable_create_table_with_compact_storage;

+    named_value<bool> rf_rack_valid_keyspaces;
+
    static const sstring default_tls_priority;
 private:
    template<typename T>
--- a/db/corrupt_data_handler.cc
+++ b/db/corrupt_data_handler.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2025-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#include "db/corrupt_data_handler.hh"
+#include "reader_concurrency_semaphore.hh"
+#include "replica/database.hh"
+#include "utils/UUID_gen.hh"
+
+static logging::logger corrupt_data_logger("corrupt_data");
+
+namespace sm = seastar::metrics;
+
+namespace db {
+
+corrupt_data_handler::corrupt_data_handler(register_metrics rm) {
+    if (rm) {
+        _metrics.add_group("corrupt_data", {
+                sm::make_counter("entries_reported", _stats.corrupt_data_reported,
+                               sm::description("Counts the number of corrupt data instances reported to the corrupt data handler. "
+                                               "A non-zero value indicates that the database suffered data corruption."))
+                });
+    }
+}
+
+future<corrupt_data_handler::entry_id> corrupt_data_handler::record_corrupt_clustering_row(const schema& s, const partition_key& pk,
+        clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
+    ++_stats.corrupt_data_reported;
+    ++_stats.corrupt_clustering_rows_reported;
+    return do_record_corrupt_clustering_row(s, pk, std::move(cr), std::move(origin), std::move(sstable_name)).then([this] (entry_id id) {
+        if (id) {
+            ++_stats.corrupt_data_recorded;
+            ++_stats.corrupt_clustering_rows_recorded;
+        }
+        return id;
+    });
+}
+
+system_table_corrupt_data_handler::system_table_corrupt_data_handler(config cfg, register_metrics rm)
+    : corrupt_data_handler(rm)
+    , _entry_ttl(cfg.entry_ttl)
+{
+}
+
+system_table_corrupt_data_handler::~system_table_corrupt_data_handler() {
+}
+
+reader_permit system_table_corrupt_data_handler::make_fragment_permit(const schema& s) {
+    return _fragment_semaphore->make_tracking_only_permit(s.shared_from_this(), "system_table_corrupt_data_handler::make_fragment_permit", db::no_timeout, {});
+}
+
+future<corrupt_data_handler::entry_id> system_table_corrupt_data_handler::do_record_corrupt_mutation_fragment(
+        gate::holder permit,
+        const schema& user_table_schema,
+        const partition_key& pk,
+        const clustering_key& ck,
+        mutation_fragment_v2::kind kind,
+        frozen_mutation_fragment_v2 fmf,
+        sstring origin,
+        std::optional<sstring> sstable_name) {
+    const corrupt_data_handler::entry_id id{utils::UUID_gen::get_time_UUID()};
+
+    const auto corrupt_data_schema = _sys_ks->local_db().find_column_family(system_keyspace::NAME, system_keyspace::CORRUPT_DATA).schema();
+
+    // Using the lower-level mutation API to avoid large allocation warnings when linearizing the frozen mutation fragment.
+    mutation entry_mutation(corrupt_data_schema, partition_key::from_exploded(*corrupt_data_schema, {serialized(user_table_schema.ks_name()), serialized(user_table_schema.cf_name())}));
+    auto& entry_row = entry_mutation.partition().clustered_row(*corrupt_data_schema, clustering_key::from_single_value(*corrupt_data_schema, serialized(timeuuid_native_type{id.uuid()})));
+
+    const auto timestamp = api::new_timestamp();
+
+    auto set_cell_raw = [this, &entry_row, &corrupt_data_schema, timestamp] (const char* cell_name, managed_bytes cell_value) {
+        auto cdef = corrupt_data_schema->get_column_definition(cell_name);
+        SCYLLA_ASSERT(cdef);
+
+        entry_row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, timestamp, cell_value, _entry_ttl));
+    }; 
+
+    auto set_cell = [this, &entry_row, &corrupt_data_schema, timestamp] (const char* cell_name, data_value cell_value) {
+        auto cdef = corrupt_data_schema->get_column_definition(cell_name);
+        SCYLLA_ASSERT(cdef);
+
+        entry_row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, timestamp, cell_value.serialize_nonnull(), _entry_ttl));
+    };
+
+    entry_row.apply(row_marker(timestamp, _entry_ttl, gc_clock::now() + _entry_ttl));
+    set_cell("partition_key", data_value(to_bytes(pk.representation())));
+    set_cell("clustering_key", data_value(to_bytes(ck.representation())));
+    set_cell("mutation_fragment_kind", fmt::to_string(kind));
+    // FIXME: Exposing knowledge here that bytes are serialized by just storing the raw value.
+    // Need to replace with a fragmented-buffer serialize API call, which we don't have yet.
+    set_cell_raw("frozen_mutation_fragment", std::move(fmf).representation().to_managed_bytes());
+    set_cell("origin", origin);
+    set_cell("sstable_name", sstable_name);
+
+    return _sys_ks->apply_mutation(std::move(entry_mutation)).then([id] {
+        return id;
+    });
+}
+
+future<corrupt_data_handler::entry_id> system_table_corrupt_data_handler::do_record_corrupt_clustering_row(const schema& s, const partition_key& pk,
+        clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
+    if (!_sys_ks) {
+        co_return corrupt_data_handler::entry_id::create_null_id();
+    }
+    auto permit = _gate.hold();
+
+    const auto ck = cr.key();
+    auto fmf = freeze(s, mutation_fragment_v2(s, make_fragment_permit(s), std::move(cr)));
+
+    co_return co_await do_record_corrupt_mutation_fragment(std::move(permit), s, pk, ck, mutation_fragment_v2::kind::clustering_row, std::move(fmf),
+            std::move(origin), std::move(sstable_name));
+}
+
+void system_table_corrupt_data_handler::plug_system_keyspace(db::system_keyspace& sys_ks) noexcept {
+    _sys_ks = sys_ks.shared_from_this();
+    _fragment_semaphore = std::make_unique<reader_concurrency_semaphore>(reader_concurrency_semaphore::no_limits{}, "system_table_corrupt_data_handler", reader_concurrency_semaphore::register_metrics::no);
+}
+
+void system_table_corrupt_data_handler::unplug_system_keyspace() noexcept {
+    _sys_ks = nullptr;
+}
+
+future<> system_table_corrupt_data_handler::stop() noexcept {
+    co_await _gate.close();
+    if (_fragment_semaphore) {
+        co_await _fragment_semaphore->stop();
+    }
+}
+
+future<corrupt_data_handler::entry_id> nop_corrupt_data_handler::do_record_corrupt_clustering_row(const schema& s, const partition_key& pk,
+        clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
+    return make_ready_future<entry_id>(entry_id::create_null_id());
+}
+
+} // namespace db
--- a/db/corrupt_data_handler.hh
+++ b/db/corrupt_data_handler.hh
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2025-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#pragma once
+
+#include "db/system_keyspace.hh"
+#include "utils/UUID.hh"
+
+class reader_concurrency_semaphore;
+class reader_permit;
+
+namespace db {
+
+class corrupt_data_handler {
+public:
+    // An ID identifying the corrupt data entry.
+    // To be interpreted in the context of the storage where it is recorded, see storage_name().
+    using entry_id = utils::tagged_uuid<struct corrupt_data_entry_tag>;
+
+    struct stats {
+        // Counters for the number of corrupt data entries reported.
+        uint64_t corrupt_data_reported = 0;
+        // Counters for the number of corrupt data entries recorded.
+        // Can be less than reported depending on the configuration or if entries failed to be recorded.
+        uint64_t corrupt_data_recorded = 0;
+
+        uint64_t corrupt_clustering_rows_reported = 0;
+        uint64_t corrupt_clustering_rows_recorded = 0;
+    };
+
+private:
+    stats _stats;
+
+    seastar::metrics::metric_groups _metrics;
+
+protected:
+    virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) = 0;
+
+public:
+    using register_metrics = bool_class<struct corrupt_data_handler_register_metrics_tag>;
+    explicit corrupt_data_handler(register_metrics);
+    virtual ~corrupt_data_handler() = default;
+
+    const stats& get_stats() const noexcept {
+        return _stats;
+    }
+
+    // The name of the storage where corrupt data is recorded.
+    // The storage-name and the entry-id together should allow the user to unambiguously locate the entry.
+    virtual sstring storage_name() const noexcept = 0;
+
+    // Record a corrupt clustering row.
+    // If the returned id is null, the row was not recorded.
+    future<entry_id> record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name);
+};
+
+// Stores corrupt data entries in the system.corrupt_data table.
+class system_table_corrupt_data_handler final : public corrupt_data_handler {
+public:
+    struct config {
+        gc_clock::duration entry_ttl;
+    };
+
+private:
+    gc_clock::duration _entry_ttl;
+
+    gate _gate;
+    seastar::shared_ptr<db::system_keyspace> _sys_ks;
+    std::unique_ptr<reader_concurrency_semaphore> _fragment_semaphore;
+
+private:
+    reader_permit make_fragment_permit(const schema& s);
+
+    future<entry_id> do_record_corrupt_mutation_fragment(gate::holder permit, const schema& user_table_schema, const partition_key& pk, const clustering_key& ck,
+            mutation_fragment_v2::kind kind, frozen_mutation_fragment_v2 mf, sstring origin, std::optional<sstring> sstable_name);
+
+    virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) override;
+
+public:
+    explicit system_table_corrupt_data_handler(config, register_metrics);
+    ~system_table_corrupt_data_handler();
+
+    virtual sstring storage_name() const noexcept override {
+        return format("{}.{}", db::system_keyspace::NAME, db::system_keyspace::CORRUPT_DATA);
+    }
+
+    void plug_system_keyspace(db::system_keyspace& sys_ks) noexcept;
+    void unplug_system_keyspace() noexcept;
+
+    future<> stop() noexcept;
+};
+
+// A no-op corrupt data handler that does not record any data.
+class nop_corrupt_data_handler final : public corrupt_data_handler {
+    virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) override;
+
+public:
+    explicit nop_corrupt_data_handler(register_metrics rm)
+        : corrupt_data_handler(rm) {}
+    virtual sstring storage_name() const noexcept override {
+        return "/dev/null";
+    }
+};
+
+} // namespace db
--- a/db/hints/internal/hint_endpoint_manager.cc
+++ b/db/hints/internal/hint_endpoint_manager.cc
@@ -146,6 +146,10 @@ future<> hint_endpoint_manager::stop(drain should_drain) noexcept {
    });
 }

+void hint_endpoint_manager::cancel_draining() noexcept {
+    _sender.cancel_draining();
+}
+
 hint_endpoint_manager::hint_endpoint_manager(const endpoint_id& key, fs::path hint_directory, manager& shard_manager)
    : _key(key)
    , _shard_manager(shard_manager)
--- a/db/hints/internal/hint_endpoint_manager.hh
+++ b/db/hints/internal/hint_endpoint_manager.hh
@@ -102,6 +102,8 @@ public:
    /// \return Ready future when all operations are complete
    future<> stop(drain should_drain = drain::no) noexcept;

+    void cancel_draining() noexcept;
+
    /// \brief Start the timer.
    void start();

@@ -144,6 +146,10 @@ public:
        return _state.contains(state::stopped);
    }

+    bool canceled_draining() const noexcept {
+        return _sender.canceled_draining();
+    }
+
    /// \brief Returns replay position of the most recently written hint.
    ///
    /// If there weren't any hints written during this endpoint manager's lifetime, a zero replay_position is returned.
--- a/db/hints/internal/hint_sender.cc
+++ b/db/hints/internal/hint_sender.cc
@@ -10,6 +10,7 @@
 #include "db/hints/internal/hint_sender.hh"

 // Seastar features.
+#include <chrono>
 #include <exception>
 #include <seastar/core/abort_source.hh>
 #include <seastar/core/coroutine.hh>
@@ -192,6 +193,14 @@ future<> hint_sender::stop(drain should_drain) noexcept {
    });
 }

+void hint_sender::cancel_draining() {
+    manager_logger.info("Draining of {} has been marked as canceled", _ep_key);
+    if (_state.contains(state::draining)) {
+        _state.remove(state::draining);
+    }
+    _state.set(state::canceled_draining);
+}
+
 void hint_sender::add_segment(sstring seg_name) {
    _segments_to_replay.emplace_back(std::move(seg_name));
 }
@@ -449,6 +458,8 @@ bool hint_sender::send_one_file(const sstring& fname) {
    gc_clock::duration secs_since_file_mod = std::chrono::seconds(last_mod.tv_sec);
    lw_shared_ptr<send_one_file_ctx> ctx_ptr = make_lw_shared<send_one_file_ctx>(_last_schema_ver_to_column_mapping);

+    struct canceled_draining_exception {};
+
    try {
        commitlog::read_log_file(fname, manager::FILENAME_PREFIX, [this, secs_since_file_mod, &fname, ctx_ptr] (commitlog::buffer_and_replay_position buf_rp) -> future<> {
            auto& buf = buf_rp.buffer;
@@ -461,6 +472,12 @@ bool hint_sender::send_one_file(const sstring& fname) {
                    co_return;
                }

+                if (canceled_draining()) {
+                    manager_logger.debug("[{}] Exiting reading from commitlog because of canceled draining", _ep_key);
+                    // We need to throw an exception here to cancel reading the segment.
+                    throw canceled_draining_exception{};
+                }
+
                // Break early if stop() was called or the destination node went down.
                if (!can_send()) {
                    ctx_ptr->segment_replay_failed = true;
@@ -491,6 +508,8 @@ bool hint_sender::send_one_file(const sstring& fname) {
        manager_logger.error("{}: {}. Dropping...", fname, ex.what());
        ctx_ptr->segment_replay_failed = false;
        ++this->shard_stats().corrupted_files;
+    } catch  (const canceled_draining_exception&) {
+        manager_logger.debug("[{}] Loop in send_one_file finishes due to canceled draining", _ep_key);
    } catch (...) {
        manager_logger.trace("sending of {} failed: {}", fname, std::current_exception());
        ctx_ptr->segment_replay_failed = true;
@@ -499,6 +518,12 @@ bool hint_sender::send_one_file(const sstring& fname) {
    // wait till all background hints sending is complete
    ctx_ptr->file_send_gate.close().get();

+    // If draining was canceled, we can't say anything about the segment's state,
+    // so return immediately. We return false here because of that reason too.
+    if (canceled_draining()) {
+        return false;
+    }
+
    // If we are draining ignore failures and drop the segment even if we failed to send it.
    if (draining() && ctx_ptr->segment_replay_failed) {
        manager_logger.trace("send_one_file(): we are draining so we are going to delete the segment anyway");
@@ -556,6 +581,10 @@ void hint_sender::send_hints_maybe() noexcept {

    try {
        while (true) {
+            if (canceled_draining()) {
+                manager_logger.debug("[{}] Exiting loop in send_hints_maybe because of canceled draining", _ep_key);
+                break;
+            }
            const sstring* seg_name = name_of_current_segment();
            if (!seg_name || !replay_allowed() || !can_send()) {
                break;
--- a/db/hints/internal/hint_sender.hh
+++ b/db/hints/internal/hint_sender.hh
@@ -66,12 +66,14 @@ class hint_sender {
        stopping,               // stop() was called
        ep_state_left_the_ring, // destination Node is not a part of the ring anymore - usually means that it has been decommissioned
        draining,               // try to send everything out and ignore errors
+        canceled_draining,      // draining was started, but it got canceled
    };

    using state_set = enum_set<super_enum<state,
        state::stopping,
        state::ep_state_left_the_ring,
-        state::draining>>;
+        state::draining,
+        state::canceled_draining>>;

    struct send_one_file_ctx {
        send_one_file_ctx(std::unordered_map<table_schema_version, column_mapping>& last_schema_ver_to_column_mapping)
@@ -140,6 +142,12 @@ public:
    /// \param should_drain if is drain::yes - drain all pending hints
    future<> stop(drain should_drain) noexcept;

+    void cancel_draining();
+
+    bool canceled_draining() const noexcept {
+        return _state.contains(state::canceled_draining);
+    }
+
    /// \brief Add a new segment ready for sending.
    void add_segment(sstring seg_name);

--- a/db/hints/manager.cc
+++ b/db/hints/manager.cc
@@ -220,11 +220,24 @@ future<> manager::stop() {

    set_stopping();

-    return _migrating_done.finally([this] {
+    const auto& node = *_proxy.get_token_metadata_ptr()->get_topology().this_node();
+    const bool leaving = node.is_leaving() || node.left();
+
+    return _migrating_done.finally([this, leaving] {
+        // We want to stop the manager as soon as possible if it's not leaving the cluster.
+        // Because of that, we need to cancel all ongoing drains (since that can take quite a bit of time),
+        // but we also need to ensure that no new drains will be started in the meantime.
+        if (!leaving) {
+            for (auto& [_, ep_man] : _ep_managers) {
+                ep_man.cancel_draining();
+            }
+        }
        return _draining_eps_gate.close();
+        // At this point, all endpoint managers that were being previously drained have been deleted from the map.
+        // In other words, the next lambda is safe to run, i.e. we won't call `hint_endpoint_manager::stop()` twice.
    }).finally([this] {
        return parallel_for_each(_ep_managers | std::views::values, [] (hint_endpoint_manager& ep_man) {
-            return ep_man.stop();
+            return ep_man.stop(drain::no);
        }).finally([this] {
            _ep_managers.clear();
            _hint_directory_manager.clear();
@@ -667,7 +680,7 @@ future<> manager::drain_for(endpoint_id host_id, gms::inet_address ip) noexcept
        co_return;
    }

-    manager_logger.trace("on_leave_cluster: {} is removed/decommissioned", host_id);
+    manager_logger.trace("Draining starts for {}", host_id);

    const auto holder = seastar::gate::holder{_draining_eps_gate};
    // As long as we hold on to this lock, no migration of hinted handoff to host IDs
@@ -677,9 +690,24 @@ future<> manager::drain_for(endpoint_id host_id, gms::inet_address ip) noexcept

    // After an endpoint has been drained, we remove its directory with all of its contents.
    auto drain_ep_manager = [] (hint_endpoint_manager& ep_man) -> future<> {
-        return ep_man.stop(drain::yes).finally([&] {
-            return ep_man.with_file_update_mutex([&ep_man] {
-                return remove_file(ep_man.hints_dir().native());
+        // Prevent a drain if the endpoint manager was marked to cancel it.
+        if (ep_man.canceled_draining()) {
+            return make_ready_future();
+        }
+        return ep_man.stop(drain::yes).finally([&ep_man] {
+            // If draining was canceled, we can't remove the hint directory yet
+            // because there might still be some hints that we should send.
+            // We'll do that when the node starts again.
+            // Note that canceling draining can ONLY occur when the node is simply stopping.
+            // That cannot happen when decommissioning the node.
+            if (ep_man.canceled_draining()) {
+                return make_ready_future();
+            }
+
+            return ep_man.with_file_update_mutex([&ep_man] -> future<> {
+                return remove_file(ep_man.hints_dir().native()).then([&ep_man] {
+                    manager_logger.debug("Removed hint directory for {}", ep_man.end_point_key());
+                });
            });
        });
    };
@@ -986,4 +1014,18 @@ future<> manager::perform_migration() {
    manager_logger.info("Migration of hinted handoff to host ID has finished successfully");
 }

+// Technical note: This function obviously doesn't need to be a coroutine. However, it's better to impose
+//                 this constraint early on with possible future refactors in mind. It should be easier
+//                 to modify the function this way.
+future<> manager::drain_left_nodes() {
+    for (const auto& [host_id, ep_man] : _ep_managers) {
+        if (!_proxy.get_token_metadata_ptr()->is_normal_token_owner(host_id)) {
+            // It's safe to discard this future. It's awaited in `manager::stop()`.
+            (void) drain_for(host_id, {});
+        }
+    }
+
+    co_return;
+}
+
 } // namespace db::hints
--- a/db/hints/manager.hh
+++ b/db/hints/manager.hh
@@ -382,6 +382,12 @@ private:
    /// ALL requested sync points will be canceled, i.e. an exception will be issued
    /// in the corresponding futures.
    future<> perform_migration();
+
+public:
+    /// Performs draining for all nodes that have already left the cluster.
+    /// This should only be called when the hint endpoint managers have been initialized
+    /// and the hint manager has started.
+    future<> drain_left_nodes();
 };

 } // namespace db::hints
--- a/db/hints/resource_manager.cc
+++ b/db/hints/resource_manager.cc
@@ -239,6 +239,15 @@ future<> resource_manager::stop() noexcept {
    });
 }

+future<> resource_manager::drain_hints_for_left_nodes() {
+    for (manager& m : _shard_managers) {
+        // It's safe to discard the future here. It's awaited in `manager::stop()`.
+        (void) m.drain_left_nodes();
+    }
+
+    co_return;
+}
+
 future<> resource_manager::register_manager(manager& m) {
    return with_semaphore(_operation_lock, 1, [this, &m] () {
        return with_semaphore(_space_watchdog.update_lock(), 1, [this, &m] {
--- a/db/hints/resource_manager.hh
+++ b/db/hints/resource_manager.hh
@@ -188,6 +188,8 @@ public:
    /// \brief Allows replaying hints for managers which are registered now or will be in the future.
    void allow_replaying() noexcept;

+    future<> drain_hints_for_left_nodes();
+
    /// \brief Registers the hints::manager in resource_manager, and starts it, if resource_manager is already running.
    ///
    /// The hints::managers can be added either before or after resource_manager starts.
--- a/db/large_data_handler.cc
+++ b/db/large_data_handler.cc
@@ -146,7 +146,7 @@ cql_table_large_data_handler::cql_table_large_data_handler(gms::feature_service&

 template <typename... Args>
 future<> cql_table_large_data_handler::try_record(std::string_view large_table, const sstables::sstable& sst,  const sstables::key& partition_key, int64_t size,
-        std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const {
+        std::string_view size_desc, std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const {
    if (!_sys_ks) {
        return make_ready_future<>();
    }
@@ -165,7 +165,7 @@ future<> cql_table_large_data_handler::try_record(std::string_view large_table,
    const auto sstable_name = large_data_handler::sst_filename(sst);
    std::string pk_str = key_to_str(partition_key.to_partition_key(s), s);
    auto timestamp = db_clock::now();
-    large_data_logger.warn("Writing large {} {}/{}: {} ({} bytes) to {}", desc, ks_name, cf_name, extra_path, size, sstable_name);
+    large_data_logger.warn("Writing large {} {}/{}: {} ({}) to {}", desc, ks_name, cf_name, extra_path, size_desc, sstable_name);
    return _sys_ks->execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
            .discard_result()
            .handle_exception([ks_name, cf_name, large_table, sstable_name] (std::exception_ptr ep) {
@@ -182,12 +182,14 @@ future<> cql_table_large_data_handler::record_large_partitions(const sstables::s

 future<> cql_table_large_data_handler::internal_record_large_partitions(const sstables::sstable& sst, const sstables::key& key,
        uint64_t partition_size, uint64_t rows) const {
-    return try_record("partition", sst, key, int64_t(partition_size), "partition", "", {"rows"}, data_value((int64_t)rows));
+    const sstring size_desc = seastar::format("{} bytes/{} rows", partition_size, rows);
+    return try_record("partition", sst, key, int64_t(partition_size), size_desc, "partition", "", {"rows"}, data_value((int64_t)rows));
 }

 future<> cql_table_large_data_handler::internal_record_large_partitions_all_data(const sstables::sstable& sst, const sstables::key& key,
        uint64_t partition_size, uint64_t rows, uint64_t range_tombstones, uint64_t dead_rows) const {
-    return try_record("partition", sst, key, int64_t(partition_size), "partition", "", {"rows", "range_tombstones", "dead_rows"},
+    const sstring size_desc = seastar::format("{} bytes/{} rows", partition_size, rows);
+    return try_record("partition", sst, key, int64_t(partition_size), size_desc, "partition", "", {"rows", "range_tombstones", "dead_rows"},
                data_value((int64_t)rows), data_value((int64_t)range_tombstones), data_value((int64_t)dead_rows));
 }

@@ -201,13 +203,14 @@ future<> cql_table_large_data_handler::internal_record_large_cells(const sstable
    auto column_name = cdef.name_as_text();
    std::string_view cell_type = cdef.is_atomic() ? "cell" : "collection";
    static const std::vector<sstring> extra_fields{"clustering_key", "column_name"};
+    const sstring size_desc = seastar::format("{} bytes", cell_size);
    if (clustering_key) {
        const schema &s = *sst.get_schema();
        auto ck_str = key_to_str(*clustering_key, s);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, ck_str, column_name);
+        return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, cell_type, column_name, extra_fields, ck_str, column_name);
    } else {
        auto desc = seastar::format("static {}", cell_type);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
+        return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
    }
 }

@@ -215,26 +218,28 @@ future<> cql_table_large_data_handler::internal_record_large_cells_and_collectio
        const clustering_key_prefix* clustering_key, const column_definition& cdef, uint64_t cell_size, uint64_t collection_elements) const {
    auto column_name = cdef.name_as_text();
    std::string_view cell_type = cdef.is_atomic() ? "cell" : "collection";
+    const sstring size_desc = seastar::format("{} bytes", cell_size);
    static const std::vector<sstring> extra_fields{"clustering_key", "column_name", "collection_elements"};
    if (clustering_key) {
        const schema &s = *sst.get_schema();
        auto ck_str = key_to_str(*clustering_key, s);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, ck_str, column_name, data_value((int64_t)collection_elements));
+        return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, cell_type, column_name, extra_fields, ck_str, column_name, data_value((int64_t)collection_elements));
    } else {
        auto desc = seastar::format("static {}", cell_type);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name, data_value((int64_t)collection_elements));
+        return try_record("cell", sst, partition_key, int64_t(cell_size), size_desc, desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name, data_value((int64_t)collection_elements));
    }
 }

 future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable& sst, const sstables::key& partition_key,
        const clustering_key_prefix* clustering_key, uint64_t row_size) const {
    static const std::vector<sstring> extra_fields{"clustering_key"};
+    const sstring size_desc = seastar::format("{} bytes", row_size);
    if (clustering_key) {
        const schema &s = *sst.get_schema();
        std::string ck_str = key_to_str(*clustering_key, s);
-        return try_record("row", sst, partition_key, int64_t(row_size), "row", "", extra_fields, ck_str);
+        return try_record("row", sst, partition_key, int64_t(row_size), size_desc, "row", "", extra_fields, ck_str);
    } else {
-        return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
+        return try_record("row", sst, partition_key, int64_t(row_size), size_desc, "static row", "", extra_fields, data_value::make_null(utf8_type));
    }
 }

--- a/db/large_data_handler.hh
+++ b/db/large_data_handler.hh
@@ -187,7 +187,7 @@ private:
 private:
    template <typename... Args>
    future<> try_record(std::string_view large_table, const sstables::sstable& sst,  const sstables::key& partition_key, int64_t size,
-            std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const;
+            std::string_view size_desc, std::string_view desc, std::string_view extra_path, const std::vector<sstring> &extra_fields, Args&&... args) const;
 };

 class nop_large_data_handler : public large_data_handler {
--- a/db/schema_applier.cc
+++ b/db/schema_applier.cc
@@ -579,19 +579,23 @@ static future<> merge_tables_and_views(distributed<service::storage_proxy>& prox
        // 2. The table was just created - the table is guaranteed to be published with the view in that case.
        // 3. The view itself was altered - in that case we already know the base table so we can take it from
        //    the database object.
-        view_ptr vp = create_view_from_mutations(proxy, std::move(sm));
+        query::result_set rs(sm.columnfamilies_mutation());
+        const query::result_set_row& view_row = rs.row(0);
+        auto ks_name = view_row.get_nonnull<sstring>("keyspace_name");
+        auto base_name = view_row.get_nonnull<sstring>("base_table_name");
+
        schema_ptr base_schema;
        for (auto&& altered : tables_diff.altered) {
            // Chose the appropriate version of the base table schema: old -> old, new -> new.
            schema_ptr s = side == schema_diff_side::left ? altered.old_schema : altered.new_schema;
-            if (s->ks_name() == vp->ks_name() && s->cf_name() == vp->view_info()->base_name() ) {
+            if (s->ks_name() == ks_name && s->cf_name() == base_name) {
                base_schema = s;
                break;
            }
        }
        if (!base_schema) {
            for (auto&& s : tables_diff.created) {
-                if (s.get()->ks_name() == vp->ks_name() && s.get()->cf_name() == vp->view_info()->base_name() ) {
+                if (s.get()->ks_name() == ks_name && s.get()->cf_name() == base_name) {
                    base_schema = s;
                    break;
                }
@@ -599,14 +603,14 @@ static future<> merge_tables_and_views(distributed<service::storage_proxy>& prox
        }

        if (!base_schema) {
-            base_schema = proxy.local().local_db().find_schema(vp->ks_name(), vp->view_info()->base_name());
+            base_schema = proxy.local().local_db().find_schema(ks_name, base_name);
        }
+        view_ptr vp = create_view_from_mutations(proxy, std::move(sm), base_schema);

        // Now when we have a referenced base - sanity check that we're not registering an old view
        // (this could happen when we skip multiple major versions in upgrade, which is unsupported.)
        check_no_legacy_secondary_index_mv_schema(proxy.local().get_db().local(), vp, base_schema);

-        vp->view_info()->set_base_info(vp->view_info()->make_base_dependent_view_info(*base_schema));
        return vp;
    });

--- a/db/schema_tables.cc
+++ b/db/schema_tables.cc
@@ -2430,13 +2430,9 @@ static index_metadata create_index_from_index_row(const query::result_set_row& r
    return index_metadata{index_name, options, kind, is_local};
 }

-/*
- * View metadata serialization/deserialization.
- */
-
-view_ptr create_view_from_mutations(const schema_ctxt& ctxt, schema_mutations sm, std::optional<table_schema_version> version)  {
-    auto table_rs = query::result_set(sm.columnfamilies_mutation());
-    query::result_set_row row = table_rs.row(0);
+static schema_builder prepare_view_schema_builder_from_mutations(const schema_ctxt& ctxt, const schema_mutations& sm, std::optional<table_schema_version> version,
+                                                                const query::result_set& table_rs) {
+    const query::result_set_row& row = table_rs.row(0);

    auto ks_name = row.get_nonnull<sstring>("keyspace_name");
    auto cf_name = row.get_nonnull<sstring>("view_name");
@@ -2462,13 +2458,47 @@ view_ptr create_view_from_mutations(const schema_ctxt& ctxt, schema_mutations sm
    } else {
        builder.with_version(sm.digest(ctxt.features().cluster_schema_features()));
    }
+    return builder;
+}

-    auto base_id = table_id(row.get_nonnull<utils::UUID>("base_table_id"));
+/*
+ * View metadata serialization/deserialization.
+ * If the base info is not provided, the schema context must have a reference to the database,
+ * and the most up-to-date base schema will be pulled from there.
+ */
+view_ptr create_view_from_mutations(const schema_ctxt& ctxt, schema_mutations sm, schema_ptr base_schema, std::optional<table_schema_version> version)  {
+    auto table_rs = query::result_set(sm.columnfamilies_mutation());
+    auto builder = prepare_view_schema_builder_from_mutations(ctxt, sm, version, table_rs);
+    const query::result_set_row& row = table_rs.row(0);
+    auto include_all_columns = row.get_nonnull<bool>("include_all_columns");
+    auto where_clause = row.get_nonnull<sstring>("where_clause");
+
+    builder.with_view_info(std::move(base_schema), include_all_columns, std::move(where_clause));
+    return view_ptr(builder.build());
+}
+
+view_ptr create_view_from_mutations(const schema_ctxt& ctxt, schema_mutations sm, std::optional<db::view::base_dependent_view_info> base_info, std::optional<table_schema_version> version)  {
+    auto table_rs = query::result_set(sm.columnfamilies_mutation());
+    auto builder = prepare_view_schema_builder_from_mutations(ctxt, sm, version, table_rs);
+    const query::result_set_row& row = table_rs.row(0);
+    auto id = table_id(row.get_nonnull<utils::UUID>("base_table_id"));
    auto base_name = row.get_nonnull<sstring>("base_table_name");
    auto include_all_columns = row.get_nonnull<bool>("include_all_columns");
    auto where_clause = row.get_nonnull<sstring>("where_clause");

-    builder.with_view_info(std::move(base_id), std::move(base_name), include_all_columns, std::move(where_clause));
+    if (!base_info) {
+        if (!ctxt.get_db()) {
+            auto ks_name = row.get_nonnull<sstring>("keyspace_name");
+            auto cf_name = row.get_nonnull<sstring>("view_name");
+            on_internal_error(slogger, format("No database reference with missing base schema when creating view {}.{} from mutations",
+                ks_name, cf_name));
+        }
+        auto base_id = table_id(row.get_nonnull<utils::UUID>("base_table_id"));
+        auto base_schema = ctxt.get_db()->find_schema(base_id);
+        builder.with_view_info(base_schema, include_all_columns, std::move(where_clause));
+    } else {
+        builder.with_view_info(id, base_name, include_all_columns, std::move(where_clause), *base_info);
+    }
    return view_ptr(builder.build());
 }

--- a/db/schema_tables.hh
+++ b/db/schema_tables.hh
@@ -17,6 +17,7 @@
 #include "schema_mutations.hh"
 #include "types/map.hh"
 #include "query-result-set.hh"
+#include "db/view/base_info.hh"

 #include <seastar/core/distributed.hh>

@@ -287,7 +288,8 @@ std::vector<mutation> make_drop_table_mutations(lw_shared_ptr<keyspace_metadata>

 schema_ptr create_table_from_mutations(const schema_ctxt&, schema_mutations, std::optional<table_schema_version> version = {});

-view_ptr create_view_from_mutations(const schema_ctxt&, schema_mutations, std::optional<table_schema_version> version = {});
+view_ptr create_view_from_mutations(const schema_ctxt&, schema_mutations, schema_ptr, std::optional<table_schema_version> version = {});
+view_ptr create_view_from_mutations(const schema_ctxt&, schema_mutations, std::optional<view::base_dependent_view_info> = {}, std::optional<table_schema_version> version = {});

 future<std::vector<view_ptr>> create_views_from_schema_partition(distributed<service::storage_proxy>& proxy, const schema_result::mapped_type& result);

--- a/db/system_keyspace.cc
+++ b/db/system_keyspace.cc
@@ -23,6 +23,7 @@
 #include "gms/feature_service.hh"
 #include "system_keyspace_view_types.hh"
 #include "schema/schema_builder.hh"
+#include "timestamp.hh"
 #include "utils/assert.hh"
 #include "utils/hashers.hh"
 #include "utils/log.hh"
@@ -35,6 +36,7 @@
 #include "db/schema_tables.hh"
 #include "gms/generation-number.hh"
 #include "service/storage_service.hh"
+#include "service/storage_proxy.hh"
 #include "service/paxos/paxos_state.hh"
 #include "query-result-set.hh"
 #include "idl/frozen_mutation.dist.hh"
@@ -762,6 +764,35 @@ schema_ptr system_keyspace::large_cells() {
    return large_cells;
 }

+schema_ptr system_keyspace::corrupt_data() {
+    static thread_local auto corrupt_data = [] {
+        auto id = generate_legacy_id(NAME, CORRUPT_DATA);
+        return schema_builder(NAME, CORRUPT_DATA, id)
+                // partition key
+                .with_column("keyspace_name", utf8_type, column_kind::partition_key)
+                .with_column("table_name", utf8_type, column_kind::partition_key)
+                // clustering key
+                .with_column("id", timeuuid_type, column_kind::clustering_key)
+                // regular rows
+                // Storing keys as bytes: having a corrupt key might be the reason
+                // to record the row as corrupt, so we just dump what we have and
+                // leave interpreting to the lucky person investigating the disaster.
+                .with_column("partition_key", bytes_type)
+                .with_column("clustering_key", bytes_type)
+                // Note: mutation-fragment v2
+                .with_column("mutation_fragment_kind", utf8_type)
+                .with_column("frozen_mutation_fragment", bytes_type)
+                .with_column("origin", utf8_type)
+                .with_column("sstable_name", utf8_type)
+                // options
+                .set_comment("mutation-fragments found to be corrupted")
+                .set_gc_grace_seconds(0)
+                .with_hash_version()
+                .build();
+    }();
+    return corrupt_data;
+}
+
 static constexpr auto schema_gc_grace = std::chrono::duration_cast<std::chrono::seconds>(days(7)).count();

 /*static*/ schema_ptr system_keyspace::scylla_local() {
@@ -2305,6 +2336,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
                    peers(), peer_events(), range_xfers(),
                    compactions_in_progress(), compaction_history(),
                    sstable_activity(), size_estimates(), large_partitions(), large_rows(), large_cells(),
+                    corrupt_data(),
                    scylla_local(), db::schema_tables::scylla_table_schema_history(),
                    repair_history(),
                    v3::views_builds_in_progress(), v3::built_views(),
@@ -2931,9 +2963,8 @@ future<std::optional<mutation>> system_keyspace::get_service_levels_version_muta
    return get_scylla_local_mutation(_db, SERVICE_LEVELS_VERSION_KEY);
 }

-future<mutation> system_keyspace::make_service_levels_version_mutation(int8_t version, const service::group0_guard& guard) {
+future<mutation> system_keyspace::make_service_levels_version_mutation(int8_t version, api::timestamp_type timestamp) {
    static sstring query = format("INSERT INTO {}.{} (key, value) VALUES (?, ?);", db::system_keyspace::NAME, db::system_keyspace::SCYLLA_LOCAL);
-    auto timestamp = guard.write_timestamp();
    auto muts = co_await _qp.get_mutations_internal(query, internal_system_query_state(), timestamp, {SERVICE_LEVELS_VERSION_KEY, format("{}", version)});

    if (muts.size() != 1) {
@@ -3528,4 +3559,12 @@ future<::shared_ptr<cql3::untyped_result_set>> system_keyspace::execute_cql(cons
    return _qp.execute_internal(query_string, values, cql3::query_processor::cache_internal::yes);
 }

+future<> system_keyspace::apply_mutation(mutation m) {
+    if (m.schema()->ks_name() != NAME) { // only tables of this keyspace are accepted
+        on_internal_error(slogger, fmt::format("system_keyspace::apply_mutation(): attempted to apply mutation belonging to table {}.{}", m.schema()->ks_name(), m.schema()->cf_name()));
+    }
+    // Commitlog sync follows the table's wait_for_sync_to_commitlog static prop; the write is issued with no timeout.
+    return _qp.proxy().mutate_locally(m, {}, db::commitlog::force_sync(m.schema()->static_props().wait_for_sync_to_commitlog), db::no_timeout);
+}
+
 } // namespace db
--- a/db/system_keyspace.hh
+++ b/db/system_keyspace.hh
@@ -141,6 +141,7 @@ class system_keyspace : public seastar::peering_sharded_service<system_keyspace>
    static schema_ptr large_partitions();
    static schema_ptr large_rows();
    static schema_ptr large_cells();
+    static schema_ptr corrupt_data();
    static schema_ptr scylla_local();
    future<> force_blocking_flush(sstring cfname);
    // This function is called when the system.peers table is read,
@@ -173,6 +174,7 @@ public:
    static constexpr auto LARGE_PARTITIONS = "large_partitions";
    static constexpr auto LARGE_ROWS = "large_rows";
    static constexpr auto LARGE_CELLS = "large_cells";
+    static constexpr auto CORRUPT_DATA = "corrupt_data";
    static constexpr auto SCYLLA_LOCAL = "scylla_local";
    static constexpr auto RAFT = "raft";
    static constexpr auto RAFT_SNAPSHOTS = "raft_snapshots";
@@ -654,7 +656,7 @@ public:
 public:
    future<std::optional<int8_t>> get_service_levels_version();
    
-    future<mutation> make_service_levels_version_mutation(int8_t version, const service::group0_guard& guard);
+    future<mutation> make_service_levels_version_mutation(int8_t version, api::timestamp_type timestamp);
    future<std::optional<mutation>> get_service_levels_version_mutation();

    // Publishes a new compression dictionary to `dicts`,
@@ -685,6 +687,10 @@ public:
        return execute_cql(req, { data_value(std::forward<Args>(args))... });
    }

+    // Apply write as mutation to the system keyspace.
+    // Mutation has to belong to a table in the system keyspace.
+    future<> apply_mutation(mutation m);
+
    friend future<column_mapping> db::schema_tables::get_column_mapping(db::system_keyspace& sys_ks, ::table_id table_id, table_schema_version version);
    friend future<bool> db::schema_tables::column_mapping_exists(db::system_keyspace& sys_ks, table_id table_id, table_schema_version version);
    friend future<> db::schema_tables::drop_column_mapping(db::system_keyspace& sys_ks, table_id table_id, table_schema_version version);
--- a/db/tags/utils.cc
+++ b/db/tags/utils.cc
@@ -49,29 +49,39 @@ std::optional<std::string> find_tag(const schema& s, const sstring& tag) {
 future<> modify_tags(service::migration_manager& mm, sstring ks, sstring cf,
                     std::function<void(std::map<sstring, sstring>&)> modify) {
    co_await mm.container().invoke_on(0, [ks = std::move(ks), cf = std::move(cf), modify = std::move(modify)] (service::migration_manager& mm) -> future<> {
-        // FIXME: the following needs to be in a loop. If mm.announce() below
-        // fails, we need to retry the whole thing.
-        auto group0_guard = co_await mm.start_group0_operation();
-        // After getting the schema-modification lock, we need to read the
-        // table's *current* schema - it might have changed before we got
-        // the lock, by some concurrent modification. If the table is gone,
-        // this will throw no_such_column_family.
-        schema_ptr s = mm.get_storage_proxy().data_dictionary().find_schema(ks, cf);
-        const std::map<sstring, sstring>* tags_ptr = get_tags_of_table(s);
-        std::map<sstring, sstring> tags;
-        if (tags_ptr) {
-            // tags_ptr is a constant pointer to schema data. To allow func()
-            // to modify the tags, we must make a copy.
-            tags = *tags_ptr;
-        }
-        modify(tags);
-        schema_builder builder(s);
-        builder.add_extension(tags_extension::NAME, ::make_shared<tags_extension>(tags));
+        size_t retries = mm.get_concurrent_ddl_retries();
+        for (;;) {
+            auto group0_guard = co_await mm.start_group0_operation();
+            // After getting the schema-modification lock, we need to read the
+            // table's *current* schema - it might have changed before we got
+            // the lock, by some concurrent modification. If the table is gone,
+            // this will throw no_such_column_family.
+            schema_ptr s = mm.get_storage_proxy().data_dictionary().find_schema(ks, cf);
+            const std::map<sstring, sstring>* tags_ptr = get_tags_of_table(s);
+            std::map<sstring, sstring> tags;
+            if (tags_ptr) {
+                // tags_ptr is a constant pointer to schema data. To allow func()
+                // to modify the tags, we must make a copy.
+                tags = *tags_ptr;
+            }
+            modify(tags);
+            schema_builder builder(s);
+            builder.add_extension(tags_extension::NAME, ::make_shared<tags_extension>(tags));

-        auto m = co_await service::prepare_column_family_update_announcement(mm.get_storage_proxy(),
+            auto m = co_await service::prepare_column_family_update_announcement(mm.get_storage_proxy(),
                builder.build(), std::vector<view_ptr>(), group0_guard.write_timestamp());
-
-        co_await mm.announce(std::move(m), std::move(group0_guard), format("Modify tags for {} table", cf));
+            try {
+                co_await mm.announce(std::move(m), std::move(group0_guard), format("Modify tags for {} table", cf));
+                break;
+            }  catch (const service::group0_concurrent_modification& ex) {
+                tlogger.info("Failed to modify tags for table {} due to concurrent schema modifications. {}.",
+                    cf, retries ? "Retrying" : "Number of retries exceeded, giving up");
+                if (retries--) {
+                    continue;
+                }
+                throw;
+            }
+        }
    });
 }

--- a/db/view/base_info.hh
+++ b/db/view/base_info.hh
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2025-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#pragma once
+
+#include <optional>
+#include "bytes_fwd.hh"
+#include "schema/schema_fwd.hh"
+
+namespace db {
+
+namespace view {
+
+// Part of the view description which depends on the base schema.
+struct base_dependent_view_info {
+    bool has_computed_column_depending_on_base_non_primary_key;
+
+    // True if the partition key columns of the view are the same as the
+    // partition key columns of the base, maybe in a different order.
+    bool is_partition_key_permutation_of_base_partition_key;
+
+    // Indicates if the view has pk columns which are not part of the base
+    // pk. It may seem that !base_non_pk_columns_in_view_pk.empty() is the
+    // same, but there are cases where we can compute this boolean without
+    // being able to reliably build the former.
+    bool has_base_non_pk_columns_in_view_pk;
+
+
+    // A constructor for a base info that can facilitate reads and writes from the materialized view.
+    base_dependent_view_info(bool has_computed_column_depending_on_base_non_primary_key,
+            bool is_partition_key_permutation_of_base_partition_key,
+            bool has_base_non_pk_columns_in_view_pk);
+};
+
+}
+
+}
--- a/db/view/regular_column_transformation.hh
+++ b/db/view/regular_column_transformation.hh
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2024-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
+ */
+
+#pragma once
+
+#include "column_computation.hh"
+#include "mutation/atomic_cell.hh"
+#include "timestamp.hh"
+#include <type_traits>
+
+class row_marker;
+
+// In a basic column_computation defined in column_computation.hh, the
+// compute_value() method is only based on the partition key, and it must
+// return a value. That API has very limited applications - basically the
+// only thing we can implement with it is token_column_computation which
+// we used to create the token column in secondary indexes.
+// The regular_column_transformation base class here is more powerful, but
+// still is not a completely general computation: Its compute_value() virtual
+// method can transform the value read from a single cell of a regular column
+// into a new cell stored in a structure regular_column_transformation::result.
+//
+// In more detail, the assumptions of regular_column_transformation are:
+// 1. compute_value() computes the value based on a *single* column in a
+//    row passed to compute_value().
+//    This assumption means that the value or deletion of the value always
+//    has a single known timestamp (and the value can't be half-missing)
+//    and single TTL information. That would not have been possible if we
+//    allowed the computation to depend on multiple columns.
+// 2. compute_value() computes the value based on a *regular* column in the
+//    base table. This means that an update can modify this value (unlike a
+//    base-table key column that can't change in an update), so the view
+//    update code needs to compute the value before and after the update,
+//    and potentially delete and create view rows.
+// 3. compute_value() returns a regular_column_transformation::result which
+//    includes a value and its liveness information (timestamp and
+//    ttl/expiry) or is missing a value.
+
+class regular_column_transformation : public column_computation {
+public:
+    struct result {
+        // We can use "bytes" instead of "managed_bytes" here because we know
+        // that a column_computation is only used for generating a key value,
+        // and that is limited to 64K. This limitation is enforced below -
+        // we never linearize a cell's value if its size is more than 64K.
+        std::optional<bytes> _value;
+
+        // _ttl and _expiry are only defined if _value is set.
+        // The default values below are used when the source cell does not
+        // expire, and are the same values that row_marker uses for a non-
+        // expiring marker. This is useful when creating a row_marker from
+        // get_ttl() and get_expiry().
+        gc_clock::duration _ttl { 0 };
+        gc_clock::time_point _expiry { gc_clock::duration(0) };
+
+        // _ts may be set even if _value is missing, which can remember the
+        // timestamp of a tombstone. Note that the current view-update code
+        // that uses this class doesn't use _ts when _value is missing.
+        api::timestamp_type _ts = api::missing_timestamp;
+
+        api::timestamp_type get_ts() const {
+            return _ts;
+        }
+
+        bool has_value() const {
+            return _value.has_value();
+        }
+
+        // Should only be called if has_value() is true:
+        const bytes& get_value() const {
+            return *_value;
+        }
+        gc_clock::duration get_ttl() const {
+            return _ttl;
+        }
+        gc_clock::time_point get_expiry() const {
+            return _expiry;
+        }
+
+        // A missing computation result
+        result() { }
+
+        // Construct a computation result by copying a given atomic_cell -
+        // including its value, timestamp, and ttl - or deletion timestamp.
+        // The second parameter is an optional transformation function f -
+        // taking a bytes and returning an optional<bytes> - that transforms
+        // the value of the cell but keeps its other liveness information.
+        // If f returns a nullopt, the view row should be deleted.
+        template<typename Func=std::identity>
+        requires std::invocable<Func, bytes> && std::convertible_to<std::invoke_result_t<Func, bytes>, std::optional<bytes>>
+        result(atomic_cell_view cell, Func f = {}) {
+            _ts = cell.timestamp();
+            if (cell.is_live()) {
+                // If the cell is larger than what a key can hold (64KB),
+                // return a missing value. This lets us skip this item during
+                // view building and avoid hanging the view build as described
+                // in #8627. But it doesn't prevent later inserting such an item
+                // to the base table, nor does it implement front-end specific
+                // limits (such as Alternator's 1K or 2K limits - see #10347).
+                // Those stricter limits should be validated in the base-table
+                // write code, not here - deep inside the view update code.
+                // Note also we assume that f() doesn't grow the value further.
+                if (cell.value().size() >= 65536) {
+                    return;
+                }
+                _value = f(to_bytes(cell.value()));
+                if (_value) {
+                    if (cell.is_live_and_has_ttl()) {
+                        _ttl = cell.ttl();
+                        _expiry = cell.expiry();
+                    }
+                }
+            }
+        }
+    };
+
+    virtual ~regular_column_transformation() = default;
+    virtual result compute_value(
+        const schema& schema,
+        const partition_key& key,
+        const db::view::clustering_or_static_row& row) const = 0;
+ };
--- a/db/view/view.cc
+++ b/db/view/view.cc
@@ -24,6 +24,7 @@
 #include <seastar/core/coroutine.hh>
 #include <seastar/coroutine/maybe_yield.hh>

+#include "db/view/base_info.hh"
 #include "replica/database.hh"
 #include "clustering_bounds_comparator.hh"
 #include "cql3/statements/select_statement.hh"
@@ -36,6 +37,7 @@
 #include "db/view/view_builder.hh"
 #include "db/view/view_updating_consumer.hh"
 #include "db/view/view_update_generator.hh"
+#include "db/view/regular_column_transformation.hh"
 #include "db/system_keyspace_view_types.hh"
 #include "db/system_keyspace.hh"
 #include "db/system_distributed_keyspace.hh"
@@ -77,10 +79,16 @@ static inline void inject_failure(std::string_view operation) {
            [operation] { throw std::runtime_error(std::string(operation)); });
 }

-view_info::view_info(const schema& schema, const raw_view_info& raw_view_info)
+view_info::view_info(const schema& schema, const raw_view_info& raw_view_info, schema_ptr base_schema)
        : _schema(schema)
        , _raw(raw_view_info)
-        , _has_computed_column_depending_on_base_non_primary_key(false)
+        , _base_info(make_base_dependent_view_info(*base_schema))
+{ }
+
+view_info::view_info(const schema& schema, const raw_view_info& raw_view_info, db::view::base_dependent_view_info base_info)
+        : _schema(schema)
+        , _raw(raw_view_info)
+        , _base_info(std::move(base_info))
 { }

 cql3::statements::select_statement& view_info::select_statement(data_dictionary::database db) const {
@@ -131,117 +139,63 @@ const column_definition* view_info::view_column(const column_definition& base_de
    return _schema.get_column_definition(base_def.name());
 }

-void view_info::set_base_info(db::view::base_info_ptr base_info) {
-    _base_info = std::move(base_info);
+void view_info::reset_view_info() {
    // Forget the cached objects which may refer to the base schema.
    _select_statement = nullptr;
    _partition_slice = std::nullopt;
 }

 // A constructor for a base info that can facilitate reads and writes from the materialized view.
-db::view::base_dependent_view_info::base_dependent_view_info(schema_ptr base_schema,
-        std::vector<column_id>&& base_regular_columns_in_view_pk,
-        std::vector<column_id>&& base_static_columns_in_view_pk)
-        : _base_schema{std::move(base_schema)}
-        , _base_regular_columns_in_view_pk{std::move(base_regular_columns_in_view_pk)}
-        , _base_static_columns_in_view_pk{std::move(base_static_columns_in_view_pk)}
-        , has_base_non_pk_columns_in_view_pk{!_base_regular_columns_in_view_pk.empty() || !_base_static_columns_in_view_pk.empty()}
-        , use_only_for_reads{false} {
-
-}
-
-// A constructor for a base info that can facilitate only reads from the materialized view.
-db::view::base_dependent_view_info::base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk, std::optional<bytes>&& column_missing_in_base)
-        : _base_schema{nullptr}
-        , _column_missing_in_base{std::move(column_missing_in_base)}
+db::view::base_dependent_view_info::base_dependent_view_info(bool has_computed_column_depending_on_base_non_primary_key,
+        bool is_partition_key_permutation_of_base_partition_key,
+        bool has_base_non_pk_columns_in_view_pk)
+        : has_computed_column_depending_on_base_non_primary_key{has_computed_column_depending_on_base_non_primary_key}
+        , is_partition_key_permutation_of_base_partition_key{is_partition_key_permutation_of_base_partition_key}
        , has_base_non_pk_columns_in_view_pk{has_base_non_pk_columns_in_view_pk}
-        , use_only_for_reads{true} {
-}
+{ }

-const std::vector<column_id>& db::view::base_dependent_view_info::base_regular_columns_in_view_pk() const {
-    if (use_only_for_reads) {
-        on_internal_error(vlogger,
-                seastar::format("base_regular_columns_in_view_pk(): operation unsupported when initialized only for view reads. "
-                "Missing column in the base table: {}", to_string_view(_column_missing_in_base.value_or(bytes()))));
-    }
-    return _base_regular_columns_in_view_pk;
-}
-
-const std::vector<column_id>& db::view::base_dependent_view_info::base_static_columns_in_view_pk() const {
-    if (use_only_for_reads) {
-        on_internal_error(vlogger,
-                seastar::format("base_static_columns_in_view_pk(): operation unsupported when initialized only for view reads. "
-                "Missing column in the base table: {}", to_string_view(_column_missing_in_base.value_or(bytes()))));
-    }
-    return _base_static_columns_in_view_pk;
-}
-
-const schema_ptr& db::view::base_dependent_view_info::base_schema() const {
-    if (use_only_for_reads) {
-        on_internal_error(vlogger,
-                seastar::format("base_schema(): operation unsupported when initialized only for view reads. "
-                "Missing column in the base table: {}", to_string_view(_column_missing_in_base.value_or(bytes()))));
-    }
-    return _base_schema;
-}
-
-db::view::base_info_ptr view_info::make_base_dependent_view_info(const schema& base) const {
-    std::vector<column_id> base_regular_columns_in_view_pk;
-    std::vector<column_id> base_static_columns_in_view_pk;
-
-    _is_partition_key_permutation_of_base_partition_key =
+db::view::base_dependent_view_info view_info::make_base_dependent_view_info(const schema& base) const {
+    bool is_partition_key_permutation_of_base_partition_key =
        std::ranges::all_of(_schema.partition_key_columns(), [&base] (const column_definition& view_col) {
            const column_definition* base_col = base.get_column_definition(view_col.name());
            return base_col && base_col->is_partition_key();
            })
        && _schema.partition_key_size() == base.partition_key_size();

+    bool has_computed_column_depending_on_base_non_primary_key = false;
+    bool has_base_non_pk_columns_in_view_pk = false;
    for (auto&& view_col : _schema.primary_key_columns()) {
        if (view_col.is_computed()) {
            // we are not going to find it in the base table...
            if (view_col.get_computation().depends_on_non_primary_key_column()) {
-                _has_computed_column_depending_on_base_non_primary_key = true;
+                has_computed_column_depending_on_base_non_primary_key = true;
            }
            continue;
        }
        const bytes& view_col_name = view_col.name();
        auto* base_col = base.get_column_definition(view_col_name);
        if (base_col && base_col->is_regular()) {
-            base_regular_columns_in_view_pk.push_back(base_col->id);
+            has_base_non_pk_columns_in_view_pk = true;
        } else if (base_col && base_col->is_static()) {
-            base_static_columns_in_view_pk.push_back(base_col->id);
+            has_base_non_pk_columns_in_view_pk = true;
        } else if (!base_col) {
-            vlogger.error("Column {} in view {}.{} was not found in the base table {}.{}",
-                    to_string_view(view_col_name), _schema.ks_name(), _schema.cf_name(), base.ks_name(), base.cf_name());
-            if (to_string_view(view_col_name) == "idx_token") {
-                vlogger.warn("Missing idx_token column is caused by an incorrect upgrade of a secondary index. "
-                        "Please recreate index {}.{} to avoid future issues.", _schema.ks_name(), _schema.cf_name());
-            }
-            // If we didn't find the column in the base column then it must have been deleted
-            // or not yet added (by alter command), this means it is for sure not a pk column
-            // in the base table. This can happen if the version of the base schema is not the
-            // one that the view was created with. Setting this schema as the base can't harm since
-            // if we got to such a situation then it means it is only going to be used for reading
-            // (computation of shadowable tombstones) and in that case the existence of such a column
-            // is the only thing that is of interest to us.
-            return make_lw_shared<db::view::base_dependent_view_info>(true, view_col_name);
+            has_base_non_pk_columns_in_view_pk = true;
        }
    }
-
-    return make_lw_shared<db::view::base_dependent_view_info>(base.shared_from_this(), std::move(base_regular_columns_in_view_pk), std::move(base_static_columns_in_view_pk));
+    return db::view::base_dependent_view_info(has_computed_column_depending_on_base_non_primary_key,
+        is_partition_key_permutation_of_base_partition_key, has_base_non_pk_columns_in_view_pk);
 }

 bool view_info::has_base_non_pk_columns_in_view_pk() const {
-    // The base info is not always available, this is because
-    // the base info initialization is separate from the view
-    // info construction. If we are trying to get this info without
-    // initializing the base information it means that we have a
-    // schema integrity problem as the creator of owning view schema
-    // didn't make sure to initialize it with base information.
-    if (!_base_info) {
-        on_internal_error(vlogger, "Tried to perform a view query which is base info dependent without initializing it");
-    }
-    return _base_info->has_base_non_pk_columns_in_view_pk;
+    return _base_info.has_base_non_pk_columns_in_view_pk;
+}
+
+bool view_info::has_computed_column_depending_on_base_non_primary_key() const {
+    return _base_info.has_computed_column_depending_on_base_non_primary_key;
+}
+
+bool view_info::is_partition_key_permutation_of_base_partition_key() const {
+    return _base_info.is_partition_key_permutation_of_base_partition_key;
 }

 clustering_row db::view::clustering_or_static_row::as_clustering_row(const schema& s) const {
@@ -342,11 +296,11 @@ bool may_be_affected_by(data_dictionary::database db, const schema& base, const
 }

 static bool update_requires_read_before_write(data_dictionary::database db, const schema& base,
-        const std::vector<view_and_base>& views,
+        const std::vector<view_ptr>& views,
        const dht::decorated_key& key,
        const rows_entry& update) {
    for (auto&& v : views) {
-        view_info& vf = *v.view->view_info();
+        view_info& vf = *v->view_info();
        if (may_be_affected_by(db, base, vf, key, update)) {
            return true;
        }
@@ -483,6 +437,30 @@ bool matches_view_filter(data_dictionary::database db, const schema& base, const
            && visitor.matches_view_filter();
 }

+view_updates::view_updates(view_ptr v, schema_ptr base)
+    : _view(std::move(v))
+    , _view_info(*_view->view_info())
+    , _base(std::move(base))
+    , _base_info(_view_info.base_info())
+    , _updates(8, partition_key::hashing(*_view), partition_key::equality(*_view))
+{
+    for (auto&& view_col : _view->primary_key_columns()) {
+        if (view_col.is_computed()) {
+            continue;
+        }
+        const bytes& view_col_name = view_col.name();
+        auto* base_col = _base->get_column_definition(view_col_name);
+        if (base_col && base_col->is_regular()) {
+            _base_regular_columns_in_view_pk.push_back(base_col->id);
+        } else if (base_col && base_col->is_static()) {
+            _base_static_columns_in_view_pk.push_back(base_col->id);
+        } else if (!base_col) {
+            on_internal_error(vlogger, format("Column {} in view {}.{} was not found in the base table {}.{}",
+                    view_col_name, _view->ks_name(), _view->cf_name(), _base->ks_name(), _base->cf_name()));
+        }
+    }
+}
+
 future<> view_updates::move_to(utils::chunked_vector<frozen_mutation_and_schema>& mutations) {
    mutations.reserve(mutations.size() + _updates.size());
    for (auto it = _updates.begin(); it != _updates.end(); it = _updates.erase(it)) {
@@ -506,79 +484,6 @@ size_t view_updates::op_count() const {
    return _op_count;
 }

-row_marker view_updates::compute_row_marker(const clustering_or_static_row& base_row) const {
-    /*
-     * We need to compute both the timestamp and expiration for view rows.
-     *
-     * Below there are several distinct cases depending on how many new key
-     * columns the view has - i.e., how many of the view's key columns were
-     * regular columns in the base. base_regular_columns_in_view_pk.size():
-     *
-     * Zero new key columns:
-     *     The view rows key is composed only from base key columns, and those
-     *     cannot be changed in an update, so the view row remains alive as
-     *     long as the base row is alive. We need to return the same row
-     *     marker as the base for the view - to keep an empty view row alive
-    *      for as long as an empty base row exists.
-     *     Note that in this case, if there are *unselected* base columns, we
-     *     may need to keep an empty view row alive even without a row marker
-     *     because the base row (which has additional columns) is still alive.
-     *     For that we have the "virtual columns" feature: In the zero new
-     *     key columns case, we put unselected columns in the view as empty
-     *     columns, to keep the view row alive.
-     *
-     * One new key column:
-     *     In this case, there is a regular base column that is part of the
-     *     view key. This regular column can be added or deleted in an update,
-     *     or its expiration be set, and those can cause the view row -
-     *     including its row marker - to need to appear or disappear as well.
-     *     So the liveness of cell of this one column determines the liveness
-     *     of the view row and the row marker that we return.
-     *
-     * Two or more new key columns:
-     *     This case is explicitly NOT supported in CQL - one cannot create a
-     *     view with more than one base-regular columns in its key. In general
-     *     picking one liveness (timestamp and expiration) is not possible
-     *     if there are multiple regular base columns in the view key, as
-     *     those can have different liveness.
-     *     However, we do allow this case for Alternator - we need to allow
-     *     the case of two (but not more) because the DynamoDB API allows
-     *     creating a GSI whose two key columns (hash and range key) were
-     *     regular columns.
-     *     We can support this case in Alternator because it doesn't use
-     *     expiration (the "TTL" it does support is different), and doesn't
-     *     support user-defined timestamps. But, the two columns can still
-     *     have different timestamps - this happens if an update modifies
-     *     just one of them. In this case the timestamp of the view update
-     *     (and that of the row marker we return) is the later of these two
-     *     updated columns.
-     */
-    const auto& col_ids = base_row.is_clustering_row()
-            ? _base_info->base_regular_columns_in_view_pk()
-            : _base_info->base_static_columns_in_view_pk();
-    if (!col_ids.empty()) {
-        auto& def = _base->column_at(base_row.column_kind(), col_ids[0]);
-        // Note: multi-cell columns can't be part of the primary key.
-        auto cell = base_row.cells().cell_at(col_ids[0]).as_atomic_cell(def);
-        auto ts = cell.timestamp();
-        if (col_ids.size() > 1){
-            // As explained above, this case only happens in Alternator,
-            // and we may need to pick a higher ts:
-            auto& second_def = _base->column_at(base_row.column_kind(), col_ids[1]);
-            auto second_cell = base_row.cells().cell_at(col_ids[1]).as_atomic_cell(second_def);
-            auto second_ts = second_cell.timestamp();
-            ts = std::max(ts, second_ts);
-            // Alternator isn't supposed to have TTL or more than two col_ids!
-            if (col_ids.size() != 2 || cell.is_live_and_has_ttl() || second_cell.is_live_and_has_ttl()) [[unlikely]] {
-                utils::on_internal_error(format("Unexpected col_ids length {} or has TTL", col_ids.size()));
-            }
-        }
-        return cell.is_live_and_has_ttl() ? row_marker(ts, cell.ttl(), cell.expiry()) : row_marker(ts);
-    }
-
-    return base_row.marker();
-}
-
 namespace {
 // The following struct is identical to view_key_with_action, except the key
 // is stored as a managed_bytes_view instead of bytes.
@@ -654,8 +559,8 @@ public:
            return {_update.key()->get_component(_base, base_col->position())};
        default:
            if (base_col->kind != _update.column_kind()) {
-                on_internal_error(vlogger, format("Tried to get a {} column from a {} row update, which is impossible",
-                        to_sstring(base_col->kind), _update.is_clustering_row() ? "clustering" : "static"));
+                on_internal_error(vlogger, format("Tried to get a {} column {} from a {} row update, which is impossible",
+                        to_sstring(base_col->kind), base_col->name_as_text(), _update.is_clustering_row() ? "clustering" : "static"));
            }
            auto& c = _update.cells().cell_at(base_col->id);
            auto value_view = base_col->is_atomic() ? c.as_atomic_cell(cdef).value() : c.as_collection_mutation().data;
@@ -676,6 +581,22 @@ private:
            return handle_collection_column_computation(collection_computation);
        }

+        // TODO: we already calculated this computation in updatable_view_key_cols,
+        // so perhaps we should pass it here and not re-compute it. But this will
+        // mean computed columns will only work for view key columns (currently
+        // we assume that anyway)
+        if (auto* c = dynamic_cast<const regular_column_transformation*>(&computation)) {
+            regular_column_transformation::result after =
+                c->compute_value(_base, _base_key, _update);
+            if (after.has_value()) {
+                return {managed_bytes_view(linearized_values.emplace_back(after.get_value()))};
+            }
+            // We only get to this function when we know the _update row
+            // exists and call it to read its key columns, so we don't expect
+            // to see a missing value for any of those columns
+            on_internal_error(vlogger, fmt::format("unexpected call to handle_computed_column {} missing in update", cdef.name_as_text()));
+        }
+
        auto computed_value = computation.compute_value(_base, _base_key);
        return {managed_bytes_view(linearized_values.emplace_back(std::move(computed_value)))};
    }
@@ -727,7 +648,6 @@ view_updates::get_view_rows(const partition_key& base_key, const clustering_or_s
        if (partition.partition_tombstone() && partition.partition_tombstone() == row_delete_tomb.tomb()) {
            return;
        }
-
        ret.push_back({&partition.clustered_row(*_view, std::move(ckey)), action});
    };

@@ -934,13 +854,12 @@ static void add_cells_to_view(const schema& base, const schema& view, column_kin
 * Creates a view entry corresponding to the provided base row.
 * This method checks that the base row does match the view filter before applying anything.
 */
-void view_updates::create_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, gc_clock::time_point now) {
+void view_updates::create_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, gc_clock::time_point now, row_marker update_marker) {
    if (!matches_view_filter(db, *_base, _view_info, base_key, update, now)) {
        return;
    }

    auto view_rows = get_view_rows(base_key, update, std::nullopt, {});
-    auto update_marker = compute_row_marker(update);
    const auto kind = update.column_kind();
    for (const auto& [r, action]: view_rows) {
        if (auto rm = std::get_if<row_marker>(&action)) {
@@ -958,48 +877,28 @@ void view_updates::create_entry(data_dictionary::database db, const partition_ke
 * Deletes the view entry corresponding to the provided base row.
 * This method checks that the base row does match the view filter before bothering.
 */
-void view_updates::delete_old_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now) {
+void view_updates::delete_old_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now, api::timestamp_type deletion_ts) {
    // Before deleting an old entry, make sure it was matching the view filter
    // (otherwise there is nothing to delete)
    if (matches_view_filter(db, *_base, _view_info, base_key, existing, now)) {
-        do_delete_old_entry(base_key, existing, update, now);
+        do_delete_old_entry(base_key, existing, update, now, deletion_ts);
    }
 }

-void view_updates::do_delete_old_entry(const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now) {
+void view_updates::do_delete_old_entry(const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now, api::timestamp_type deletion_ts) {
    auto view_rows = get_view_rows(base_key, existing, std::nullopt, update.tomb());
    const auto kind = existing.column_kind();
    for (const auto& [r, action] : view_rows) {
        const auto& col_ids = existing.is_clustering_row()
-                ? _base_info->base_regular_columns_in_view_pk()
-                : _base_info->base_static_columns_in_view_pk();
-        if (_view_info.has_computed_column_depending_on_base_non_primary_key()) {
-            if (auto ts_tag = std::get_if<view_key_and_action::shadowable_tombstone_tag>(&action)) {
-                r->apply(ts_tag->into_shadowable_tombstone(now));
-            }
-        } else if (!col_ids.empty()) {
-            // We delete the old row using a shadowable row tombstone, making sure that
-            // the tombstone deletes everything in the row (or it might still show up).
-            // Note: multi-cell columns can't be part of the primary key.
-            auto& def = _base->column_at(kind, col_ids[0]);
-            auto cell = existing.cells().cell_at(col_ids[0]).as_atomic_cell(def);
-            auto ts = cell.timestamp();
-            if (col_ids.size() > 1) {
-                // This is the Alternator-only support for two regular base
-                // columns that become view key columns. See explanation in
-                // view_updates::compute_row_marker().
-                auto& second_def = _base->column_at(kind, col_ids[1]);
-                auto second_cell = existing.cells().cell_at(col_ids[1]).as_atomic_cell(second_def);
-                auto second_ts = second_cell.timestamp();
-                ts = std::max(ts, second_ts);
-                // Alternator isn't supposed to have more than two col_ids!
-                if (col_ids.size() != 2) [[unlikely]] {
-                    utils::on_internal_error(format("Unexpected col_ids length {}", col_ids.size()));
-                }
-            }
-            if (cell.is_live()) {
-                r->apply(shadowable_tombstone(ts, now));
-            }
+                ? _base_regular_columns_in_view_pk
+                : _base_static_columns_in_view_pk;
+        if (!col_ids.empty() || _view_info.has_computed_column_depending_on_base_non_primary_key()) {
+            // The view key could have been modified because it contains or
+            // depends on a non-primary-key. The fact that this function was
+            // called instead of update_entry() means the caller knows it
+            // wants to delete the old row (with the given deletion_ts) and
+            // will create a different one. So let's honor this.
+            r->apply(shadowable_tombstone(deletion_ts, now));
        } else {
            // "update" caused the base row to have been deleted, and !col_id
            // means view row is the same - so it needs to be deleted as well
@@ -1046,7 +945,7 @@ bool view_updates::can_skip_view_updates(const clustering_or_static_row& update,
        // as part of its PK, there are NO virtual columns corresponding to the unselected columns in the view.
        // Because of that, we don't generate view updates when the value in an unselected column is created
        // or changes.
-        if (!column_is_selected && _base_info->has_base_non_pk_columns_in_view_pk) {
+        if (!column_is_selected && _base_info.has_base_non_pk_columns_in_view_pk) {
            return true;
        }

@@ -1100,15 +999,15 @@ bool view_updates::can_skip_view_updates(const clustering_or_static_row& update,
 * This method checks that the base row (before and after) matches the view filter before
 * applying anything.
 */
-void view_updates::update_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, const clustering_or_static_row& existing, gc_clock::time_point now) {
+void view_updates::update_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, const clustering_or_static_row& existing, gc_clock::time_point now, row_marker update_marker) {
    // While we know update and existing correspond to the same view entry,
    // they may not match the view filter.
    if (!matches_view_filter(db, *_base, _view_info, base_key, existing, now)) {
-        create_entry(db, base_key, update, now);
+        create_entry(db, base_key, update, now, update_marker);
        return;
    }
    if (!matches_view_filter(db, *_base, _view_info, base_key, update, now)) {
-        do_delete_old_entry(base_key, existing, update, now);
+        do_delete_old_entry(base_key, existing, update, now, update_marker.timestamp());
        return;
    }

@@ -1117,7 +1016,7 @@ void view_updates::update_entry(data_dictionary::database db, const partition_ke
    }

    auto view_rows = get_view_rows(base_key, update, std::nullopt, {});
-    auto update_marker = compute_row_marker(update);
+
    const auto kind = update.column_kind();
    for (const auto& [r, action] : view_rows) {
        if (auto rm = std::get_if<row_marker>(&action)) {
@@ -1133,6 +1032,8 @@ void view_updates::update_entry(data_dictionary::database db, const partition_ke
    _op_count += view_rows.size();
 }

+// Note: despite the general-sounding name of this function, it is used
+// just for the case of collection indexing.
 void view_updates::update_entry_for_computed_column(
        const partition_key& base_key,
        const clustering_or_static_row& update,
@@ -1155,30 +1056,72 @@ void view_updates::update_entry_for_computed_column(
    }
 }

+// view_updates::generate_update() is the main function for taking an update
+// to a base table row - consisting of existing and updated versions of row -
+// and creating from it zero or more updates to a given materialized view.
+// These view updates may consist of updating an existing view row, deleting
+// an old view row, and/or creating a new view row.
+// There are several distinct cases depending on how many of the view's key
+// columns are "new key columns", i.e., were regular columns in the base
+// or are a computed column based on a regular column (these computed columns
+// are used by, for example, Alternator's GSI):
+//
+// Zero new key columns:
+//   The view row's key is composed only from base key columns, and those can't
+//   be changed in an update, so the view row remains alive as long as the
+//   base row is alive. The row marker for the view needs to be set to the
+//   same row marker in the base - to keep an empty view row alive for as long
+//   as an empty base row exists.
+//   Note that in this case, if there are *unselected* base columns, we may
+//   need to keep an empty view row alive even without a row marker because
+//   the base row (which has additional columns) is still alive. For that we
+//   have the "virtual columns" feature: In the zero new key columns case, we
+//   put unselected columns in the view as empty columns, to keep the view
+//   row alive.
+//
+// One new key column:
+//   In this case, there is a regular base column that is part of the view
+//   key. This regular column can be added or deleted in an update, or its
+//   expiration be set, and those can cause the view row - including its row
+//   marker - to need to appear or disappear as well. So the liveness of the
+//   cell of this one column determines the liveness of the view row and the
+//   row marker that we set for it.
+//
+// Two or more new key columns:
+//   This case is explicitly NOT supported in CQL - one cannot create a view
+//   with more than one base-regular column in its key. In general picking
+//   one liveness (timestamp and expiration) is not possible if there are
+//   multiple regular base columns in the view key, as those can have different
+//   liveness.
+//   However, we do allow this case for Alternator - we need to allow the case
+//   of two (but not more) because the DynamoDB API allows creating a GSI
+//   whose two key columns (hash and range key) were regular columns. We can
+//   support this case in Alternator because it doesn't use expiration (the
+//   "TTL" it does support is different), and doesn't support user-defined
+//   timestamps. But, the two columns can still have different timestamps -
+//   this happens if an update modifies just one of them. In this case the
+//   timestamp of the view update (and that of the row marker) is the later
+//   of these two updated columns.
 void view_updates::generate_update(
        data_dictionary::database db,
        const partition_key& base_key,
        const clustering_or_static_row& update,
        const std::optional<clustering_or_static_row>& existing,
        gc_clock::time_point now) {
-
-    // Note that the base PK columns in update and existing are the same, since we're intrinsically dealing
-    // with the same base row. So we have to check 3 things:
-    //   1) that the clustering key doesn't have a null, which can happen for compact tables. If that's the case,
-    //      there is no corresponding entries.
-    //   2) if there is a column not part of the base PK in the view PK, whether it is changed by the update.
-    //   3) whether the update actually matches the view SELECT filter
-
+    // FIXME: The following if() is old code which may be related to COMPACT
+    // STORAGE. If this is a real case, refer to a test that demonstrates it.
+    // If it's not a real case, remove this if().
    if (update.is_clustering_row()) {
        if (!update.key()->is_full(*_base)) {
            return;
        }
    }
-
-    if (_view_info.has_computed_column_depending_on_base_non_primary_key()) {
-        return update_entry_for_computed_column(base_key, update, existing, now);
-    }
-    if (!_base_info->has_base_non_pk_columns_in_view_pk) {
+    // If the view key depends on any regular column in the base, the update
+    // may change the view key and may require deleting an old view row and
+    // inserting a new row. The other case, which we'll handle here first,
+    // is easier and requires just modifying one view row.
+    if (!_base_info.has_base_non_pk_columns_in_view_pk &&
+        !_view_info.has_computed_column_depending_on_base_non_primary_key()) {
        if (update.is_static_row()) {
            // TODO: support static rows in views with pk only including columns from base pk
            return;
@@ -1186,85 +1129,186 @@ void view_updates::generate_update(
        // The view key is necessarily the same pre and post update.
        if (existing && existing->is_live(*_base)) {
            if (update.is_live(*_base)) {
-                update_entry(db, base_key, update, *existing, now);
+                update_entry(db, base_key, update, *existing, now, update.marker());
            } else {
-                delete_old_entry(db, base_key, *existing, update, now);
+                delete_old_entry(db, base_key, *existing, update, now, api::missing_timestamp);
            }
        } else if (update.is_live(*_base)) {
-            create_entry(db, base_key, update, now);
+            create_entry(db, base_key, update, now, update.marker());
        }
        return;
    }

-    const auto& col_ids = update.is_clustering_row()
-            ? _base_info->base_regular_columns_in_view_pk()
-            : _base_info->base_static_columns_in_view_pk();
-
-    // The view has a non-primary-key column from the base table as its primary key.
-    // That means it's either a regular or static column. If we are currently
-    // processing an update which does not correspond to the column's kind,
-    // just stop here.
-    if (col_ids.empty()) {
+    // Find the view key columns that may be changed by an update.
+    // This case is interesting because a change to the view key means that
+    // we may need to delete an old view row and/or create a new view row.
+    // The columns we look for are view key columns that are neither base key
+    // columns nor computed columns based just on key columns. In other words,
+    // we look here for columns which were regular columns or static columns
+    // in the base table, or computed columns based on regular columns.
+    struct updatable_view_key_col {
+        column_id view_col_id;
+        regular_column_transformation::result before;
+        regular_column_transformation::result after;
+    };
+    std::vector<updatable_view_key_col> updatable_view_key_cols;
+    for (const column_definition& view_col : _view->primary_key_columns()) {
+        if (view_col.is_computed()) {
+            const column_computation& computation = view_col.get_computation();
+            if (computation.depends_on_non_primary_key_column()) {
+                // Column is a computed column that does not depend just on
+                // the base key, so it may change in the update.
+                if (auto* c = dynamic_cast<const regular_column_transformation*>(&computation)) {
+                    updatable_view_key_cols.emplace_back(view_col.id,
+                        existing ? c->compute_value(*_base, base_key, *existing) : regular_column_transformation::result(),
+                        c->compute_value(*_base, base_key, update));
+                } else {
+                    // The only other column_computation we have which has
+                    // depends_on_non_primary_key_column is
+                    // collection_column_computation, and we have a special
+                    // function to handle that case:
+                    return update_entry_for_computed_column(base_key, update, existing, now);
+                }
+            }
+        } else {
+            const column_definition* base_col = _base->get_column_definition(view_col.name());
+            if (!base_col) {
+                on_internal_error(vlogger, fmt::format("Column {} in view {}.{} was not found in the base table {}.{}",
+                    view_col.name(), _view->ks_name(), _view->cf_name(), _base->ks_name(), _base->cf_name()));
+            }
+            // If the view key column was also a base primary key column, then
+            // it can't possibly change in this update. But if the column was
+            // not a primary key column - i.e., a regular column or static
+            // column - the update might have changed it and we need to list it
+            // on updatable_view_key_cols.
+            // We check base_col->kind == update.column_kind() instead of just
+            // !base_col->is_primary_key() because when update is a static row
+            // we know it can't possibly update a regular column (and vice
+            // versa).
+            if (base_col->kind == update.column_kind()) {
+                // This is a view key column, so we know it is atomic
+                std::optional<atomic_cell_view> after;
+                auto afterp = update.cells().find_cell(base_col->id);
+                if (afterp) {
+                    after = afterp->as_atomic_cell(*base_col);
+                }
+                std::optional<atomic_cell_view> before;
+                if (existing) {
+                    auto beforep = existing->cells().find_cell(base_col->id);
+                    if (beforep) {
+                        before = beforep->as_atomic_cell(*base_col);
+                    }
+                }
+                updatable_view_key_cols.emplace_back(view_col.id,
+                    before ? regular_column_transformation::result(*before) : regular_column_transformation::result(),
+                    after ? regular_column_transformation::result(*after) : regular_column_transformation::result());
+            }
+        }
+    }
+    // If we reached here, the view has a non-primary-key column from the base
+    // table as its primary key. That means it's either a regular or static
+    // column. If we are currently processing an update which does not
+    // correspond to the column's kind, updatable_view_key_cols will be empty
+    // and we can just stop here.
+    if (updatable_view_key_cols.empty()) {
        return;
    }

-    const auto kind = update.column_kind();
-
-    // If one of the key columns is missing, set has_new_row = false
-    // meaning that after the update there will be no view row.
-    // If one of the key columns is missing in the existing value,
-    // set has_old_row = false meaning we don't have an old row to
-    // delete.
+    // Use updatable_view_key_cols - the before and after values of the
+    // view key columns that may have changed - to determine if the update
+    // changes an existing view row, deletes an old row or creates a new row.
    bool has_old_row = true;
    bool has_new_row = true;
-    bool same_row = true;
-    for (auto col_id : col_ids) {
-        auto* after = update.cells().find_cell(col_id);
-        auto& cdef = _base->column_at(kind, col_id);
-        if (existing) {
-            auto* before = existing->cells().find_cell(col_id);
-            // Note that this cell is necessarily atomic, because col_ids are
-            // view key columns, and keys must be atomic.
-            if (before && before->as_atomic_cell(cdef).is_live()) {
-                if (after && after->as_atomic_cell(cdef).is_live()) {
-                    // We need to compare just the values of the keys, not
-                    // metadata like the timestamp. This is because below,
-                    // if the old and new view row have the same key, we need
-                    // to be sure to reach the update_entry() case.
-                    auto cmp = compare_unsigned(before->as_atomic_cell(cdef).value(), after->as_atomic_cell(cdef).value());
-                    if (cmp != 0) {
-                        same_row = false;
-                    }
+    bool same_row = true; // undefined if either has_old_row or has_new_row are false
+    for (const auto& u : updatable_view_key_cols) {
+        if (u.before.has_value()) {
+            if (u.after.has_value()) {
+                if (compare_unsigned(u.before.get_value(), u.after.get_value()) != 0) {
+                    same_row = false;
                }
            } else {
-                has_old_row = false;
+                has_new_row = false;
            }
        } else {
            has_old_row = false;
-        }
-        if (!after || !after->as_atomic_cell(cdef).is_live()) {
-            has_new_row = false;
+            if (!u.after.has_value()) {
+                has_new_row = false;
+            }
        }
    }
+
+    // If has_new_row, calculate a row marker for this view row - i.e., a
+    // timestamp and ttl - based on those of the updatable view key column
+    // (or, in an Alternator-only extension, more than one).
+    row_marker new_row_rm; // only set if has_new_row
+    if (has_new_row) {
+        // Note:
+        // 1. By reaching here we know that updatable_view_key_cols has at
+        //    least one member (in CQL, it's always one, in Alternator it
+        //    may be two).
+        // 2. Because has_new_row, we know all elements in that array have
+        //    after.has_value() true, so we can use after.get_ts() et al.
+        api::timestamp_type new_row_ts = updatable_view_key_cols[0].after.get_ts();
+        // This is the Alternator-only support for *two* regular base columns
+        // that become view key columns. The timestamp we use is the *maximum*
+        // of the two key columns, as explained in pull-request #17172.
+        if (updatable_view_key_cols.size() > 1) {
+            auto second_ts = updatable_view_key_cols[1].after.get_ts();
+            new_row_ts = std::max(new_row_ts, second_ts);
+            // Alternator isn't supposed to have more than two updatable view key columns!
+            if (updatable_view_key_cols.size() != 2) [[unlikely]] {
+                utils::on_internal_error(format("Unexpected updatable_view_key_col length {}", updatable_view_key_cols.size()));
+            }
+        }
+        // We assume that either updatable_view_key_cols has just one column
+        // (the only situation allowed in CQL) or if there is more than one
+        // they have the same expiry information (in Alternator, there is
+        // never a CQL TTL set).
+        new_row_rm =  row_marker(new_row_ts, updatable_view_key_cols[0].after.get_ttl(), updatable_view_key_cols[0].after.get_expiry());
+    }
+
    if (has_old_row) {
+        // As explained in #19977, when there is one updatable_view_key_cols
+        // (the only case allowed in CQL) the deletion timestamp is before's
+        // timestamp. As explained in #17119, if there are two of them (only
+        // possible in Alternator), we take the maximum.
+        // Note:
+        // 1. By reaching here we know that updatable_view_key_cols has at
+        //    least one member (in CQL, it's always one, in Alternator it
+        //    may be two).
+        // 2. Because has_old_row, we know all elements in that array have
+        //    before.has_value() true, so we can use before.get_ts().
+        auto old_row_ts = updatable_view_key_cols[0].before.get_ts();
+        if (updatable_view_key_cols.size() > 1) {
+            // This is the Alternator-only support for two regular base
+            // columns that become view key columns. See explanation in
+            // view_updates::compute_row_marker().
+            auto second_ts = updatable_view_key_cols[1].before.get_ts();
+            old_row_ts = std::max(old_row_ts, second_ts);
+            // Alternator isn't supposed to have more than two updatable view key columns!
+            if (updatable_view_key_cols.size() != 2) [[unlikely]] {
+                utils::on_internal_error(format("Unexpected updatable_view_key_col length {}", updatable_view_key_cols.size()));
+            }
+        }
        if (has_new_row) {
            if (same_row) {
-                update_entry(db, base_key, update, *existing, now);
+                update_entry(db, base_key, update, *existing, now, new_row_rm);
            } else {
-                // This code doesn't work if the old and new view row have the
-                // same key, because if they do we get both data and tombstone
-                // for the same timestamp (now) and the tombstone wins. This
-                // is why we need the "same_row" case above - it's not just a
-                // performance optimization.
-                delete_old_entry(db, base_key, *existing, update, now);
-                create_entry(db, base_key, update, now);
+                // The following code doesn't work if the old and new view row
+                // have the same key, because if they do we can get both data
+                // and tombstone for the same timestamp and the tombstone
+                // wins. This is why we need the "same_row" case above - it's
+                // not just a performance optimization.
+                delete_old_entry(db, base_key, *existing, update, now, old_row_ts);
+                create_entry(db, base_key, update, now, new_row_rm);
            }
        } else {
-            delete_old_entry(db, base_key, *existing, update, now);
+            delete_old_entry(db, base_key, *existing, update, now, old_row_ts);
        }
    } else if (has_new_row) {
-        create_entry(db, base_key, update, now);
+        create_entry(db, base_key, update, now, new_row_rm);
    }
+
 }

 bool view_updates::is_partition_key_permutation_of_base_partition_key() const {
@@ -1605,17 +1649,12 @@ view_update_builder make_view_update_builder(
        data_dictionary::database db,
        const replica::table& base_table,
        const schema_ptr& base,
-        std::vector<view_and_base>&& views_to_update,
+        std::vector<view_ptr>&& views_to_update,
        mutation_reader&& updates,
        mutation_reader_opt&& existings,
        gc_clock::time_point now) {
-    auto vs = views_to_update | std::views::transform([&] (view_and_base v) {
-        if (base->version() != v.base->base_schema()->version()) {
-            on_internal_error(vlogger, format("Schema version used for view updates ({}) does not match the current"
-                                              " base schema version of the view ({}) for view {}.{} of {}.{}",
-                base->version(), v.base->base_schema()->version(), v.view->ks_name(), v.view->cf_name(), base->ks_name(), base->cf_name()));
-        }
-        return view_updates(std::move(v));
+    auto vs = views_to_update | std::views::transform([&] (view_ptr v) {
+        return view_updates(std::move(v), base);
    }) | std::ranges::to<std::vector<view_updates>>();
    return view_update_builder(std::move(db), base_table, base, std::move(vs), std::move(updates), std::move(existings), now);
 }
@@ -1624,18 +1663,18 @@ future<query::clustering_row_ranges> calculate_affected_clustering_ranges(data_d
        const schema& base,
        const dht::decorated_key& key,
        const mutation_partition& mp,
-        const std::vector<view_and_base>& views) {
+        const std::vector<view_ptr>& views) {
    utils::chunked_vector<interval<clustering_key_prefix_view>> row_ranges;
    utils::chunked_vector<interval<clustering_key_prefix_view>> view_row_ranges;
    clustering_key_prefix_view::tri_compare cmp(base);
    if (mp.partition_tombstone() || !mp.row_tombstones().empty()) {
        for (auto&& v : views) {
            // FIXME: #2371
-            if (v.view->view_info()->select_statement(db).get_restrictions()->has_unrestricted_clustering_columns()) {
+            if (v->view_info()->select_statement(db).get_restrictions()->has_unrestricted_clustering_columns()) {
                view_row_ranges.push_back(interval<clustering_key_prefix_view>::make_open_ended_both_sides());
                break;
            }
-            for (auto&& r : v.view->view_info()->partition_slice(db).default_row_ranges()) {
+            for (auto&& r : v->view_info()->partition_slice(db).default_row_ranges()) {
                view_row_ranges.push_back(r.transform(std::mem_fn(&clustering_key_prefix::view)));
                co_await coroutine::maybe_yield();
            }
@@ -1683,7 +1722,7 @@ future<query::clustering_row_ranges> calculate_affected_clustering_ranges(data_d
    co_return result_ranges;
 }

-bool needs_static_row(const mutation_partition& mp, const std::vector<view_and_base>& views) {
+bool needs_static_row(const mutation_partition& mp, const std::vector<view_ptr>& views) {
    // TODO: We could also check whether any of the views need static rows
    // and return false if none of them do
    return mp.partition_tombstone() || !mp.static_row().empty();
@@ -2954,13 +2993,12 @@ public:
        if (!_fragments.empty()) {
            _fragments.emplace_front(*_step.reader.schema(), _builder._permit, partition_start(_step.current_key, tombstone()));
            auto base_schema = _step.base->schema();
-            auto views = with_base_info_snapshot(_views_to_build);
            auto reader = make_mutation_reader_from_fragments(_step.reader.schema(), _builder._permit, std::move(_fragments));
            auto close_reader = defer([&reader] { reader.close().get(); });
            reader.upgrade_schema(base_schema);
            _gen->populate_views(
                    *_step.base,
-                    std::move(views),
+                    _views_to_build,
                    _step.current_token(),
                    std::move(reader),
                    _now).get();
@@ -2972,6 +3010,7 @@ public:

    stop_iteration consume_end_of_partition() {
        inject_failure("view_builder_consume_end_of_partition");
+        utils::get_local_injector().inject("view_builder_consume_end_of_partition_delay", utils::wait_for_message(std::chrono::seconds(60))).get();
        flush_fragments();
        return stop_iteration(_step.build_status.empty());
    }
@@ -2995,6 +3034,12 @@ public:
                    _step.build_status.pop_back();
                }
            }
+
+            // before going back to the minimum token, advance current_key to the end
+            // and check for built views in that range.
+            _step.current_key = {_step.prange.end().value_or(dht::ring_position::max()).value().token(), partition_key::make_empty()};
+            check_for_built_views();
+
            _step.current_key = {dht::minimum_token(), partition_key::make_empty()};
            for (auto&& vs : _step.build_status) {
                vs.next_token = dht::minimum_token();
@@ -3223,12 +3268,6 @@ view_updating_consumer::view_updating_consumer(view_update_generator& gen, schem
    })
 { }

-std::vector<db::view::view_and_base> with_base_info_snapshot(std::vector<view_ptr> vs) {
-    return vs | std::views::transform([] (const view_ptr& v) {
-        return db::view::view_and_base{v, v->view_info()->base_info()};
-    }) | std::ranges::to<std::vector>();
-}
-
 delete_ghost_rows_visitor::delete_ghost_rows_visitor(service::storage_proxy& proxy, service::query_state& state, view_ptr view, db::timeout_clock::duration timeout_duration)
        : _proxy(proxy)
        , _state(state)
--- a/db/view/view.hh
+++ b/db/view/view.hh
@@ -15,6 +15,7 @@
 #include "mutation/frozen_mutation.hh"
 #include "data_dictionary/data_dictionary.hh"
 #include "locator/abstract_replication_strategy.hh"
+#include "db/view/base_info.hh"

 class frozen_mutation_and_schema;

@@ -28,54 +29,6 @@ namespace view {

 class stats;

-// Part of the view description which depends on the base schema version.
-//
-// This structure may change even though the view schema doesn't change, so
-// it needs to live outside view_ptr.
-struct base_dependent_view_info {
-private:
-    schema_ptr _base_schema;
-    // Id of a regular base table column included in the view's PK, if any.
-    // Scylla views only allow one such column, alternator can have up to two.
-    std::vector<column_id> _base_regular_columns_in_view_pk;
-    std::vector<column_id> _base_static_columns_in_view_pk;
-    // For tracing purposes, if the view is out of sync with its base table
-    // and there exists a column which is not in base, its name is stored
-    // and added to debug messages.
-    std::optional<bytes> _column_missing_in_base = {};
-public:
-    const std::vector<column_id>& base_regular_columns_in_view_pk() const;
-    const std::vector<column_id>& base_static_columns_in_view_pk() const;
-    const schema_ptr& base_schema() const;
-
-    // Indicates if the view hase pk columns which are not part of the base
-    // pk, it seems that !base_non_pk_columns_in_view_pk.empty() is the same,
-    // but actually there are cases where we can compute this boolean without
-    // succeeding to reliably build the former.
-    const bool has_base_non_pk_columns_in_view_pk;
-
-    // If base_non_pk_columns_in_view_pk couldn't reliably be built, this base
-    // info can't be used for computing view updates, only for reading the materialized
-    // view.
-    const bool use_only_for_reads;
-
-    // A constructor for a base info that can facilitate reads and writes from the materialized view.
-    base_dependent_view_info(schema_ptr base_schema,
-            std::vector<column_id>&& base_regular_columns_in_view_pk,
-            std::vector<column_id>&& base_static_columns_in_view_pk);
-    // A constructor for a base info that can facilitate only reads from the materialized view.
-    base_dependent_view_info(bool has_base_non_pk_columns_in_view_pk, std::optional<bytes>&& column_missing_in_base);
-};
-
-// Immutable snapshot of view's base-schema-dependent part.
-using base_info_ptr = lw_shared_ptr<const base_dependent_view_info>;
-
-// Snapshot of the view schema and its base-schema-dependent part.
-struct view_and_base {
-    view_ptr view;
-    base_info_ptr base;
-};
-
 // An immutable representation of a clustering or static row of the base table.
 struct clustering_or_static_row {
 private:
@@ -207,18 +160,15 @@ class view_updates final {
    view_ptr _view;
    const view_info& _view_info;
    schema_ptr _base;
-    base_info_ptr _base_info;
+    const base_dependent_view_info& _base_info;
+    // Id of a regular base table column included in the view's PK, if any.
+    // Scylla views only allow one such column, alternator can have up to two.
+    std::vector<column_id> _base_regular_columns_in_view_pk;
+    std::vector<column_id> _base_static_columns_in_view_pk;
    std::unordered_map<partition_key, mutation_partition, partition_key::hashing, partition_key::equality> _updates;
    size_t _op_count = 0;
 public:
-    explicit view_updates(view_and_base vab)
-            : _view(std::move(vab.view))
-            , _view_info(*_view->view_info())
-            , _base(vab.base->base_schema())
-            , _base_info(vab.base)
-            , _updates(8, partition_key::hashing(*_view), partition_key::equality(*_view))
-    {
-    }
+    explicit view_updates(view_ptr v, schema_ptr b);

    future<> move_to(utils::chunked_vector<frozen_mutation_and_schema>& mutations);

@@ -240,10 +190,10 @@ private:
    };
    std::vector<view_row_entry> get_view_rows(const partition_key& base_key, const clustering_or_static_row& update, const std::optional<clustering_or_static_row>& existing, row_tombstone update_tomb);
    bool can_skip_view_updates(const clustering_or_static_row& update, const clustering_or_static_row& existing) const;
-    void create_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, gc_clock::time_point now);
-    void delete_old_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now);
-    void do_delete_old_entry(const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now);
-    void update_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, const clustering_or_static_row& existing, gc_clock::time_point now);
+    void create_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, gc_clock::time_point now, row_marker update_marker);
+    void delete_old_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now, api::timestamp_type deletion_ts);
+    void do_delete_old_entry(const partition_key& base_key, const clustering_or_static_row& existing, const clustering_or_static_row& update, gc_clock::time_point now, api::timestamp_type deletion_ts);
+    void update_entry(data_dictionary::database db, const partition_key& base_key, const clustering_or_static_row& update, const clustering_or_static_row& existing, gc_clock::time_point now, row_marker update_marker);
    void update_entry_for_computed_column(const partition_key& base_key, const clustering_or_static_row& update, const std::optional<clustering_or_static_row>& existing, gc_clock::time_point now);
 };

@@ -308,7 +258,7 @@ view_update_builder make_view_update_builder(
        data_dictionary::database db,
        const replica::table& base_table,
        const schema_ptr& base_schema,
-        std::vector<view_and_base>&& views_to_update,
+        std::vector<view_ptr>&& views_to_update,
        mutation_reader&& updates,
        mutation_reader_opt&& existings,
        gc_clock::time_point now);
@@ -318,9 +268,9 @@ future<query::clustering_row_ranges> calculate_affected_clustering_ranges(
        const schema& base,
        const dht::decorated_key& key,
        const mutation_partition& mp,
-        const std::vector<view_and_base>& views);
+        const std::vector<view_ptr>& views);

-bool needs_static_row(const mutation_partition& mp, const std::vector<view_and_base>& views);
+bool needs_static_row(const mutation_partition& mp, const std::vector<view_ptr>& views);

 // Whether this node and shard should generate and send view updates for the given token.
 // Checks that the node is one of the replicas (not a pending replicas), and is ready for reads.
@@ -343,13 +293,6 @@ size_t memory_usage_of(const frozen_mutation_and_schema& mut);
 */
 void create_virtual_column(schema_builder& builder, const bytes& name, const data_type& type);

-/**
- * Converts a collection of view schema snapshots into a collection of
- * view_and_base objects, which are snapshots of both the view schema
- * and the base-schema-dependent part of view description.
- */
-std::vector<view_and_base> with_base_info_snapshot(std::vector<view_ptr>);
-
 }

 }
--- a/db/view/view_update_generator.cc
+++ b/db/view/view_update_generator.cc
@@ -331,7 +331,7 @@ static size_t memory_usage_of(const utils::chunked_vector<frozen_mutation_and_sc
 * @return a future that resolves when the updates have been acknowledged by the view replicas
 */
 future<> view_update_generator::populate_views(const replica::table& table,
-        std::vector<view_and_base> views,
+        std::vector<view_ptr> views,
        dht::token base_token,
        mutation_reader&& reader,
        gc_clock::time_point now) {
@@ -404,7 +404,7 @@ struct view_update_generation_timeout_exception : public seastar::timed_out_erro
 future<> view_update_generator::generate_and_propagate_view_updates(const replica::table& table,
        const schema_ptr& base,
        reader_permit permit,
-        std::vector<view_and_base>&& views,
+        std::vector<view_ptr>&& views,
        mutation&& m,
        mutation_reader_opt existings,
        tracing::trace_state_ptr tr_state,
--- a/db/view/view_update_generator.hh
+++ b/db/view/view_update_generator.hh
@@ -26,6 +26,7 @@ struct frozen_mutation_and_schema;
 class mutation;
 class reader_permit;
 class mutation_reader;
+class view_ptr;
 using mutation_reader_opt = optimized_optional<mutation_reader>;

 namespace dht {
@@ -51,7 +52,6 @@ using allow_hints = bool_class<allow_hints_tag>;
 namespace db::view {

 class stats;
-struct view_and_base;
 struct wait_for_all_updates_tag {};
 using wait_for_all_updates = bool_class<wait_for_all_updates_tag>;

@@ -104,7 +104,7 @@ public:

    // Reader's schema must be the same as the base schema of each of the views.
    future<> populate_views(const replica::table& base,
-            std::vector<view_and_base>,
+            std::vector<view_ptr>,
            dht::token base_token,
            mutation_reader&&,
            gc_clock::time_point);
@@ -112,7 +112,7 @@ public:
    future<> generate_and_propagate_view_updates(const replica::table& table,
            const schema_ptr& base,
            reader_permit permit,
-            std::vector<view_and_base>&& views,
+            std::vector<view_ptr>&& views,
            mutation&& m,
            mutation_reader_opt existings,
            tracing::trace_state_ptr tr_state,
--- a/dist/debian/control.template
+++ b/dist/debian/control.template
@@ -12,15 +12,16 @@ Architecture: any
 Description: Scylla database main configuration file
 Scylla is a highly scalable, eventually consistent, distributed,
 partitioned row DB.
-Replaces: %{product}-server (<< 1.1)
+Replaces: %{product}-server (<< 1.1), scylla-enterprise-conf (<< 2025.1.0~)
 Conflicts: %{product}-server (<< 1.1)
+Breaks: scylla-enterprise-conf (<< 2025.1.0~)

 Package: %{product}-server
 Architecture: any
 Depends: ${misc:Depends}, %{product}-conf (= ${binary:Version}), %{product}-python3 (= ${binary:Version})
-Replaces: %{product}-tools (<<5.5)
-Breaks: %{product}-tools (<<5.5)
-Description: Scylla database server binaries 
+Replaces: %{product}-tools (<<5.5), scylla-enterprise-tools (<< 2024.2.0~), scylla-enterprise-server (<< 2025.1.0~)
+Breaks: %{product}-tools (<<5.5), scylla-enterprise-tools (<< 2024.2.0~), scylla-enterprise-server (<< 2025.1.0~)
+Description: Scylla database server binaries
 Scylla is a highly scalable, eventually consistent, distributed,
 partitioned row DB.

@@ -29,6 +30,8 @@ Section: debug
 Priority: extra
 Architecture: any
 Depends: %{product}-server (= ${binary:Version}), ${misc:Depends}
+Replaces: scylla-enterprise-server-dbg (<< 2025.1.0~)
+Breaks: scylla-enterprise-server-dbg (<< 2025.1.0~)
 Description: debugging symbols for %{product}-server
 Scylla is a highly scalable, eventually consistent, distributed,
 partitioned row DB.
@@ -37,13 +40,17 @@ Description: debugging symbols for %{product}-server
 Package: %{product}-kernel-conf
 Architecture: any
 Depends: procps
+Replaces: scylla-enterprise-kernel-conf (<< 2025.1.0~)
+Breaks: scylla-enterprise-kernel-conf (<< 2025.1.0~)
 Description: Scylla kernel tuning configuration
 Scylla is a highly scalable, eventually consistent, distributed,
 partitioned row DB.

 Package: %{product}-node-exporter
 Architecture: any
+Replaces: scylla-enterprise-node-exporter (<< 2025.1.0~)
 Conflicts: prometheus-node-exporter
+Breaks: scylla-enterprise-node-exporter (<< 2025.1.0~)
 Description: Prometheus exporter for machine metrics
 Prometheus exporter for machine metrics, written in Go with pluggable metric collectors.

@@ -54,6 +61,49 @@ Depends: %{product}-server (= ${binary:Version})
 , %{product}-kernel-conf (= ${binary:Version})
 , %{product}-node-exporter (= ${binary:Version})
 , %{product}-cqlsh (= ${binary:Version})
+Replaces: scylla-enterprise (<< 2025.1.0~)
+Breaks: scylla-enterprise (<< 2025.1.0~)
 Description: Scylla database metapackage
 Scylla is a highly scalable, eventually consistent, distributed,
 partitioned row DB.
+
+Package: scylla-enterprise-conf
+Depends: %{product}-conf (= ${binary:Version})
+Architecture: all
+Priority: optional
+Section: oldlibs
+Description: transitional package
+ This is a transitional package. It can safely be removed.
+
+Package: scylla-enterprise-server
+Depends: %{product}-server (= ${binary:Version})
+Architecture: all
+Priority: optional
+Section: oldlibs
+Description: transitional package
+ This is a transitional package. It can safely be removed.
+
+Package: scylla-enterprise
+Depends: %{product} (= ${binary:Version})
+Architecture: all
+Priority: optional
+Section: oldlibs
+Description: transitional package
+ This is a transitional package. It can safely be removed.
+
+Package: scylla-enterprise-kernel-conf
+Depends: %{product}-kernel-conf (= ${binary:Version})
+Architecture: all
+Priority: optional
+Section: oldlibs
+Description: transitional package
+ This is a transitional package. It can safely be removed.
+
+Package: scylla-enterprise-node-exporter
+Depends: %{product}-node-exporter (= ${binary:Version})
+Architecture: all
+Priority: optional
+Section: oldlibs
+Description: transitional package
+ This is a transitional package. It can safely be removed.
+ 
--- a/dist/debian/debian/rules
+++ b/dist/debian/debian/rules
@@ -11,6 +11,8 @@ endif

 product := $(subst -server,,$(DEB_SOURCE))

+libreloc_list := $(shell find scylla/libreloc/ -maxdepth 1 -type f -not -name .*.hmac -and -not -name gnutls.config -printf '-X%f ')
+libexec_list := $(shell find scylla/libexec/ -maxdepth 1 -type f -not -name scylla -and -not -name iotune -printf '-X%f ')
 override_dh_auto_configure:

 override_dh_auto_build:
@@ -38,7 +40,7 @@ endif
 override_dh_strip:
 	# The binaries (ethtool...patchelf) don't pass dh_strip after going through patchelf. Since they are
 	# already stripped, nothing is lost if we exclude them, so that's what we do.
-	dh_strip -Xlibprotobuf.so.15 -Xld.so -Xethtool -Xgawk -Xgzip -Xhwloc-calc -Xhwloc-distrib -Xifconfig -Xlscpu -Xnetstat -Xpatchelf --dbg-package=$(product)-server-dbg
+	dh_strip $(libreloc_list) $(libexec_list) --dbg-package=$(product)-server-dbg
 	find $(CURDIR)/debian/$(product)-server-dbg/usr/lib/debug/.build-id/ -name "*.debug" -exec objcopy --decompress-debug-sections {} \;

 override_dh_makeshlibs:
--- a/dist/debian/debian/scylla-server.install
+++ b/dist/debian/debian/scylla-server.install
@@ -21,6 +21,7 @@ opt/scylladb/scyllatop/*
 opt/scylladb/scripts/libexec/*
 opt/scylladb/bin/*
 opt/scylladb/libreloc/*
+opt/scylladb/libreloc/.*.hmac
 opt/scylladb/libexec/*
 usr/lib/scylla/*
 var/lib/scylla/data
--- a/dist/docker/debian/build_docker.sh
+++ b/dist/docker/debian/build_docker.sh
@@ -83,6 +83,7 @@ fi
 packages=(
    "build/dist/$config/debian/${product}_$version-$release-1_$arch.deb"
    "build/dist/$config/debian/$product-server_$version-$release-1_$arch.deb"
+    "build/dist/$config/debian/$product-server-dbg_$version-$release-1_$arch.deb"
    "build/dist/$config/debian/$product-conf_$version-$release-1_$arch.deb"
    "build/dist/$config/debian/$product-kernel-conf_$version-$release-1_$arch.deb"
    "build/dist/$config/debian/$product-node-exporter_$version-$release-1_$arch.deb"
--- a/dist/redhat/scylla.spec
+++ b/dist/redhat/scylla.spec
@@ -13,7 +13,8 @@ Requires:       %{product}-python3 = %{version}-%{release}
 Requires:       %{product}-kernel-conf = %{version}-%{release}
 Requires:       %{product}-node-exporter = %{version}-%{release}
 Requires:       %{product}-cqlsh = %{version}-%{release}
-Obsoletes:      scylla-server < 1.1
+Provides:       scylla-enterprise = %{version}-%{release}
+Obsoletes:      scylla-enterprise < 2025.1.0

 %global _debugsource_template %{nil}
 %global _debuginfo_subpackages %{nil}
@@ -73,6 +74,10 @@ Requires:       %{product}-python3 = %{version}-%{release}
 AutoReqProv:    no
 Provides:       %{product}-tools:%{_bindir}/nodetool
 Provides:       %{product}-tools:%{_sysconfigdir}/bash_completion.d/nodetool-completion
+Provides:       scylla-enterprise-tools:%{_bindir}/nodetool
+Provides:       scylla-enterprise-tools:%{_sysconfigdir}/bash_completion.d/nodetool-completion
+Provides:       scylla-enterprise-server = %{version}-%{release}
+Obsoletes:      scylla-enterprise-server < 2025.1.0

 %description server
 This package contains ScyllaDB server.
@@ -132,6 +137,7 @@ ln -sfT /etc/scylla /var/lib/scylla/conf
 /opt/scylladb/scyllatop/*
 /opt/scylladb/bin/*
 /opt/scylladb/libreloc/*
+/opt/scylladb/libreloc/.*.hmac
 /opt/scylladb/libexec/*
 %{_prefix}/lib/scylla/*
 %attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla/
@@ -156,6 +162,8 @@ ln -sfT /etc/scylla /var/lib/scylla/conf
 Group:          Applications/Databases
 Summary:        Scylla configuration package
 Obsoletes:      scylla-server < 1.1
+Provides:       scylla-enterprise-conf = %{version}-%{release}
+Obsoletes:      scylla-enterprise-conf < 2025.1.0

 %description conf
 This package contains the main scylla configuration file.
@@ -176,6 +184,8 @@ Summary:        Scylla configuration package for the Linux kernel
 Requires:       kmod
 # tuned overwrites our sysctl settings
 Obsoletes:      tuned >= 2.11.0
+Provides:       scylla-enterprise-kernel-conf = %{version}-%{release}
+Obsoletes:      scylla-enterprise-kernel-conf < 2025.1.0

 %description kernel-conf
 This package contains Linux kernel configuration changes for the Scylla database.  Install this package
@@ -212,6 +222,8 @@ Group:          Applications/Databases
 Summary:        Prometheus exporter for machine metrics
 License:        ASL 2.0
 URL:            https://github.com/prometheus/node_exporter
+Provides:       scylla-enterprise-node-exporter = %{version}-%{release}
+Obsoletes:      scylla-enterprise-node-exporter < 2025.1.0

 %description node-exporter
 Prometheus exporter for machine metrics, written in Go with pluggable metric collectors.
--- a/docs/_ext/scylladb_swagger.py
+++ b/docs/_ext/scylladb_swagger.py
@@ -80,9 +80,12 @@ class SwaggerProcessor():

 def custom_pathto(app, docname, typ=None, anchor=None):
    current_doc = app.env.docname
-    current_version =  os.environ.get('SPHINX_MULTIVERSION_NAME', '')
+    current_version = os.environ.get('SPHINX_MULTIVERSION_NAME', '')
+    flag = os.environ.get('FLAG', 'manual')
+    
    if current_version:
-        return "/" + current_version + "/" + docname
+        prefix = "/manual/" if flag == 'manual' else "/"
+        return f"{prefix}{current_version}/{docname}"
    return relative_uri(app.builder.get_target_uri(current_doc), docname) + (('#' + anchor) if anchor else '')

 def setup(app):
--- a/docs/_utils/redirects.yaml
+++ b/docs/_utils/redirects.yaml
@@ -2,6 +2,11 @@
 #old path: new path


+# Remove redundant pages
+
+/stable/getting-started/tutorials: https://docs.scylladb.com/stable/get-started/develop-with-scylladb/tutorials-example-projects.html
+/stable/contribute: https://github.com/scylladb/scylladb/blob/master/CONTRIBUTING.md
+
 # Remove an oudated article

 /stable/troubleshooting/nodetool-memory-read-timeout.html: /stable/troubleshooting/index.html
--- a/docs/alternator/compatibility.md
+++ b/docs/alternator/compatibility.md
@@ -187,8 +187,8 @@ ATTACH SERVICE_LEVEL oltp TO bob;
 Note that `alternator_enforce_authorization` has to be enabled in Scylla configuration.

 See [Authorization](##Authorization) section to learn more about roles and authorization.
-See <https://enterprise.docs.scylladb.com/stable/using-scylla/workload-prioritization.html>
-to read about **Workload Prioritization** in detail.
+See [Workload Prioritization](../features/workload-prioritization)
+to read about Workload Prioritization in detail.

 ## Metrics

@@ -272,12 +272,6 @@ behave the same in Alternator. However, there are a few features which we have
 not implemented yet. Unimplemented features return an error when used, so
 they should be easy to detect. Here is a list of these unimplemented features:

-* Currently in Alternator, a GSI (Global Secondary Index) can only be added
-  to a table at table creation time. DynamoDB allows adding a GSI (but not an
-  LSI) to an existing table using an UpdateTable operation, and similarly it
-  allows removing a GSI from a table.
-  <https://github.com/scylladb/scylla/issues/11567>
-
 * GSI (Global Secondary Index) and LSI (Local Secondary Index) may be
  configured to project only a subset of the base-table attributes to the
  index. This option is not yet respected by Alternator - all attributes
@@ -319,7 +313,7 @@ they should be easy to detect. Here is a list of these unimplemented features:
  RestoreTableToPointInTime

 * DynamoDB's encryption-at-rest settings are not supported. The Encryption-
-  at-rest feature is available in Scylla Enterprise, but needs to be
+  at-rest feature is available in ScyllaDB, but needs to be
  enabled and configured separately, not through the DynamoDB API.

 * No support for throughput accounting or capping. As mentioned above, the
@@ -378,3 +372,14 @@ they should be easy to detect. Here is a list of these unimplemented features:
  that can be used to forbid table deletion. This table option was added to
  DynamoDB in March 2023.
  <https://github.com/scylladb/scylla/issues/14482>
+
+* Alternator does not support the table option WarmThroughput that can be
+  used to check or guarantee that the database has "warmed" to handle a
+  particular throughput. This table option was added to DynamoDB in
+  November 2024.
+  <https://github.com/scylladb/scylladb/issues/21853>
+
+* Alternator does not support the table option MultiRegionConsistency
+  that can be used to achieve consistent reads on global (multi-region) tables.
+  This table option was added as a preview to DynamoDB in December 2024.
+  <https://github.com/scylladb/scylladb/issues/21852>
--- a/docs/alternator/new-apis.md
+++ b/docs/alternator/new-apis.md
@@ -144,3 +144,46 @@ If a certain data center or rack has no functional nodes, or doesn't even
 exist, an empty list (`[]`) is returned by the `/localnodes` request.
 A client should be prepared to consider expanding the node search to an
 entire data center, or other data centers, in that case.
+
+## Tablets
+"Tablets" are ScyllaDB's new approach to replicating data across a cluster.
+It replaces the older approach which was named "vnodes". Compared to vnodes,
+tablets are smaller pieces of tables that are easier to move between nodes,
+and allow for faster growing or shrinking of the cluster when needed.
+
+In this version, tablet support is incomplete and not all of the features
+which Alternator needs are supported with tablets. So currently, new
+Alternator tables default to using vnodes - not tablets.
+
+However, if you do want to create an Alternator table which uses tablets,
+you can do this by specifying the `experimental:initial_tablets` tag in
+the CreateTable operation. The value of this tag can be:
+
+* Any valid integer as the value of this tag enables tablets.
+  Typically the number "0" is used - which tells ScyllaDB to pick a reasonable
+  number of initial tablets. But any other number can be used, and this
+  number overrides the default choice of initial number of tablets.
+
+* Any non-integer value - e.g., the string "none" - creates the table
+  without tablets - i.e., using vnodes.
+
+The `experimental:initial_tablets` tag only has any effect while creating
+a new table with CreateTable - changing it later has no effect.
+
+Because the tablets support is incomplete, when tablets are enabled for an
+Alternator table, the following features will not work for this table:
+
+* The table must have one of the write isolation modes which does not
+  use LWT, because it's not supported with tablets. The allowed write
+  isolation modes are `forbid_rmw` or `unsafe_rmw`.
+  Setting the isolation mode to `always_use_lwt` will succeed, but the writes
+  themselves will fail with an InternalServerError. At that point you can
+  still change the write isolation mode of the table to a supported mode.
+  See <https://github.com/scylladb/scylladb/issues/18068>.
+
+* Enabling TTL with UpdateTimeToLive doesn't work (results in an error).
+  See <https://github.com/scylladb/scylla/issues/16567>.
+
+* Enabling Streams with CreateTable or UpdateTable doesn't work
+  (results in an error).
+  See <https://github.com/scylladb/scylla/issues/16317>.
--- a/docs/architecture/compaction/compaction-strategies.rst
+++ b/docs/architecture/compaction/compaction-strategies.rst
@@ -70,8 +70,6 @@ Set the parameters for :ref:`Leveled Compaction <leveled-compaction-strategy-lcs
 Incremental Compaction Strategy (ICS)
 =====================================

-.. versionadded:: 2019.1.4 Scylla Enterprise
-
 ICS principles of operation are similar to those of STCS, merely replacing the increasingly larger SSTables in each tier, by increasingly longer SSTable runs, modeled after LCS runs, but using larger fragment size of 1 GB, by default.

 Compaction is triggered when there are two or more runs of roughly the same size. These runs are incrementally compacted with each other, producing a new SSTable run, while incrementally releasing space as soon as each SSTable in the input run is processed and compacted. This method eliminates the high temporary space amplification problem of STCS by limiting the overhead to twice the (constant) fragment size, per shard.
--- a/docs/architecture/index.rst
+++ b/docs/architecture/index.rst
@@ -12,6 +12,7 @@ ScyllaDB Architecture
   SSTable <sstable/index/>
   Compaction Strategies <compaction/compaction-strategies>
   Raft Consensus Algorithm in ScyllaDB </architecture/raft>
+   Zero-token Nodes </architecture/zero-token-nodes>
   
              
 * :doc:`Data Distribution with Tablets </architecture/tablets/>` - Tablets in ScyllaDB
@@ -22,5 +23,6 @@ ScyllaDB Architecture
 * :doc:`SSTable </architecture/sstable/index/>` - ScyllaDB SSTable 2.0 and 3.0 Format Information
 * :doc:`Compaction Strategies </architecture/compaction/compaction-strategies>` - High-level analysis of different compaction strategies
 * :doc:`Raft Consensus Algorithm in ScyllaDB </architecture/raft>` - Overview of how Raft is implemented in ScyllaDB.
+* :doc:`Zero-token Nodes </architecture/zero-token-nodes>` - Nodes that do not replicate any data.

 Learn more about these topics in the `ScyllaDB University: Architecture lesson <https://university.scylladb.com/courses/scylla-essentials-overview/lessons/architecture/>`_.
--- a/docs/architecture/sstable/_common/sstable_what_is.rst
+++ b/docs/architecture/sstable/_common/sstable_what_is.rst
@@ -15,7 +15,7 @@ SSTable Version Support
     - ScyllaDB Enterprise Version
     - ScyllaDB Open Source Version
   * - 3.x ('me')
-     - 2022.2
+     - 2022.2 and above
     - 5.1 and above
   * - 3.x ('md')
     - 2021.1
--- a/docs/architecture/sstable/index.rst
+++ b/docs/architecture/sstable/index.rst
@@ -9,11 +9,7 @@ ScyllaDB SSTable Format

 .. include:: _common/sstable_what_is.rst

-* In ScyllaDB 6.0 and above, *me* format is enabled by default.
-
-* In ScyllaDB Enterprise 2021.1, ScyllaDB 4.3 and above, *md* format is enabled by default.
-  
-* In ScyllaDB 3.1 and above, *mc* format is enabled by default. 
+In ScyllaDB 6.0 and above, *me* format is enabled by default.

 For more information on each of the SSTable formats, see below:

--- a/docs/architecture/sstable/sstable3/index.rst
+++ b/docs/architecture/sstable/sstable3/index.rst
@@ -12,17 +12,7 @@ ScyllaDB SSTable - 3.x

 .. include:: ../_common/sstable_what_is.rst

-* In ScyllaDB 6.0 and above, the ``me`` format is mandatory, and ``md`` format is used only when upgrading from an existing cluster using ``md``. The ``sstable_format`` parameter is ignored if it is set to ``md``.
-* In ScyllaDB 5.1 and above, the ``me`` format is enabled by default.
-* In ScyllaDB 4.3 to 5.0, the ``md`` format is enabled by default.
-* In ScyllaDB 3.1 to 4.2, the ``mc`` format is enabled by default. 
-* In ScyllaDB 3.0, the ``mc`` format is disabled by default. You can enable it by adding the ``enable_sstables_mc_format`` parameter set to ``true`` in the ``scylla.yaml`` file. For example: 
-    
-    .. code-block:: shell
-    
-       enable_sstables_mc_format: true
-
-.. REMOVE IN FUTURE VERSIONS - Remove the note above in version 5.2.
+In ScyllaDB 6.0 and above, the ``me`` format is mandatory, and ``md`` format is used only when upgrading from an existing cluster using ``md``. The ``sstable_format`` parameter is ignored if it is set to ``md``.

 Additional Information
 -------------------------
--- a/docs/architecture/tablets.rst
+++ b/docs/architecture/tablets.rst
@@ -75,15 +75,7 @@ to a new node.
 File-based Streaming
 ========================

-:label-tip:`ScyllaDB Enterprise`
-
-File-based streaming is a ScyllaDB Enterprise-only feature that optimizes
-tablet migration.
-
-In ScyllaDB Open Source, migrating tablets is performed by streaming mutation
-fragments, which involves deserializing SSTable files into mutation fragments
-and re-serializing them back into SSTables on the other node.
-In ScyllaDB Enterprise, migrating tablets is performed by streaming entire
+Migrating tablets is performed by streaming entire
 SStables, which does not require (de)serializing or processing mutation fragments.
 As a result, less data is streamed over the network, and less CPU is consumed,
 especially for data models that contain small cells.
@@ -98,15 +90,15 @@ Enabling Tablets

 ScyllaDB now uses tablets by default for data distribution.
 Enabling tablets by default when creating new keyspaces is
-controlled by the :confval:`enable_tablets` option. However, tablets only work if
+controlled by the :confval:`tablets_mode_for_new_keyspaces` option. However, tablets only work if
 supported on all nodes within the cluster.

 When creating a new keyspace with tablets enabled by default, you can still opt-out
-on a per-keyspace basis. The recommended ``NetworkTopologyStrategy`` for keyspaces
-remains *required* even if tablets are disabled.
+on a per-keyspace basis using ``CREATE KEYSPACE <ks> WITH tablets = {'enabled': false}``,
+unless the :confval:`tablets_mode_for_new_keyspaces` option is set to ``enforced``.

-You can create a keyspace with tablets
-disabled with the ``tablets = {'enabled': false}`` option:
+Note: The recommended ``NetworkTopologyStrategy`` for keyspaces
+remains *required* even if tablets are disabled.

 .. code:: cql

@@ -143,19 +135,31 @@ You can create a keyspace with tablets enabled with the ``tablets = {'enabled':
    the keyspace schema with ``tablets = { 'enabled': false }`` or 
    ``tablets = { 'enabled': true }``.

+.. _tablets-limitations:
+
 Limitations and Unsupported Features
 --------------------------------------

+.. warning::
+
+    If a keyspace has tablets enabled, it must remain :term:`RF-rack-valid <RF-rack-valid keyspace>`
+    throughout its lifetime. Failing to keep that invariant satisfied may result in data inconsistencies,
+    performance problems, or other issues.
+
 The following ScyllaDB features are not supported if a keyspace has tablets
-enabled:
+enabled. If you plan to use any of the features listed below, CREATE your keyspace
+:ref:`with tablets disabled <tablets-enable-tablets>`.

 * Counters
 * Change Data Capture (CDC)
 * Lightweight Transactions (LWT)
 * Alternator (as it uses LWT)
+* Materialized Views (MV) ``*``
+* Secondary indexes (SI, as it depends on MV) ``*``

-If you plan to use any of the above features, CREATE your keyspace
-:ref:`with tablets disabled <tablets-enable-tablets>`.
+``*`` You can enable experimental support for MV and SI using
+the ``--experimental-features=views-with-tablets`` configuration option. 
+See :ref:`Views with tablets <admin-views-with-tablets>` for details.

 Resharding in keyspaces with tablets enabled has the following limitations:

--- a/docs/architecture/zero-token-nodes.rst
+++ b/docs/architecture/zero-token-nodes.rst
@@ -0,0 +1,28 @@
+=========================
+Zero-token Nodes
+=========================
+
+By default, all nodes in a cluster own a set of token ranges and are used to
+replicate data. In certain circumstances, you may choose to add a node that
+doesn't own any token. Such nodes are referred to as zero-token nodes. They
+do not have a copy of the data but only participate in Raft quorum voting.
+
+To configure a zero-token node, set the ``join_ring`` parameter to ``false``.
+
+You can use zero-token nodes in multi-DC deployments to reduce the risk of
+losing a quorum of nodes.
+See :doc:`Preventing Quorum Loss in Symmetrical Multi-DC Clusters </operating-scylla/procedures/cluster-management/arbiter-dc>` for details.
+
+Note that:
+
+* Zero-token nodes are ignored by drivers, so there is no need to change
+  the load balancing policy on the clients after adding zero-token nodes
+  to the cluster.
+* Zero-token nodes never store replicated data, so running ``nodetool rebuild``,
+  ``nodetool repair``, and ``nodetool cleanup`` can be skipped, as these operations
+  do not affect zero-token nodes.
+* Racks consisting solely of zero-token nodes are not taken into consideration
+  when deciding whether a keyspace is :term:`RF-rack-valid <RF-rack-valid keyspace>`.
+  However, an RF-rack-valid keyspace must have the replication factor equal to 0
+  in an :doc:`arbiter DC </operating-scylla/procedures/cluster-management/arbiter-dc>`.
+  Otherwise, it is RF-rack-invalid.
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -67,7 +67,7 @@ extensions = [
 ]

 # The suffix(es) of source filenames.
-source_suffix = ['.rst']
+source_suffix = ['.rst', '.md']

 # The master toctree document.
 master_doc = "index"
--- a/docs/contribute.rst
+++ b/docs/contribute.rst
@@ -1,31 +0,0 @@
-Contribute to ScyllaDB
-=======================
-
-Thank you for your interest in making ScyllaDB better!
-We appreciate your help and look forward to welcoming you to the ScyllaDB Community.
-There are two ways you can contribute:
-
-* Send a patch to the ScyllaDB source code
-* Write documentation for ScyllaDB Docs
-
-
-Contribute to ScyllaDB's Source Code
------------------------------------
-ScyllaDB developers use patches and email to share and discuss changes.
-Setting up can take a little time, but once you have done it the first time, it’s easy.
-
-The basic steps are:
-
-* Join the ScyllaDB community
-* Create a Git branch to work on
-* Commit your work with clear commit messages and sign-offs.
-* Send a PR or use ``git format-patch`` and ``git send-email`` to send to the list
-
-
-The entire process is `documented here <https://github.com/scylladb/scylla/blob/master/CONTRIBUTING.md>`_.
-
-Contribute to ScyllaDB Docs
---------------------------
-
-Each ScyllaDB project has accompanying documentation. For information about contributing documentation to a specific ScyllaDB project, refer to the README file for the individual project.
-For general information or to contribute to the ScyllaDB Sphinx theme, read the `Contributor's Guide <https://sphinx-theme.scylladb.com/stable/contribute/>`_.
--- a/docs/cql/_common/tablets-default.rst
+++ b/docs/cql/_common/tablets-default.rst
@@ -1,3 +0,0 @@
-By default, a keyspace is created with tablets enabled. The ``tablets`` option 
-is used to opt out a keyspace from tablets-based distribution; see :ref:`Enabling Tablets <tablets-enable-tablets>`
-for details.
--- a/docs/cql/compaction.rst
+++ b/docs/cql/compaction.rst
@@ -170,8 +170,6 @@ LCS options
 Incremental Compaction Strategy (ICS)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-.. versionadded:: 2019.1.4 Scylla Enterprise
-
 When using ICS, SSTable runs are put in different buckets depending on their size.
 When an SSTable run is bucketed, the average size of the runs in the bucket is compared to the new run, as well as the ``bucket_high`` and ``bucket_low`` levels.

--- a/docs/cql/ddl.rst
+++ b/docs/cql/ddl.rst
@@ -203,18 +203,6 @@ An example that excludes a datacenter while using ``replication_factor``::
    DESCRIBE KEYSPACE excalibur
        CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3'} AND durable_writes = true;

-
-
-.. only:: opensource
-  
-  Keyspace storage options :label-caution:`Experimental`
-  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-  By default, SStables of a keyspace are stored locally.
-  As an alternative, you can configure your keyspace to be stored
-  on Amazon S3 or another S3-compatible object store.
-  See :ref:`Keyspace storage options <admin-keyspace-storage-options>` for details.
-
 .. _tablets:

 The ``tablets`` property
@@ -232,7 +220,15 @@ sub-option                             type  description
 ``'initial'``                          int   The number of tablets to start with
 ===================================== ====== =============================================

-.. scylladb_include_flag:: tablets-default.rst
+By default, a keyspace is created with tablets enabled. You can use the ``tablets`` option
+to opt a keyspace out of tablets-based distribution.
+
+You may want to opt out if you plan to use features that are not supported for keyspaces
+with tablets enabled. Keyspaces using tablets must also remain :term:`RF-rack-valid <RF-rack-valid keyspace>`
+throughout their lifetime. See :ref:`Limitations and Unsupported Features <tablets-limitations>`
+for details.
+
+**The** ``initial`` **sub-option (deprecated)**

 A good rule of thumb to calculate initial tablets is to divide the expected total storage used
 by tables in this keyspace by (``replication_factor`` * 5GB). For example, if you expect a 30TB
@@ -253,6 +249,14 @@ An example that creates a keyspace with 2048 tablets per table::

 See :doc:`Data Distribution with Tablets </architecture/tablets>` for more information about tablets.

+Keyspace storage options :label-caution:`Experimental`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By default, SStables of a keyspace are stored locally.
+As an alternative, you can configure your keyspace to be stored
+on Amazon S3 or another S3-compatible object store.
+See :ref:`Keyspace storage options <admin-keyspace-storage-options>` for details.
+
 .. _use-statement:        
        
 USE
@@ -285,8 +289,8 @@ For instance::

 The supported options are the same as :ref:`creating a keyspace <create-keyspace-statement>`.

-ALTER KEYSPACE with Tablets :label-caution:`Experimental`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ALTER KEYSPACE with Tablets 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 Modifying a keyspace with tablets enabled is possible and doesn't require any special CQL syntax. However, there are some limitations:

@@ -295,6 +299,7 @@ Modifying a keyspace with tablets enabled is possible and doesn't require any sp
 - If there's any other ongoing global topology operation, executing the ``ALTER`` statement will fail (with an explicit and specific error) and needs to be repeated.
 - The ``ALTER`` statement may take longer than the regular query timeout, and even if it times out, it will continue to execute in the background.
 - The replication strategy cannot be modified, as keyspaces with tablets only support ``NetworkTopologyStrategy``.
+- The ``ALTER`` statement will fail if it would make the keyspace :term:`RF-rack-invalid <RF-rack-valid keyspace>`.

 .. _drop-keyspace-statement:

--- a/docs/cql/types.rst
+++ b/docs/cql/types.rst
@@ -481,7 +481,8 @@ Creating a new user-defined type is done using a ``CREATE TYPE`` statement defin
   field_definition: `identifier` `cql_type`

 A UDT has a name (``udt_name``), which is used to declare columns of that type and is a set of named and typed fields. The ``udt_name`` can be any
-type, including collections or other UDTs. UDTs and collections inside collections must always be frozen (no matter which version of ScyllaDB you are using). 
+type, including collections or other UDTs.
+Similar to collections, a UDT can be frozen or non-frozen. A frozen UDT is immutable and can only be updated as a whole. Nested UDTs or UDTs used in keys must always be frozen.

 For example::

@@ -506,26 +507,15 @@ For example::

  CREATE TABLE superheroes (
       name frozen<full_name> PRIMARY KEY,
-       home frozen<address>
+       home address
  );

 .. note::

   - Attempting to create an already existing type will result in an error unless the ``IF NOT EXISTS`` option is used. If it is used, the statement will be a no-op if the type already exists.
   - A type is intrinsically bound to the keyspace in which it is created and can only be used in that keyspace. At creation, if the type name is prefixed by a keyspace name, it is created in that keyspace. Otherwise, it is created in the current keyspace.
-   - As of ScyllaDB Open Source 3.2, UDTs not inside collections do not have to be frozen, but in all versions prior to ScyllaDB Open Source 3.2, and in all ScyllaDB Enterprise versions, UDTs **must** be frozen. 


-A non-frozen UDT example with ScyllaDB Open Source 3.2 and higher::
-
-   CREATE TYPE ut (a int, b int);
-   CREATE TABLE cf (a int primary key, b ut);
-
-Same UDT in versions prior::
-
-   CREATE TYPE ut (a int, b int);
-   CREATE TABLE cf (a int primary key, b frozen<ut>);
-
 UDT literals
 ~~~~~~~~~~~~

--- a/docs/dev/system_keyspace.md
+++ b/docs/dev/system_keyspace.md
@@ -121,6 +121,29 @@ SELECT * FROM system.large_cells;
 SELECT * FROM system.large_cells WHERE keyspace_name = 'ks1' and table_name = 'standard1';
 ~~~

+## system.corrupt\_data
+
+Stores data found to be corrupt during internal operations. This data cannot be written to sstables because then it will be spread around by repair and compaction. It will also possibly cause failures in sstable parsing.
+At the same time, the data should be kept around so that it can be inspected and possibly restored by the database operator.
+This table is used to store such data. Data is saved at the mutation-fragment level.
+
+Schema:
+```cql
+CREATE TABLE system.corrupt_data (
+    keyspace_name text,              # keyspace name of source table
+    table_name text,                 # table name of source table
+    id timeuuid,                     # id of the corrupt mutation fragment, assigned by the database when the corrupt data entry is created
+    partition_key blob,              # partition key of partition in the source table, can be incomplete or null due to corruption
+    clustering_key text,             # clustering key of mutation-fragment in the source table, can be null for some mutation-fragment kinds, can be incomplete or null due to corruption
+    mutation_fragment_kind text,     # kind of the mutation fragment, one of 'partition start', 'partition end', 'static row', 'clustering row', 'range tombstone change'; only the latter two can have clustering_key set
+    frozen_mutation_fragment blob,   # the serialized mutation fragment itself
+    origin text,                     # the name of the process that found the corruption, e.g. 'sstable-writer'
+    sstable_name text,               # the name of the sstable that contains the corrupt data, if known; sstable is not kept around, it could be compacted or deleted
+    PRIMARY KEY ((keyspace_name, table_name), id)
+) WITH CLUSTERING ORDER BY (id ASC)
+    AND gc_grace_seconds = 0;
+```
+
 ## system.raft

 Holds information about Raft
@@ -225,7 +248,9 @@ CREATE TYPE system.tablet_task_info (
    tablet_task_id uuid,
    request_time timestamp,
    sched_nr bigint,
-    sched_time timestamp
+    sched_time timestamp,
+    repair_hosts_filter text,
+    repair_dcs_filter text,
 )
 ~~~

@@ -255,6 +280,8 @@ Only tables which use tablet-based replication strategy have an entry here.
  * `request_time` - The time the request is created.
  * `sched_nr` - Number of times the request has been scheduled by the repair scheduler.
  * `sched_time` - The time the request has been scheduled by the repair scheduler.
+  * `repair_hosts_filter` - Repair replicas listed in the comma-separated host_id list.
+  * `repair_dcs_filter` - Repair replicas listed in the comma-separated DC list.

 `repair_scheduler_config` contains configuration for the repair scheduler. It contains the following values:
  * `auto_repair_enabled` - When set to true, auto repair is enabled. Disabled by default.
--- a/docs/dev/task_manager.md
+++ b/docs/dev/task_manager.md
@@ -64,18 +64,20 @@ Briefly:
 - `/task_manager/list_module_tasks/{module}` -
        lists (by default non-internal) tasks in the module;
 - `/task_manager/task_status/{task_id}` -
-        gets the task's status, unregisters the task if it's finished;
+        gets the task's status;
 - `/task_manager/abort_task/{task_id}` -
        aborts the task if it's abortable;
 - `/task_manager/wait_task/{task_id}` -
        waits for the task and gets its status;
 - `/task_manager/task_status_recursive/{task_id}` -
        gets statuses of the task and all its descendants in BFS
-        order, unregisters the task;
+        order;
 - `/task_manager/ttl` -
        gets or sets new ttl.
 - `/task_manager/user_ttl` -
        gets or sets new user ttl.
+- `/task_manager/drain/{module}` -
+        unregisters all finished local tasks in the module.

 # Virtual tasks

--- a/docs/dev/topology-over-raft.md
+++ b/docs/dev/topology-over-raft.md
@@ -124,6 +124,9 @@ Additionally to specific node states, there entire topology can also be in a tra
    it from group 0. We also use this state to rollback a failed bootstrap or decommission.
 - `rollback_to_normal` - the decommission or removenode operation failed. Rollback the operation by
    moving the node we tried to decommission/remove back to the normal state.
+- `lock` - the topology stays in this state until externally changed (to null state), preventing topology
+    requests from starting. Intended to be used in tests which want to prevent internally-triggered topology
+    operations during the test.

 When a node bootstraps, we create new tokens for it and a new CDC generation
 and enter the `commit_cdc_generation` state. Once the generation is committed,
@@ -239,6 +242,8 @@ globally driven by the topology change coordinator and serialized per-tablet. Tr

 - rebuild - new tablet replica is rebuilt from existing ones, possibly dropping old replica afterwards (on node removal or replace)

+- rebuild_v2 - same as rebuild, but repairs a tablet and streams data from one replica, instead of streaming data from all replicas
+
 - repair - tablet replicas are repaired

 Each tablet has its own state machine for keeping state of transition stored in group0 which is part of the tablet state. It involves
@@ -329,6 +334,32 @@ stateDiagram-v2
 ```

 The above state transition state machine is the same for those tablet transition kinds: migration, intranode_migration, rebuild.
+
+In the rebuild_v2 transition kind, the streaming stage is preceded by the rebuild_repair stage:
+
+```mermaid
+stateDiagram-v2
+    state if_state <<choice>>
+    [*] --> allow_write_both_read_old
+    allow_write_both_read_old --> write_both_read_old
+    write_both_read_old --> rebuild_repair
+    rebuild_repair --> streaming
+    streaming --> write_both_read_new
+    write_both_read_new --> use_new
+    use_new --> cleanup
+    cleanup --> end_migration
+    end_migration --> [*]
+    allow_write_both_read_old --> cleanup_target: error
+    write_both_read_old --> cleanup_target: error
+    rebuild_repair --> cleanup_target: error
+    streaming --> cleanup_target: error
+    write_both_read_new --> if_state: error
+    if_state --> use_new: more new replicas
+    if_state --> cleanup_target: more old replicas
+    cleanup_target --> revert_migration
+    revert_migration --> [*]
+```
+
 The repair tablet transition kind is different. It transits only to the repair and end_repair stage because no token ownership is changed.

 The behavioral difference between "migration" and "intranode_migration" transitions is in the way "streaming" stage
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -193,6 +193,8 @@ ScyllaDB comes with its own version of the Apache Cassandra client tools, in the

 We recommend uninstalling Apache Cassandra before installing :code:`scylla-tools`.

+ .. TODO Update the example below when a patch release for 2025.1 is available
+
 .. _faq-pinning:

 Can I install or upgrade to a patch release other than latest on Debian or Ubuntu?
--- a/docs/features/cdc/cdc-intro.rst
+++ b/docs/features/cdc/cdc-intro.rst
@@ -67,9 +67,6 @@ You can enable CDC when creating or altering a table using the ``cdc`` option, f

    CREATE TABLE ks.t (pk int, ck int, v int, PRIMARY KEY (pk, ck, v)) WITH cdc = {'enabled':true};

-.. note::
-   If you enabled CDC and later decide to disable it, you need to **stop all writes** to the base table before issuing the ``ALTER TABLE ... WITH cdc = {'enabled':false};`` command.
-
 .. include:: /features/cdc/_common/cdc-params.rst

 Using CDC with Applications
--- a/docs/features/local-secondary-indexes.rst
+++ b/docs/features/local-secondary-indexes.rst
@@ -6,9 +6,9 @@ Local Secondary Indexes is an enhancement to :doc:`Global Secondary Indexes <sec
 which allows ScyllaDB to optimize workloads where the partition key of the base table and the index are the same key.

 .. note::
-   As of ScyllaDB Open Source 4.0, updates for local secondary indexes are performed **synchronously**. When updates are synchronous, the client acknowledges the write
+   Updates for local secondary indexes are performed **synchronously**. When updates are synchronous, the client acknowledges the write
   operation only **after both** the base table modification **and** the view update are written.
-   This is important to note because the process is no longer asynchronous and the modifications are immediately reflected in the index.
+   This is important to note because the process is no longer asynchronous, and the modifications are immediately reflected in the index.
   In addition, if the view update fails, the client receives a write error.

 Example:
--- a/docs/features/workload-prioritization.rst
+++ b/docs/features/workload-prioritization.rst
@@ -18,7 +18,7 @@ For example, consider the following two workloads:
  - Slow queries 
  - In essence - Latency agnostic

-Using Service Level CQL commands, database administrators (working on Scylla Enterprise) can set different workload prioritization levels (levels of service) for each workload without sacrificing latency or throughput.
+Using Service Level CQL commands, database administrators (working on ScyllaDB) can set different workload prioritization levels (levels of service) for each workload without sacrificing latency or throughput.
 By assigning each service level to the different roles within your organization, DBAs ensure that each role_ receives the level of service the role requires.

 .. _`role` : /operating-scylla/security/rbac_usecase/
@@ -425,7 +425,7 @@ In order for workload prioritization to take effect, application users need to b

 Limits
 ======
-Scylla Enterprise is limited to 8 service levels, including the default one; this means you can create up to 7 service levels.
+ScyllaDB is limited to 8 service levels, including the default one; this means you can create up to 7 service levels.


 Additional References
--- a/docs/getting-started/_common/os-support-info.rst
+++ b/docs/getting-started/_common/os-support-info.rst
@@ -1,21 +0,0 @@
-You can `build ScyllaDB from source <https://github.com/scylladb/scylladb#build-prerequisites>`_ on other x86_64 or aarch64 platforms, without any guarantees.
-
-+----------------------------+--------------------+-------+---------------+
-| Linux Distributions        |Ubuntu              | Debian|Rocky / CentOS |
-|                            |                    |       |/ RHEL         |
-+----------------------------+------+------+------+-------+-------+-------+
-| ScyllaDB Version / Version |20.04 |22.04 |24.04 |  11   |   8   |   9   |
-+============================+======+======+======+=======+=======+=======+
-|   6.2                      | |v|  | |v|  | |v|  | |v|   | |v|   | |v|   |
-+----------------------------+------+------+------+-------+-------+-------+
-|   6.1                      | |v|  | |v|  | |v|  | |v|   | |v|   | |v|   |
-+----------------------------+------+------+------+-------+-------+-------+
-
-* The recommended OS for ScyllaDB Open Source is Ubuntu 22.04.
-* All releases are available as a Docker container and EC2 AMI, GCP, and Azure images. 
-
-Supported Architecture
-----------------------------
-
-ScyllaDB Open Source supports x86_64 for all versions and AArch64 starting from ScyllaDB 4.6 and nightly build. 
-In particular, aarch64 support includes AWS EC2 Graviton.
--- a/Show More
+++ b/Show More