re-spin: 5.2.11

Update ./tools/jmx and ./tools/java submodules
* tools/jmx 88d9bdc...bc4f8ea (1):
  > Merge "scylla-apiclient: update several Java dependencies" from Piotr Grabowski

* tools/java f8f556d802...e716e1bd1d (1):
  > Merge 'build: update several dependencies' from Piotr Grabowski

Update build dependencies which were flagged by security scanners.

Refs: scylladb/scylla-jmx#220
Refs: scylladb/scylla-tools-java#351

Closes #16150
2023-11-26 16:17:58 +02:00 · 2023-11-23 15:29:00 +02:00 · 2023-11-22 14:22:13 +02:00 · 2023-11-21 01:29:28 +01:00 · 2023-11-21 01:29:28 +01:00 · 2023-11-21 01:29:28 +01:00
380 changed files with 13439 additions and 7264 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "seastar"]
 	path = seastar
-	url = ../seastar
+	url = ../scylla-seastar
 	ignore = dirty
 [submodule "swagger-ui"]
 	path = swagger-ui
--- a/SCYLLA-VERSION-GEN
+++ b/SCYLLA-VERSION-GEN
@@ -72,7 +72,7 @@ fi

 # Default scylla product/version tags
 PRODUCT=scylla
-VERSION=5.2.0-dev
+VERSION=5.2.11

 if test -f version
 then
--- a/alternator/executor.cc
+++ b/alternator/executor.cc
@@ -88,17 +88,20 @@ json::json_return_type make_streamed(rjson::value&& value) {
        // move objects to coroutine frame.
        auto los = std::move(os);
        auto lrs = std::move(rs);
+        std::exception_ptr ex;
        try {
            co_await rjson::print(*lrs, los);
-            co_await los.flush();
-            co_await los.close();
        } catch (...) {
            // at this point, we cannot really do anything. HTTP headers and return code are
            // already written, and quite potentially a portion of the content data.
            // just log + rethrow. It is probably better the HTTP server closes connection
            // abruptly or something...
-            elogger.error("Unhandled exception in data streaming: {}", std::current_exception());
-            throw;
+            ex = std::current_exception();
+            elogger.error("Exception during streaming HTTP response: {}", ex);
+        }
+        co_await los.close();
+        if (ex) {
+            co_await coroutine::return_exception_ptr(std::move(ex));
        }
        co_return;
    };
@@ -2358,21 +2361,22 @@ std::optional<rjson::value> executor::describe_single_item(schema_ptr schema,
    return item;
 }

-std::vector<rjson::value> executor::describe_multi_item(schema_ptr schema,
-        const query::partition_slice& slice,
-        const cql3::selection::selection& selection,
-        const query::result& query_result,
-        const std::optional<attrs_to_get>& attrs_to_get) {
-    cql3::selection::result_set_builder builder(selection, gc_clock::now());
-    query::result_view::consume(query_result, slice, cql3::selection::result_set_builder::visitor(builder, *schema, selection));
+future<std::vector<rjson::value>> executor::describe_multi_item(schema_ptr schema,
+        const query::partition_slice&& slice,
+        shared_ptr<cql3::selection::selection> selection,
+        foreign_ptr<lw_shared_ptr<query::result>> query_result,
+        shared_ptr<const std::optional<attrs_to_get>> attrs_to_get) {
+    cql3::selection::result_set_builder builder(*selection, gc_clock::now());
+    query::result_view::consume(*query_result, slice, cql3::selection::result_set_builder::visitor(builder, *schema, *selection));
    auto result_set = builder.build();
    std::vector<rjson::value> ret;
    for (auto& result_row : result_set->rows()) {
        rjson::value item = rjson::empty_object();
-        describe_single_item(selection, result_row, attrs_to_get, item);
+        describe_single_item(*selection, result_row, *attrs_to_get, item);
        ret.push_back(std::move(item));
+        co_await coroutine::maybe_yield();
    }
-    return ret;
+    co_return ret;
 }

 static bool check_needs_read_before_write(const parsed::value& v) {
@@ -3254,8 +3258,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
                    service::storage_proxy::coordinator_query_options(executor::default_timeout(), permit, client_state, trace_state)).then(
                    [schema = rs.schema, partition_slice = std::move(partition_slice), selection = std::move(selection), attrs_to_get = rs.attrs_to_get] (service::storage_proxy::coordinator_query_result qr) mutable {
                utils::get_local_injector().inject("alternator_batch_get_item", [] { throw std::runtime_error("batch_get_item injection"); });
-                std::vector<rjson::value> jsons = describe_multi_item(schema, partition_slice, *selection, *qr.query_result, *attrs_to_get);
-                return make_ready_future<std::vector<rjson::value>>(std::move(jsons));
+                return describe_multi_item(std::move(schema), std::move(partition_slice), std::move(selection), std::move(qr.query_result), std::move(attrs_to_get));
            });
            response_futures.push_back(std::move(f));
        }
--- a/alternator/executor.hh
+++ b/alternator/executor.hh
@@ -222,11 +222,11 @@ public:
        const query::result&,
        const std::optional<attrs_to_get>&);

-    static std::vector<rjson::value> describe_multi_item(schema_ptr schema,
-        const query::partition_slice& slice,
-        const cql3::selection::selection& selection,
-        const query::result& query_result,
-        const std::optional<attrs_to_get>& attrs_to_get);
+    static future<std::vector<rjson::value>> describe_multi_item(schema_ptr schema,
+        const query::partition_slice&& slice,
+        shared_ptr<cql3::selection::selection> selection,
+        foreign_ptr<lw_shared_ptr<query::result>> query_result,
+        shared_ptr<const std::optional<attrs_to_get>> attrs_to_get);

    static void describe_single_item(const cql3::selection::selection&,
        const std::vector<bytes_opt>&,
--- a/alternator/streams.cc
+++ b/alternator/streams.cc
@@ -145,19 +145,24 @@ future<alternator::executor::request_return_type> alternator::executor::list_str
    auto table = find_table(_proxy, request);
    auto db = _proxy.data_dictionary();
    auto cfs = db.get_tables();
-    auto i = cfs.begin();
-    auto e = cfs.end();

    if (limit < 1) {
        throw api_error::validation("Limit must be 1 or more");
    }

-    // TODO: the unordered_map here is not really well suited for partial
-    // querying - we're sorting on local hash order, and creating a table
-    // between queries may or may not miss info. But that should be rare,
-    // and we can probably expect this to be a single call.
+    // #12601 (maybe?) - sort the set of tables by ID. This should ensure we never
+    // generate duplicates in a paged listing here. We can obviously miss tables if they
+    // are added between paged calls and end up with a "smaller" UUID/ARN, but that
+    // is to be expected.
+    std::sort(cfs.begin(), cfs.end(), [](const data_dictionary::table& t1, const data_dictionary::table& t2) {
+        return t1.schema()->id().uuid() < t2.schema()->id().uuid();
+    });
+
+    auto i = cfs.begin();
+    auto e = cfs.end();
+
    if (streams_start) {
-        i = std::find_if(i, e, [&](data_dictionary::table t) {
+        i = std::find_if(i, e, [&](const data_dictionary::table& t) {
            return t.schema()->id().uuid() == streams_start
                && cdc::get_base_table(db.real_database(), *t.schema())
                && is_alternator_keyspace(t.schema()->ks_name())
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -1946,7 +1946,7 @@
         "operations":[
            {
               "method":"POST",
-               "summary":"Reset local schema",
+               "summary":"Forces this node to recalculate versions of schema objects.",
               "type":"void",
               "nickname":"reset_local_schema",
               "produces":[
--- a/api/failure_detector.cc
+++ b/api/failure_detector.cc
@@ -17,36 +17,42 @@ namespace fd = httpd::failure_detector_json;

 void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
    fd::get_all_endpoint_states.set(r, [&g](std::unique_ptr<request> req) {
-        std::vector<fd::endpoint_state> res;
-        for (auto i : g.get_endpoint_states()) {
-            fd::endpoint_state val;
-            val.addrs = boost::lexical_cast<std::string>(i.first);
-            val.is_alive = i.second.is_alive();
-            val.generation = i.second.get_heart_beat_state().get_generation();
-            val.version = i.second.get_heart_beat_state().get_heart_beat_version();
-            val.update_time = i.second.get_update_timestamp().time_since_epoch().count();
-            for (auto a : i.second.get_application_state_map()) {
-                fd::version_value version_val;
-                // We return the enum index and not it's name to stay compatible to origin
-                // method that the state index are static but the name can be changed.
-                version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(a.first);
-                version_val.value = a.second.value;
-                version_val.version = a.second.version;
-                val.application_state.push(version_val);
+        return g.container().invoke_on(0, [] (gms::gossiper& g) {
+            std::vector<fd::endpoint_state> res;
+            for (auto i : g.get_endpoint_states()) {
+                fd::endpoint_state val;
+                val.addrs = boost::lexical_cast<std::string>(i.first);
+                val.is_alive = i.second.is_alive();
+                val.generation = i.second.get_heart_beat_state().get_generation();
+                val.version = i.second.get_heart_beat_state().get_heart_beat_version();
+                val.update_time = i.second.get_update_timestamp().time_since_epoch().count();
+                for (auto a : i.second.get_application_state_map()) {
+                    fd::version_value version_val;
+                    // We return the enum index and not its name, to stay compatible with the origin
+                    // method: the state indexes are static, but the names can be changed.
+                    version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(a.first);
+                    version_val.value = a.second.value;
+                    version_val.version = a.second.version;
+                    val.application_state.push(version_val);
+                }
+                res.push_back(val);
            }
-            res.push_back(val);
-        }
-        return make_ready_future<json::json_return_type>(res);
+            return make_ready_future<json::json_return_type>(res);
+        });
    });

    fd::get_up_endpoint_count.set(r, [&g](std::unique_ptr<request> req) {
-        int res = g.get_up_endpoint_count();
-        return make_ready_future<json::json_return_type>(res);
+        return g.container().invoke_on(0, [] (gms::gossiper& g) {
+            int res = g.get_up_endpoint_count();
+            return make_ready_future<json::json_return_type>(res);
+        });
    });

    fd::get_down_endpoint_count.set(r, [&g](std::unique_ptr<request> req) {
-        int res = g.get_down_endpoint_count();
-        return make_ready_future<json::json_return_type>(res);
+        return g.container().invoke_on(0, [] (gms::gossiper& g) {
+            int res = g.get_down_endpoint_count();
+            return make_ready_future<json::json_return_type>(res);
+        });
    });

    fd::get_phi_convict_threshold.set(r, [] (std::unique_ptr<request> req) {
@@ -54,11 +60,13 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
    });

    fd::get_simple_states.set(r, [&g] (std::unique_ptr<request> req) {
-        std::map<sstring, sstring> nodes_status;
-        for (auto& entry : g.get_endpoint_states()) {
-            nodes_status.emplace(entry.first.to_sstring(), entry.second.is_alive() ? "UP" : "DOWN");
-        }
-        return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
+        return g.container().invoke_on(0, [] (gms::gossiper& g) {
+            std::map<sstring, sstring> nodes_status;
+            for (auto& entry : g.get_endpoint_states()) {
+                nodes_status.emplace(entry.first.to_sstring(), entry.second.is_alive() ? "UP" : "DOWN");
+            }
+            return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
+        });
    });

    fd::set_phi_convict_threshold.set(r, [](std::unique_ptr<request> req) {
@@ -67,13 +75,15 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
    });

    fd::get_endpoint_state.set(r, [&g] (std::unique_ptr<request> req) {
-        auto* state = g.get_endpoint_state_for_endpoint_ptr(gms::inet_address(req->param["addr"]));
-        if (!state) {
-            return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->param["addr"]));
-        }
-        std::stringstream ss;
-        g.append_endpoint_state(ss, *state);
-        return make_ready_future<json::json_return_type>(sstring(ss.str()));
+        return g.container().invoke_on(0, [req = std::move(req)] (gms::gossiper& g) {
+            auto* state = g.get_endpoint_state_for_endpoint_ptr(gms::inet_address(req->param["addr"]));
+            if (!state) {
+                return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->param["addr"]));
+            }
+            std::stringstream ss;
+            g.append_endpoint_state(ss, *state);
+            return make_ready_future<json::json_return_type>(sstring(ss.str()));
+        });
    });

    fd::get_endpoint_phi_values.set(r, [](std::unique_ptr<request> req) {
--- a/api/gossiper.cc
+++ b/api/gossiper.cc
@@ -6,6 +6,8 @@
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

+#include <seastar/core/coroutine.hh>
+
 #include "gossiper.hh"
 #include "api/api-doc/gossiper.json.hh"
 #include "gms/gossiper.hh"
@@ -14,19 +16,23 @@ namespace api {
 using namespace json;

 void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
-    httpd::gossiper_json::get_down_endpoint.set(r, [&g] (const_req req) {
-        auto res = g.get_unreachable_members();
-        return container_to_vec(res);
+    httpd::gossiper_json::get_down_endpoint.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
+        auto res = co_await g.get_unreachable_members_synchronized();
+        co_return json::json_return_type(container_to_vec(res));
    });

-    httpd::gossiper_json::get_live_endpoint.set(r, [&g] (const_req req) {
-        auto res = g.get_live_members();
-        return container_to_vec(res);
+
+    httpd::gossiper_json::get_live_endpoint.set(r, [&g] (std::unique_ptr<request> req) {
+        return g.get_live_members_synchronized().then([] (auto res) {
+            return make_ready_future<json::json_return_type>(container_to_vec(res));
+        });
    });

-    httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (const_req req) {
-        gms::inet_address ep(req.param["addr"]);
-        return g.get_endpoint_downtime(ep);
+    httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
+        gms::inet_address ep(req->param["addr"]);
+        // synchronize unreachable_members on all shards
+        co_await g.get_unreachable_members_synchronized();
+        co_return g.get_endpoint_downtime(ep);
    });

    httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<request> req) {
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -243,17 +243,21 @@ future<json::json_return_type> set_tables_autocompaction(http_context& ctx, cons
 }

 void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl) {
-    ss::start_native_transport.set(r, [&ctl](std::unique_ptr<request> req) {
+    ss::start_native_transport.set(r, [&ctx, &ctl](std::unique_ptr<request> req) {
        return smp::submit_to(0, [&] {
-            return ctl.start_server();
+            return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
+                return ctl.start_server();
+            });
        }).then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });

-    ss::stop_native_transport.set(r, [&ctl](std::unique_ptr<request> req) {
+    ss::stop_native_transport.set(r, [&ctx, &ctl](std::unique_ptr<request> req) {
        return smp::submit_to(0, [&] {
-            return ctl.request_stop_server();
+            return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
+                return ctl.request_stop_server();
+            });
        }).then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
@@ -275,17 +279,21 @@ void unset_transport_controller(http_context& ctx, routes& r) {
 }

 void set_rpc_controller(http_context& ctx, routes& r, thrift_controller& ctl) {
-    ss::stop_rpc_server.set(r, [&ctl](std::unique_ptr<request> req) {
-        return smp::submit_to(0, [&] {
-            return ctl.request_stop_server();
+    ss::stop_rpc_server.set(r, [&ctx, &ctl] (std::unique_ptr<request> req) {
+        return smp::submit_to(0, [&ctx, &ctl] {
+            return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] () mutable {
+                return ctl.request_stop_server();
+            });
        }).then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });

-    ss::start_rpc_server.set(r, [&ctl](std::unique_ptr<request> req) {
-        return smp::submit_to(0, [&] {
-            return ctl.start_server();
+    ss::start_rpc_server.set(r, [&ctx, &ctl](std::unique_ptr<request> req) {
+        return smp::submit_to(0, [&ctx, &ctl] {
+            return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] () mutable {
+                return ctl.start_server();
+            });
        }).then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
@@ -1041,14 +1049,11 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
        return make_ready_future<json::json_return_type>(res);
    });

-    ss::reset_local_schema.set(r, [&sys_ks](std::unique_ptr<request> req) {
+    ss::reset_local_schema.set(r, [&ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        // FIXME: We should truncate schema tables if more than one node in the cluster.
-        auto& sp = service::get_storage_proxy();
-        auto& fs = sp.local().features();
        apilog.info("reset_local_schema");
-        return db::schema_tables::recalculate_schema_version(sys_ks, sp, fs).then([] {
-            return make_ready_future<json::json_return_type>(json_void());
-        });
+        co_await ss.local().reload_schema();
+        co_return json_void();
    });

    ss::set_trace_probability.set(r, [](std::unique_ptr<request> req) {
--- a/atomic_cell.cc
+++ b/atomic_cell.cc
@@ -66,36 +66,48 @@ atomic_cell::atomic_cell(const abstract_type& type, atomic_cell_view other)
    set_view(_data);
 }

-// Based on:
-//  - org.apache.cassandra.db.AbstractCell#reconcile()
-//  - org.apache.cassandra.db.BufferExpiringCell#reconcile()
-//  - org.apache.cassandra.db.BufferDeletedCell#reconcile()
+// Based on Cassandra's resolveRegular function:
+//  - https://github.com/apache/cassandra/blob/e4f31b73c21b04966269c5ac2d3bd2562e5f6c63/src/java/org/apache/cassandra/db/rows/Cells.java#L79-L119
+//
+// Note: the ordering algorithm for cells is the same as for rows,
+// except that the cell value is used to break a tie in case all other attributes are equal.
+// See compare_row_marker_for_merge.
 std::strong_ordering
 compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right) {
+    // Largest write timestamp wins.
    if (left.timestamp() != right.timestamp()) {
        return left.timestamp() <=> right.timestamp();
    }
+    // Tombstones always win reconciliation with live cells of the same timestamp
    if (left.is_live() != right.is_live()) {
        return left.is_live() ? std::strong_ordering::less : std::strong_ordering::greater;
    }
    if (left.is_live()) {
-        auto c = compare_unsigned(left.value(), right.value()) <=> 0;
-        if (c != 0) {
-            return c;
-        }
+        // Prefer expiring cells (which will become tombstones at some future date) over live cells.
+        // See https://issues.apache.org/jira/browse/CASSANDRA-14592
        if (left.is_live_and_has_ttl() != right.is_live_and_has_ttl()) {
-            // prefer expiring cells.
            return left.is_live_and_has_ttl() ? std::strong_ordering::greater : std::strong_ordering::less;
        }
+        // If both are expiring, choose the cell with the latest expiry or derived write time.
        if (left.is_live_and_has_ttl()) {
+            // Prefer cell with latest expiry
            if (left.expiry() != right.expiry()) {
                return left.expiry() <=> right.expiry();
-            } else {
-                // prefer the cell that was written later,
-                // so it survives longer after it expires, until purged.
+            } else if (right.ttl() != left.ttl()) {
+                // The cell write time is derived as (expiry - ttl).
+                // Prefer the cell that was written later,
+                // so it survives longer after it expires, until purged,
+                // as it becomes purgeable gc_grace_seconds after it was written.
+                //
+                // Note that this is an extension to Cassandra's algorithm,
+                // which stops at the expiration time and, if equal,
+                // moves on to compare the cell values.
                return right.ttl() <=> left.ttl();
            }
        }
+        // The cell with the largest value wins, if all other attributes of the cells are identical.
+        // This is quite arbitrary, but still required to break the tie in a deterministic way.
+        return compare_unsigned(left.value(), right.value());
    } else {
        // Both are deleted

--- a/auth/roles-metadata.cc
+++ b/auth/roles-metadata.cc
@@ -55,6 +55,7 @@ future<bool> default_role_row_satisfies(
        return qp.execute_internal(
                query,
                db::consistency_level::ONE,
+                internal_distributed_query_state(),
                {meta::DEFAULT_SUPERUSER_NAME},
                cql3::query_processor::cache_internal::yes).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
            if (results->empty()) {
--- a/compaction/compaction.cc
+++ b/compaction/compaction.cc
@@ -168,7 +168,7 @@ std::ostream& operator<<(std::ostream& os, pretty_printed_throughput tp) {
 }

 static api::timestamp_type get_max_purgeable_timestamp(const table_state& table_s, sstable_set::incremental_selector& selector,
-        const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk) {
+        const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks) {
    auto timestamp = table_s.min_memtable_timestamp();
    std::optional<utils::hashed_key> hk;
    for (auto&& sst : boost::range::join(selector.select(dk).sstables, table_s.compacted_undeleted_sstables())) {
@@ -179,6 +179,7 @@ static api::timestamp_type get_max_purgeable_timestamp(const table_state& table_
            hk = sstables::sstable::make_hashed_key(*table_s.schema(), dk.key());
        }
        if (sst->filter_has_key(*hk)) {
+            bloom_filter_checks++;
            timestamp = std::min(timestamp, sst->get_stats_metadata().min_timestamp);
        }
    }
@@ -414,9 +415,12 @@ private:

 class formatted_sstables_list {
    bool _include_origin = true;
-    std::vector<sstring> _ssts;
+    std::vector<std::string> _ssts;
 public:
    formatted_sstables_list() = default;
+    void reserve(size_t n) {
+        _ssts.reserve(n);
+    }
    explicit formatted_sstables_list(const std::vector<shared_sstable>& ssts, bool include_origin) : _include_origin(include_origin) {
        _ssts.reserve(ssts.size());
        for (const auto& sst : ssts) {
@@ -435,9 +439,7 @@ public:
 };

 std::ostream& operator<<(std::ostream& os, const formatted_sstables_list& lst) {
-    os << "[";
-    os << boost::algorithm::join(lst._ssts, ",");
-    os << "]";
+    fmt::print(os, "[{}]", fmt::join(lst._ssts, ","));
    return os;
 }

@@ -462,6 +464,7 @@ protected:
    uint64_t _start_size = 0;
    uint64_t _end_size = 0;
    uint64_t _estimated_partitions = 0;
+    uint64_t _bloom_filter_checks = 0;
    db::replay_position _rp;
    encoding_stats_collector _stats_collector;
    bool _can_split_large_partition = false;
@@ -573,13 +576,13 @@ protected:
        return bool(_sstable_set);
    }

-    compaction_writer create_gc_compaction_writer() const {
+    compaction_writer create_gc_compaction_writer(run_id gc_run) const {
        auto sst = _sstable_creator(this_shard_id());

        auto&& priority = _io_priority;
        auto monitor = std::make_unique<compaction_write_monitor>(sst, _table_s, maximum_timestamp(), _sstable_level);
        sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
-        cfg.run_identifier = _run_identifier;
+        cfg.run_identifier = gc_run;
        cfg.monitor = monitor.get();
        auto writer = sst->get_writer(*schema(), partitions_per_sstable(), cfg, get_encoding_stats(), priority);
        return compaction_writer(std::move(monitor), std::move(writer), std::move(sst));
@@ -600,8 +603,14 @@ protected:
    // When compaction finishes, all the temporary sstables generated here will be deleted and removed
    // from table's sstable set.
    compacted_fragments_writer get_gc_compacted_fragments_writer() {
+        // because the temporary sstable run can overlap with the non-gc sstables run created by
+        // get_compacted_fragments_writer(), we have to use a different run_id. the gc_run_id is
+        // created here as:
+        // 1. it can be shared across all sstables created by this writer
+        // 2. it is optional, as gc writer is not always used
+        auto gc_run = run_id::create_random_id();
        return compacted_fragments_writer(*this,
-             [this] (const dht::decorated_key&) { return create_gc_compaction_writer(); },
+             [this, gc_run] (const dht::decorated_key&) { return create_gc_compaction_writer(gc_run); },
             [this] (compaction_writer* cw) { stop_gc_compaction_writer(cw); },
             _stop_request_observable);
    }
@@ -618,8 +627,8 @@ protected:
        return _used_garbage_collected_sstables;
    }

-    bool enable_garbage_collected_sstable_writer() const noexcept {
-        return _contains_multi_fragment_runs && _max_sstable_size != std::numeric_limits<uint64_t>::max();
+    virtual bool enable_garbage_collected_sstable_writer() const noexcept {
+        return _contains_multi_fragment_runs && _max_sstable_size != std::numeric_limits<uint64_t>::max() && bool(_replacer);
    }
 public:
    compaction& operator=(const compaction&) = delete;
@@ -641,6 +650,7 @@ private:
    future<> setup() {
        auto ssts = make_lw_shared<sstables::sstable_set>(make_sstable_set_for_input());
        formatted_sstables_list formatted_msg;
+        formatted_msg.reserve(_sstables.size());
        auto fully_expired = _table_s.fully_expired_sstables(_sstables, gc_clock::now());
        min_max_tracker<api::timestamp_type> timestamp_tracker;

@@ -757,6 +767,7 @@ protected:
                .ended_at = ended_at,
                .start_size = _start_size,
                .end_size = _end_size,
+                .bloom_filter_checks = _bloom_filter_checks,
            },
        };

@@ -776,7 +787,7 @@ protected:
        log_info("{} {} sstables to {}. {} to {} (~{}% of original) in {}ms = {}. ~{} total partitions merged to {}.",
                report_finish_desc(),
                _input_sstable_generations.size(), new_sstables_msg, pretty_printed_data_size(_start_size), pretty_printed_data_size(_end_size), int(ratio * 100),
-                std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(), pretty_printed_throughput(_end_size, duration),
+                std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(), pretty_printed_throughput(_start_size, duration),
                _cdata.total_partitions, _cdata.total_keys_written);

        return ret;
@@ -797,7 +808,7 @@ private:
            };
        }
        return [this] (const dht::decorated_key& dk) {
-            return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk);
+            return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks);
        };
    }

@@ -997,51 +1008,6 @@ void compacted_fragments_writer::consume_end_of_stream() {
    }
 }

-class reshape_compaction : public compaction {
-public:
-    reshape_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
-        : compaction(table_s, std::move(descriptor), cdata) {
-    }
-
-    virtual sstables::sstable_set make_sstable_set_for_input() const override {
-        return sstables::make_partitioned_sstable_set(_schema, false);
-    }
-
-    flat_mutation_reader_v2 make_sstable_reader() const override {
-        return _compacting->make_local_shard_sstable_reader(_schema,
-                _permit,
-                query::full_partition_range,
-                _schema->full_slice(),
-                _io_priority,
-                tracing::trace_state_ptr(),
-                ::streamed_mutation::forwarding::no,
-                ::mutation_reader::forwarding::no,
-                default_read_monitor_generator());
-    }
-
-    std::string_view report_start_desc() const override {
-        return "Reshaping";
-    }
-
-    std::string_view report_finish_desc() const override {
-        return "Reshaped";
-    }
-
-    virtual compaction_writer create_compaction_writer(const dht::decorated_key& dk) override {
-        auto sst = _sstable_creator(this_shard_id());
-        setup_new_sstable(sst);
-
-        sstable_writer_config cfg = make_sstable_writer_config(compaction_type::Reshape);
-        return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats(), _io_priority), sst};
-    }
-
-    virtual void stop_sstable_writer(compaction_writer* writer) override {
-        if (writer) {
-            finish_new_sstable(writer);
-        }
-    }
-};
-
 class regular_compaction : public compaction {
    // keeps track of monitors for input sstable, which are responsible for adjusting backlog as compaction progresses.
    mutable compaction_read_monitor_generator _monitor_generator;
@@ -1151,12 +1117,13 @@ private:
    }

    void update_pending_ranges() {
-        if (!_sstable_set || _sstable_set->all()->empty() || _cdata.pending_replacements.empty()) { // set can be empty for testing scenario.
+        auto pending_replacements = std::exchange(_cdata.pending_replacements, {});
+        if (!_sstable_set || _sstable_set->all()->empty() || pending_replacements.empty()) { // set can be empty for testing scenario.
            return;
        }
        // Releases reference to sstables compacted by this compaction or another, both of which belongs
        // to the same column family
-        for (auto& pending_replacement : _cdata.pending_replacements) {
+        for (auto& pending_replacement : pending_replacements) {
            for (auto& sst : pending_replacement.removed) {
                // Set may not contain sstable to be removed because this compaction may have started
                // before the creation of that sstable.
@@ -1170,7 +1137,70 @@ private:
            }
        }
        _selector.emplace(_sstable_set->make_incremental_selector());
-        _cdata.pending_replacements.clear();
+    }
+};
+
+class reshape_compaction : public regular_compaction {
+private:
+    bool has_sstable_replacer() const noexcept {
+        return bool(_replacer);
+    }
+public:
+    reshape_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
+            : regular_compaction(table_s, std::move(descriptor), cdata) {
+    }
+
+    virtual sstables::sstable_set make_sstable_set_for_input() const override {
+        return sstables::make_partitioned_sstable_set(_schema, false);
+    }
+
+    // Enable incremental compaction only if the strategy caps output size (e.g. LCS) and a replacer is set.
+    virtual bool enable_garbage_collected_sstable_writer() const noexcept override {
+        return _max_sstable_size != std::numeric_limits<uint64_t>::max() && has_sstable_replacer();
+    }
+
+    flat_mutation_reader_v2 make_sstable_reader() const override {
+        return _compacting->make_local_shard_sstable_reader(_schema,
+                _permit,
+                query::full_partition_range,
+                _schema->full_slice(),
+                _io_priority,
+                tracing::trace_state_ptr(),
+                ::streamed_mutation::forwarding::no,
+                ::mutation_reader::forwarding::no,
+                default_read_monitor_generator());
+    }
+
+    std::string_view report_start_desc() const override {
+        return "Reshaping";
+    }
+
+    std::string_view report_finish_desc() const override {
+        return "Reshaped";
+    }
+
+    virtual compaction_writer create_compaction_writer(const dht::decorated_key& dk) override {
+        auto sst = _sstable_creator(this_shard_id());
+        setup_new_sstable(sst);
+
+        sstable_writer_config cfg = make_sstable_writer_config(compaction_type::Reshape);
+        return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats(), _io_priority), sst};
+    }
+
+    virtual void stop_sstable_writer(compaction_writer* writer) override {
+        if (writer) {
+            if (has_sstable_replacer()) {
+                regular_compaction::stop_sstable_writer(writer);
+            } else {
+                finish_new_sstable(writer);
+            }
+        }
+    }
+
+    virtual void on_end_of_compaction() override {
+        if (has_sstable_replacer()) {
+            regular_compaction::on_end_of_compaction();
+        }
    }
 };

--- a/compaction/compaction.hh
+++ b/compaction/compaction.hh
@@ -92,12 +92,15 @@ struct compaction_stats {
    uint64_t start_size = 0;
    uint64_t end_size = 0;
    uint64_t validation_errors = 0;
+    // Bloom filter checks during max purgeable calculation
+    uint64_t bloom_filter_checks = 0;

    compaction_stats& operator+=(const compaction_stats& r) {
        ended_at = std::max(ended_at, r.ended_at);
        start_size += r.start_size;
        end_size += r.end_size;
        validation_errors += r.validation_errors;
+        bloom_filter_checks += r.bloom_filter_checks;
        return *this;
    }
    friend compaction_stats operator+(const compaction_stats& l, const compaction_stats& r) {
--- a/compaction/compaction_manager.cc
+++ b/compaction/compaction_manager.cc
@@ -7,10 +7,12 @@
 */

 #include "compaction_manager.hh"
+#include "compaction_descriptor.hh"
 #include "compaction_strategy.hh"
 #include "compaction_backlog_manager.hh"
 #include "sstables/sstables.hh"
 #include "sstables/sstables_manager.hh"
+#include <memory>
 #include <seastar/core/metrics.hh>
 #include <seastar/core/coroutine.hh>
 #include <seastar/coroutine/switch_to.hh>
@@ -78,6 +80,23 @@ public:
            _compacting.erase(sst);
        }
    }
+
+    // Forwards sstable replacement events to the referenced registration, which must outlive this object.
+    class update_me : public compaction_manager::task::on_replacement {
+        compacting_sstable_registration& _registration;
+    public:
+        explicit update_me(compacting_sstable_registration& registration) : _registration{registration} {}
+        void on_removal(const std::vector<sstables::shared_sstable>& sstables) override {
+            _registration.release_compacting(sstables);
+        }
+        void on_addition(const std::vector<sstables::shared_sstable>& sstables) override {
+            _registration.register_compacting(sstables);
+        }
+    };
+
+    // Creates the on_replacement hook that mirrors sstable replacements into this registration.
+    // The hook holds a reference to *this and must not outlive it.
+    update_me update_on_sstable_replacement() { return update_me{*this}; }
 };

 sstables::compaction_data compaction_manager::create_compaction_data() {
@@ -279,7 +298,7 @@ compaction_manager::task::task(compaction_manager& mgr, compaction::table_state*
    , _description(std::move(desc))
 {}

-future<compaction_manager::compaction_stats_opt> compaction_manager::perform_task(shared_ptr<compaction_manager::task> task) {
+future<compaction_manager::compaction_stats_opt> compaction_manager::perform_task(shared_ptr<compaction_manager::task> task, throw_if_stopping do_throw_if_stopping) {
    _tasks.push_back(task);
    auto unregister_task = defer([this, task] {
        _tasks.remove(task);
@@ -292,6 +311,9 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_tas
        co_return res;
    } catch (sstables::compaction_stopped_exception& e) {
        cmlog.info("{}: stopped, reason: {}", *task, e.what());
+        if (do_throw_if_stopping) {
+            throw;
+        }
    } catch (sstables::compaction_aborted_exception& e) {
        cmlog.error("{}: aborted, reason: {}", *task, e.what());
        _stats.errors++;
@@ -310,14 +332,14 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_tas
    co_return std::nullopt;
 }

-future<sstables::compaction_result> compaction_manager::task::compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, release_exhausted_func_t release_exhausted, can_purge_tombstones can_purge) {
+future<sstables::compaction_result> compaction_manager::task::compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement& on_replace, can_purge_tombstones can_purge) {
    if (!descriptor.sstables.size()) {
        // if there is nothing to compact, just return.
        co_return sstables::compaction_result{};
    }

    bool should_update_history = this->should_update_history(descriptor.options.type());
-    sstables::compaction_result res = co_await compact_sstables(std::move(descriptor), cdata, std::move(release_exhausted), std::move(can_purge));
+    sstables::compaction_result res = co_await compact_sstables(std::move(descriptor), cdata, on_replace, std::move(can_purge));

    if (should_update_history) {
        co_await update_history(*_compacting_table, res, cdata);
@@ -325,8 +347,11 @@ future<sstables::compaction_result> compaction_manager::task::compact_sstables_a

    co_return res;
 }
-future<sstables::compaction_result> compaction_manager::task::compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, release_exhausted_func_t release_exhausted, can_purge_tombstones can_purge) {
+
+future<sstables::compaction_result> compaction_manager::task::compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement& on_replace, can_purge_tombstones can_purge,
+                                                                               sstables::offstrategy offstrategy) {
    compaction::table_state& t = *_compacting_table;
+
    if (can_purge) {
        descriptor.enable_garbage_collection(t.main_sstable_set());
    }
@@ -334,15 +359,26 @@ future<sstables::compaction_result> compaction_manager::task::compact_sstables(s
        auto sst = t.make_sstable();
        return sst;
    };
-    descriptor.replacer = [this, &t, release_exhausted] (sstables::compaction_completion_desc desc) {
+
+    descriptor.replacer = [this, &t, &on_replace, offstrategy] (sstables::compaction_completion_desc desc) {
        t.get_compaction_strategy().notify_completion(desc.old_sstables, desc.new_sstables);
        _cm.propagate_replacement(t, desc.old_sstables, desc.new_sstables);
+        // on_replace updates the compacting registration with the old and new
+        // sstables, while on_compaction_completion() removes the old sstables
+        // from the table's sstable set and adds the new ones to the sstable
+        // set.
+        // Since regular compaction excludes the sstables in the sstable set
+        // which are currently being compacted, ensuring that a compaction has
+        // exclusive access to an sstable requires guarding it with the
+        // registration while adding/removing it to/from the sstable set.
+        // Otherwise, regular compaction could pick it up in the time window
+        // where the sstables:
+        // - are still in the main set
+        // - are not being compacted.
+        on_replace.on_addition(desc.new_sstables);
        auto old_sstables = desc.old_sstables;
-        t.on_compaction_completion(std::move(desc), sstables::offstrategy::no).get();
-        // Calls compaction manager's task for this compaction to release reference to exhausted SSTables.
-        if (release_exhausted) {
-            release_exhausted(old_sstables);
-        }
+        t.on_compaction_completion(std::move(desc), offstrategy).get();
+        on_replace.on_removal(old_sstables);
    };

    co_return co_await sstables::compact_sstables(std::move(descriptor), cdata, t);
@@ -387,9 +423,7 @@ protected:
        sstables::compaction_strategy cs = t->get_compaction_strategy();
        sstables::compaction_descriptor descriptor = cs.get_major_compaction_job(*t, _cm.get_candidates(*t));
        auto compacting = compacting_sstable_registration(_cm, descriptor.sstables);
-        auto release_exhausted = [&compacting] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
-            compacting.release_compacting(exhausted_sstables);
-        };
+        auto on_replace = compacting.update_on_sstable_replacement();
        setup_new_compaction(descriptor.run_identifier);

        cmlog.info0("User initiated compaction started on behalf of {}.{}", t->schema()->ks_name(), t->schema()->cf_name());
@@ -401,7 +435,7 @@ protected:
        // the exclusive lock can be freed to let regular compaction run in parallel to major
        lock_holder.return_all();

-        co_await compact_sstables_and_update_history(std::move(descriptor), _compaction_data, std::move(release_exhausted));
+        co_await compact_sstables_and_update_history(std::move(descriptor), _compaction_data, on_replace);

        finish_compaction();

@@ -448,12 +482,12 @@ protected:
    }
 };

-future<> compaction_manager::run_custom_job(compaction::table_state& t, sstables::compaction_type type, const char* desc, noncopyable_function<future<>(sstables::compaction_data&)> job) {
+future<> compaction_manager::run_custom_job(compaction::table_state& t, sstables::compaction_type type, const char* desc, noncopyable_function<future<>(sstables::compaction_data&)> job, throw_if_stopping do_throw_if_stopping) {
    if (_state != state::enabled) {
        return make_ready_future<>();
    }

-    return perform_task(make_shared<custom_compaction_task>(*this, &t, type, desc, std::move(job))).discard_result();
+    return perform_task(make_shared<custom_compaction_task>(*this, &t, type, desc, std::move(job)), do_throw_if_stopping).discard_result();
 }

 future<> compaction_manager::update_static_shares(float static_shares) {
@@ -647,6 +681,7 @@ sstables::compaction_stopped_exception compaction_manager::task::make_compaction

 compaction_manager::compaction_manager(config cfg, abort_source& as)
    : _cfg(std::move(cfg))
+    , _compaction_submission_timer(compaction_sg().cpu, compaction_submission_callback())
    , _compaction_controller(make_compaction_controller(compaction_sg(), static_shares(), [this] () -> float {
        _last_backlog = backlog();
        auto b = _last_backlog / available_memory();
@@ -681,6 +716,7 @@ compaction_manager::compaction_manager(config cfg, abort_source& as)

 compaction_manager::compaction_manager()
    : _cfg(config{ .available_memory = 1 })
+    , _compaction_submission_timer(compaction_sg().cpu, compaction_submission_callback())
    , _compaction_controller(make_compaction_controller(compaction_sg(), 1, [] () -> float { return 1.0; }))
    , _backlog_manager(_compaction_controller)
    , _throughput_updater(serialized_action([this] { return update_throughput(throughput_mbs()); }))
@@ -738,7 +774,7 @@ void compaction_manager::register_metrics() {
 void compaction_manager::enable() {
    assert(_state == state::none || _state == state::disabled);
    _state = state::enabled;
-    _compaction_submission_timer.arm(periodic_compaction_submission_interval());
+    _compaction_submission_timer.arm_periodic(periodic_compaction_submission_interval());
    _waiting_reevalution = postponed_compactions_reevaluation();
 }

@@ -982,9 +1018,7 @@ protected:
            }
            auto compacting = compacting_sstable_registration(_cm, descriptor.sstables);
            auto weight_r = compaction_weight_registration(&_cm, weight);
-            auto release_exhausted = [&compacting] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
-                compacting.release_compacting(exhausted_sstables);
-            };
+            auto on_replace = compacting.update_on_sstable_replacement();
            cmlog.debug("Accepted compaction job: task={} ({} sstable(s)) of weight {} for {}.{}",
                fmt::ptr(this), descriptor.sstables.size(), weight, t.schema()->ks_name(), t.schema()->cf_name());

@@ -993,7 +1027,7 @@ protected:

            try {
                bool should_update_history = this->should_update_history(descriptor.options.type());
-                sstables::compaction_result res = co_await compact_sstables(std::move(descriptor), _compaction_data, std::move(release_exhausted));
+                sstables::compaction_result res = co_await compact_sstables(std::move(descriptor), _compaction_data, on_replace);
                finish_compaction();
                if (should_update_history) {
                    // update_history can take a long time compared to
@@ -1093,54 +1127,40 @@ public:
    }
 private:
    future<> run_offstrategy_compaction(sstables::compaction_data& cdata) {
-        // This procedure will reshape sstables in maintenance set until it's ready for
-        // integration into main set.
-        // It may require N reshape rounds before the set satisfies the strategy invariant.
-        // This procedure also only updates maintenance set at the end, on success.
-        // Otherwise, some overlapping could be introduced in the set after each reshape
-        // round, progressively degrading read amplification until integration happens.
-        // The drawback of this approach is the 2x space requirement as the old sstables
-        // will only be deleted at the end. The impact of this space requirement is reduced
-        // by the fact that off-strategy is serialized across all tables, meaning that the
-        // actual requirement is the size of the largest table's maintenance set.
+        // Incrementally reshape the SSTables in maintenance set. The output of each reshape
+        // round is merged into the main set. The common case is that off-strategy input
+        // is mostly disjoint, e.g. repair-based node ops, then all the input will be
+        // reshaped in a single round. The incremental approach allows us to be space
+        // efficient (avoiding a 100% overhead) as we will incrementally replace input
+        // SSTables from maintenance set by output ones into main set.

        compaction::table_state& t = *_compacting_table;
-        const auto& maintenance_sstables = t.maintenance_sstable_set();

        // Filter out sstables that require view building, to avoid a race between off-strategy
        // and view building. Refs: #11882
-        const auto old_sstables = boost::copy_range<std::vector<sstables::shared_sstable>>(*maintenance_sstables.all()
-                | boost::adaptors::filtered([] (const sstables::shared_sstable& sst) {
-            return !sst->requires_view_building();
-        }));
-        std::vector<sstables::shared_sstable> reshape_candidates = old_sstables;
-        std::vector<sstables::shared_sstable> sstables_to_remove;
-        std::unordered_set<sstables::shared_sstable> new_unused_sstables;
-
-        auto cleanup_new_unused_sstables_on_failure = defer([&new_unused_sstables] {
-            for (auto& sst : new_unused_sstables) {
-                sst->mark_for_deletion();
-            }
-        });
+        auto get_reshape_candidates = [&t] () {
+            auto maintenance_ssts = t.maintenance_sstable_set().all();
+            return boost::copy_range<std::vector<sstables::shared_sstable>>(*maintenance_ssts
+                | boost::adaptors::filtered([](const sstables::shared_sstable& sst) {
+                        return !sst->requires_view_building();
+                }));
+        };

        auto get_next_job = [&] () -> std::optional<sstables::compaction_descriptor> {
            auto& iop = service::get_local_streaming_priority(); // run reshape in maintenance mode
-            auto desc = t.get_compaction_strategy().get_reshaping_job(reshape_candidates, t.schema(), iop, sstables::reshape_mode::strict);
+            auto desc = t.get_compaction_strategy().get_reshaping_job(get_reshape_candidates(), t.schema(), iop, sstables::reshape_mode::strict);
            return desc.sstables.size() ? std::make_optional(std::move(desc)) : std::nullopt;
        };

        std::exception_ptr err;
        while (auto desc = get_next_job()) {
-            desc->creator = [this, &new_unused_sstables, &t] (shard_id dummy) {
-                auto sst = t.make_sstable();
-                new_unused_sstables.insert(sst);
-                return sst;
-            };
-            auto input = boost::copy_range<std::unordered_set<sstables::shared_sstable>>(desc->sstables);
+            auto compacting = compacting_sstable_registration(_cm, desc->sstables);
+            auto on_replace = compacting.update_on_sstable_replacement();

-            sstables::compaction_result ret;
            try {
-                ret = co_await sstables::compact_sstables(std::move(*desc), cdata, t);
+                sstables::compaction_result _ = co_await compact_sstables(std::move(*desc), _compaction_data, on_replace,
+                                                                          compaction_manager::can_purge_tombstones::no,
+                                                                          sstables::offstrategy::yes);
            } catch (sstables::compaction_stopped_exception&) {
                // If off-strategy compaction stopped on user request, let's not discard the partial work.
                // Therefore, both un-reshaped and reshaped data will be integrated into main set, allowing
@@ -1149,36 +1169,20 @@ private:
                break;
            }
            _performed = true;
-
-            // update list of reshape candidates without input but with output added to it
-            auto it = boost::remove_if(reshape_candidates, [&] (auto& s) { return input.contains(s); });
-            reshape_candidates.erase(it, reshape_candidates.end());
-            std::move(ret.new_sstables.begin(), ret.new_sstables.end(), std::back_inserter(reshape_candidates));
-
-            // If compaction strategy is unable to reshape input data in a single round, it may happen that a SSTable A
-            // created in round 1 will be compacted in a next round producing SSTable B. As SSTable A is no longer needed,
-            // it can be removed immediately. Let's remove all such SSTables immediately to reduce off-strategy space requirement.
-            // Input SSTables from maintenance set can only be removed later, as SSTable sets are only updated on completion.
-            auto can_remove_now = [&] (const sstables::shared_sstable& s) { return new_unused_sstables.contains(s); };
-            for (auto&& sst : input) {
-                if (can_remove_now(sst)) {
-                    co_await sst->unlink();
-                    new_unused_sstables.erase(std::move(sst));
-                } else {
-                    sstables_to_remove.push_back(std::move(sst));
-                }
-            }
        }

-        // at this moment reshape_candidates contains a set of sstables ready for integration into main set
-        auto completion_desc = sstables::compaction_completion_desc{
-            .old_sstables = std::move(old_sstables),
-            .new_sstables = std::move(reshape_candidates)
-        };
-        co_await t.on_compaction_completion(std::move(completion_desc), sstables::offstrategy::yes);
+        // There might be some remaining sstables in maintenance set that didn't require reshape, or the
+        // user has aborted off-strategy. So we can only integrate them into the main set, such that
+        // they become candidates for regular compaction. We cannot hold them forever in maintenance set,
+        // as that causes read and space amplification issues.
+        if (auto sstables = get_reshape_candidates(); sstables.size()) {
+            auto completion_desc = sstables::compaction_completion_desc{
+                .old_sstables = sstables, // removes from maintenance set.
+                .new_sstables = sstables, // adds into main set.
+            };
+            co_await t.on_compaction_completion(std::move(completion_desc), sstables::offstrategy::yes);
+        }

-        cleanup_new_unused_sstables_on_failure.cancel();
-        co_await sstables::sstable_directory::delete_atomically(std::move(sstables_to_remove));
        if (err) {
            co_await coroutine::return_exception_ptr(std::move(err));
        }
@@ -1201,9 +1205,11 @@ protected:
            std::exception_ptr ex;
            try {
                compaction::table_state& t = *_compacting_table;
-                auto maintenance_sstables = t.maintenance_sstable_set().all();
-                cmlog.info("Starting off-strategy compaction for {}.{}, {} candidates were found",
-                        t.schema()->ks_name(), t.schema()->cf_name(), maintenance_sstables->size());
+                {
+                    auto maintenance_sstables = t.maintenance_sstable_set().all();
+                    cmlog.info("Starting off-strategy compaction for {}.{}, {} candidates were found",
+                               t.schema()->ks_name(), t.schema()->cf_name(), maintenance_sstables->size());
+                }
                co_await run_offstrategy_compaction(_compaction_data);
                finish_compaction();
                cmlog.info("Done with off-strategy compaction for {}.{}", t.schema()->ks_name(), t.schema()->cf_name());
@@ -1276,9 +1282,7 @@ private:
                sstable_level, sstables::compaction_descriptor::default_max_sstable_bytes, run_identifier, _options);

            // Releases reference to cleaned sstable such that respective used disk space can be freed.
-            auto release_exhausted = [this] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
-                _compacting.release_compacting(exhausted_sstables);
-            };
+            auto on_replace = _compacting.update_on_sstable_replacement();

            setup_new_compaction(descriptor.run_identifier);

@@ -1287,7 +1291,7 @@ private:

            std::exception_ptr ex;
            try {
-                sstables::compaction_result res = co_await compact_sstables_and_update_history(std::move(descriptor), _compaction_data, std::move(release_exhausted), _can_purge);
+                sstables::compaction_result res = co_await compact_sstables_and_update_history(std::move(descriptor), _compaction_data, on_replace, _can_purge);
                finish_compaction();
                _cm.reevaluate_postponed_compactions();
                co_return res;  // done with current sstable
@@ -1444,14 +1448,26 @@ protected:
        co_return std::nullopt;
    }
 private:
-    // Releases reference to cleaned files such that respective used disk space can be freed.
-    void release_exhausted(std::vector<sstables::shared_sstable> exhausted_sstables) {
-        _compacting.release_compacting(exhausted_sstables);
-    }
-
    future<> run_cleanup_job(sstables::compaction_descriptor descriptor) {
        co_await coroutine::switch_to(_cm.compaction_sg().cpu);

+        // Releases reference to cleaned files such that respective used disk space can be freed.
+        using update_registration = compacting_sstable_registration::update_me;
+        class release_exhausted : public update_registration {
+            sstables::compaction_descriptor& _desc;
+        public:
+            release_exhausted(compacting_sstable_registration& registration, sstables::compaction_descriptor& desc)
+                : update_registration{registration}, _desc{desc} {}
+            // Drop the exhausted sstables from the descriptor first, then release them from the registration.
+            void on_removal(const std::vector<sstables::shared_sstable>& sstables) override {
+                const std::unordered_set<sstables::shared_sstable> gone(sstables.begin(), sstables.end());
+                std::erase_if(_desc.sstables, [&gone] (const sstables::shared_sstable& candidate) {
+                    return gone.contains(candidate);
+                });
+                update_registration::on_removal(sstables);
+            }
+        };
+        release_exhausted on_replace{_compacting, descriptor};
        for (;;) {
            compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_cm._compaction_controller.backlog_of_shares(200), _cm.available_memory()));
            _cm.register_backlog_tracker(user_initiated);
@@ -1459,8 +1475,7 @@ private:
            std::exception_ptr ex;
            try {
                setup_new_compaction(descriptor.run_identifier);
-                co_await compact_sstables_and_update_history(descriptor, _compaction_data,
-                                          std::bind(&cleanup_sstables_compaction_task::release_exhausted, this, std::placeholders::_1));
+                co_await compact_sstables_and_update_history(descriptor, _compaction_data, on_replace);
                finish_compaction();
                _cm.reevaluate_postponed_compactions();
                co_return;  // done with current job
--- a/compaction/compaction_manager.hh
+++ b/compaction/compaction_manager.hh
@@ -32,6 +32,7 @@
 #include "compaction.hh"
 #include "compaction_weight_registration.hh"
 #include "compaction_backlog_manager.hh"
+#include "compaction/compaction_descriptor.hh"
 #include "strategy_control.hh"
 #include "backlog_controller.hh"
 #include "seastarx.hh"
@@ -49,6 +50,8 @@ public:
    boost::icl::interval_map<dht::token, gc_clock::time_point, boost::icl::partial_absorber, std::less, boost::icl::inplace_max> map;
 };

+using throw_if_stopping = bool_class<struct throw_if_stopping_tag>;
+
 // Compaction manager provides facilities to submit and track compaction jobs on
 // behalf of existing tables.
 class compaction_manager {
@@ -137,11 +140,20 @@ public:

        virtual ~task();

+        // Callback interface notified when a compaction replaces exhausted sstables with new ones.
+        struct on_replacement {
+            virtual ~on_replacement() {}
+            // Called after the replacement completes.
+            // @param sstables the old sstables that were replaced by this replacement
+            virtual void on_removal(const std::vector<sstables::shared_sstable>& sstables) = 0;
+            // Called before the replacement happens.
+            // @param sstables the new sstables to be added to the table's sstable set
+            virtual void on_addition(const std::vector<sstables::shared_sstable>& sstables) = 0;
+        };
+
    protected:
        virtual future<compaction_stats_opt> do_run() = 0;

-        using throw_if_stopping = bool_class<struct throw_if_stopping_tag>;
-
        state switch_state(state new_state);

        future<semaphore_units<named_semaphore_exception_factory>> acquire_semaphore(named_semaphore& sem, size_t units = 1);
@@ -158,12 +170,10 @@ public:
        // otherwise, returns stop_iteration::no after sleep for exponential retry.
        future<stop_iteration> maybe_retry(std::exception_ptr err, bool throw_on_abort = false);

-        // Compacts set of SSTables according to the descriptor.
-        using release_exhausted_func_t = std::function<void(const std::vector<sstables::shared_sstable>& exhausted_sstables)>;
-        future<sstables::compaction_result> compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, release_exhausted_func_t release_exhausted,
-                                  can_purge_tombstones can_purge = can_purge_tombstones::yes);
-        future<sstables::compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, release_exhausted_func_t release_exhausted,
+        future<sstables::compaction_result> compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement&,
                                  can_purge_tombstones can_purge = can_purge_tombstones::yes);
+        future<sstables::compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement&,
+                                  can_purge_tombstones can_purge = can_purge_tombstones::yes, sstables::offstrategy offstrategy = sstables::offstrategy::no);
        future<> update_history(compaction::table_state& t, const sstables::compaction_result& res, const sstables::compaction_data& cdata);
        bool should_update_history(sstables::compaction_type ct) {
            return ct == sstables::compaction_type::Compaction;
@@ -296,10 +306,10 @@ private:
    std::function<void()> compaction_submission_callback();
    // all registered tables are reevaluated at a constant interval.
    // Submission is a NO-OP when there's nothing to do, so it's fine to call it regularly.
-    timer<lowres_clock> _compaction_submission_timer = timer<lowres_clock>(compaction_submission_callback());
    static constexpr std::chrono::seconds periodic_compaction_submission_interval() { return std::chrono::seconds(3600); }

    config _cfg;
+    timer<lowres_clock> _compaction_submission_timer;
    compaction_controller _compaction_controller;
    compaction_backlog_manager _backlog_manager;
    optimized_optional<abort_source::subscription> _early_abort_subscription;
@@ -315,7 +325,7 @@ private:
    per_table_history_maps _repair_history_maps;
    tombstone_gc_state _tombstone_gc_state;
 private:
-    future<compaction_stats_opt> perform_task(shared_ptr<task>);
+    future<compaction_stats_opt> perform_task(shared_ptr<task>, throw_if_stopping do_throw_if_stopping = throw_if_stopping::no);

    future<> stop_tasks(std::vector<shared_ptr<task>> tasks, sstring reason);
    future<> update_throughput(uint32_t value_mbs);
@@ -460,7 +470,7 @@ public:
    // parameter type is the compaction type the operation can most closely be
    //      associated with, use compaction_type::Compaction, if none apply.
    // parameter job is a function that will carry the operation
-    future<> run_custom_job(compaction::table_state& s, sstables::compaction_type type, const char *desc, noncopyable_function<future<>(sstables::compaction_data&)> job);
+    future<> run_custom_job(compaction::table_state& s, sstables::compaction_type type, const char *desc, noncopyable_function<future<>(sstables::compaction_data&)> job, throw_if_stopping do_throw_if_stopping);

    class compaction_reenabler {
        compaction_manager& _cm;
--- a/compaction/compaction_strategy.cc
+++ b/compaction/compaction_strategy.cc
@@ -409,7 +409,9 @@ public:
                l0_old_ssts.push_back(std::move(sst));
            }
        }
-        _l0_scts.replace_sstables(std::move(l0_old_ssts), std::move(l0_new_ssts));
+        if (l0_old_ssts.size() || l0_new_ssts.size()) {
+            _l0_scts.replace_sstables(std::move(l0_old_ssts), std::move(l0_new_ssts));
+        }
    }
 };

--- a/compaction/leveled_compaction_strategy.cc
+++ b/compaction/leveled_compaction_strategy.cc
@@ -144,6 +144,8 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input

    auto max_sstable_size_in_bytes = _max_sstable_size_in_mb * 1024 * 1024;

+    leveled_manifest::logger.debug("get_reshaping_job: mode={} input.size={} max_sstable_size_in_bytes={}", mode == reshape_mode::relaxed ? "relaxed" : "strict", input.size(), max_sstable_size_in_bytes);
+
    for (auto& sst : input) {
        auto sst_level = sst->get_sstable_level();
        if (sst_level > leveled_manifest::MAX_LEVELS - 1) {
@@ -227,6 +229,9 @@ leveled_compaction_strategy::get_cleanup_compaction_jobs(table_state& table_s, s
 }

 unsigned leveled_compaction_strategy::ideal_level_for_input(const std::vector<sstables::shared_sstable>& input, uint64_t max_sstable_size) {
+    if (!max_sstable_size) {
+        return 1;
+    }
    auto log_fanout = [fanout = leveled_manifest::leveled_fan_out] (double x) {
        double inv_log_fanout = 1.0f / std::log(fanout);
        return log(x) * inv_log_fanout;
--- a/conf/scylla.yaml
+++ b/conf/scylla.yaml
@@ -65,6 +65,13 @@ commitlog_sync_period_in_ms: 10000
 # is reasonable.
 commitlog_segment_size_in_mb: 32

+# The size of the individual schema commitlog file segments.
+
+# The segment size puts a limit on the mutation size that can be
+# written at once, and some schema mutation writes are much larger
+# than average.
+schema_commitlog_segment_size_in_mb: 32
+
 # seed_provider class_name is saved for future use.
 # A seed address is mandatory.
 seed_provider:
@@ -553,4 +560,16 @@ murmur3_partitioner_ignore_msb_bits: 12
 # WARNING: It's unsafe to set this to false if the node previously booted
 # with the schema commit log enabled. In such case, some schema changes
 # may be lost if the node was not cleanly stopped.
-force_schema_commit_log: true
+force_schema_commit_log: true
+
+# Use Raft to consistently manage schema information in the cluster.
+# Refer to https://docs.scylladb.com/master/architecture/raft.html for more details.
+# The 'Handling Failures' section is especially important.
+#
+# Once enabled in a cluster, this cannot be turned off.
+# If you want to bootstrap a new cluster without Raft, make sure to set this to `false`
+# before starting your nodes for the first time.
+#
+# A cluster not using Raft can be 'upgraded' to use Raft. Refer to the aforementioned
+# documentation, section 'Enabling Raft in ScyllaDB 5.2 and further', for the procedure.
+consistent_cluster_management: true
--- a/configure.py
+++ b/configure.py
@@ -409,6 +409,7 @@ scylla_tests = set([
    'test/boost/limiting_data_source_test',
    'test/boost/linearizing_input_stream_test',
    'test/boost/loading_cache_test',
+    'test/boost/locator_topology_test',
    'test/boost/log_heap_test',
    'test/boost/estimated_histogram_test',
    'test/boost/summary_test',
--- a/cql3/Cql.g
+++ b/cql3/Cql.g
@@ -463,8 +463,7 @@ orderByClause[raw::select_statement::parameters::orderings_type& orderings]
    ;

 jsonValue returns [expression value]
-    :
-    | s=STRING_LITERAL { $value = untyped_constant{untyped_constant::string, $s.text}; }
+    : s=STRING_LITERAL { $value = untyped_constant{untyped_constant::string, $s.text}; }
    | m=marker         { $value = std::move(m); }
    ;

@@ -1578,8 +1577,7 @@ marker returns [expression value]
    ;

 intValue returns [expression value]
-    :
-    | t=INTEGER     { $value = untyped_constant{untyped_constant::integer, $t.text}; }
+    : t=INTEGER     { $value = untyped_constant{untyped_constant::integer, $t.text}; }
    | e=marker      { $value = std::move(e); }
    ;

--- a/cql3/attributes.cc
+++ b/cql3/attributes.cc
@@ -10,6 +10,7 @@

 #include "cql3/attributes.hh"
 #include "cql3/column_identifier.hh"
+#include <optional>

 namespace cql3 {

@@ -55,9 +56,9 @@ int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
    }
 }

-int32_t attributes::get_time_to_live(const query_options& options) {
+std::optional<int32_t> attributes::get_time_to_live(const query_options& options) {
    if (!_time_to_live.has_value() || _time_to_live_unset_guard.is_unset(options))
-        return 0;
+        return std::nullopt;

    cql3::raw_value tval = expr::evaluate(*_time_to_live, options);
    if (tval.is_null()) {
--- a/cql3/attributes.hh
+++ b/cql3/attributes.hh
@@ -45,7 +45,7 @@ public:

    int64_t get_timestamp(int64_t now, const query_options& options);

-    int32_t get_time_to_live(const query_options& options);
+    std::optional<int32_t> get_time_to_live(const query_options& options);

    db::timeout_clock::duration get_timeout(const query_options& options) const;

--- a/cql3/expr/expression.cc
+++ b/cql3/expr/expression.cc
@@ -1416,7 +1416,7 @@ expression search_and_replace(const expression& e,
                    };
                },
                [&] (const binary_operator& oper) -> expression {
-                    return binary_operator(recurse(oper.lhs), oper.op, recurse(oper.rhs));
+                    return binary_operator(recurse(oper.lhs), oper.op, recurse(oper.rhs), oper.order);
                },
                [&] (const column_mutation_attribute& cma) -> expression {
                    return column_mutation_attribute{cma.kind, recurse(cma.column)};
--- a/cql3/functions/castas_fcts.cc
+++ b/cql3/functions/castas_fcts.cc
@@ -165,8 +165,6 @@ static data_value castas_fctn_from_dv_to_string(data_value from) {
    return from.type()->to_string_impl(from);
 }

-// FIXME: Add conversions for counters, after they are fully implemented...
-
 static constexpr unsigned next_power_of_2(unsigned val) {
    unsigned ret = 1;
    while (ret <= val) {
@@ -370,6 +368,26 @@ castas_fctn get_castas_fctn(data_type to_type, data_type from_type) {
        return castas_fctn_from_dv_to_string;
    case cast_switch_case_val(kind::utf8, kind::ascii):
        return castas_fctn_simple<sstring, sstring>;
+
+    case cast_switch_case_val(kind::byte, kind::counter):
+        return castas_fctn_simple<int8_t, int64_t>;
+    case cast_switch_case_val(kind::short_kind, kind::counter):
+        return castas_fctn_simple<int16_t, int64_t>;
+    case cast_switch_case_val(kind::int32, kind::counter):
+        return castas_fctn_simple<int32_t, int64_t>;
+    case cast_switch_case_val(kind::long_kind, kind::counter):
+        return castas_fctn_simple<int64_t, int64_t>;
+    case cast_switch_case_val(kind::float_kind, kind::counter):
+        return castas_fctn_simple<float, int64_t>;
+    case cast_switch_case_val(kind::double_kind, kind::counter):
+        return castas_fctn_simple<double, int64_t>;
+    case cast_switch_case_val(kind::varint, kind::counter):
+        return castas_fctn_simple<utils::multiprecision_int, int64_t>;
+    case cast_switch_case_val(kind::decimal, kind::counter):
+        return castas_fctn_from_integer_to_decimal<int64_t>;
+    case cast_switch_case_val(kind::ascii, kind::counter):
+    case cast_switch_case_val(kind::utf8, kind::counter):
+        return castas_fctn_to_string<int64_t>;
    }
    throw exceptions::invalid_request_exception(format("{} cannot be cast to {}", from_type->name(), to_type->name()));
 }
--- a/cql3/functions/functions.cc
+++ b/cql3/functions/functions.cc
@@ -13,6 +13,7 @@
 #include "cql3/lists.hh"
 #include "cql3/constants.hh"
 #include "cql3/user_types.hh"
+#include "cql3/ut_name.hh"
 #include "cql3/type_json.hh"
 #include "cql3/functions/user_function.hh"
 #include "cql3/functions/user_aggregate.hh"
@@ -52,6 +53,13 @@ bool abstract_function::requires_thread() const { return false; }

 bool as_json_function::requires_thread() const { return false; }

+static bool same_signature(const shared_ptr<function>& f1, const shared_ptr<function>& f2) {
+    if (f1 == nullptr || f2 == nullptr) {
+        return false;
+    }
+    return f1->name() == f2->name() && f1->arg_types() == f2->arg_types();
+}
+
 thread_local std::unordered_multimap<function_name, shared_ptr<function>> functions::_declared = init();

 void functions::clear_functions() noexcept {
@@ -97,11 +105,6 @@ functions::init() noexcept {
        if (type == cql3_type::blob) {
            continue;
        }
-        // counters are not supported yet
-        if (type.is_counter()) {
-            warn(unimplemented::cause::COUNTERS);
-            continue;
-        }

        declare(make_to_blob_function(type.get_type()));
        declare(make_from_blob_function(type.get_type()));
@@ -143,22 +146,56 @@ void functions::replace_function(shared_ptr<function> func) {
    with_udf_iter(func->name(), func->arg_types(), [func] (functions::declared_t::iterator i) {
        i->second = std::move(func);
    });
+    auto scalar_func = dynamic_pointer_cast<scalar_function>(func);
+    if (!scalar_func) {
+        return;
+    }
+    for (auto& fit : _declared) {
+        auto aggregate = dynamic_pointer_cast<user_aggregate>(fit.second);
+        if (aggregate && (same_signature(aggregate->sfunc(), scalar_func)
+            || (same_signature(aggregate->finalfunc(), scalar_func))
+            || (same_signature(aggregate->reducefunc(), scalar_func))))
+        {
+            // we need to replace at least one underlying function
+            shared_ptr<scalar_function> sfunc = same_signature(aggregate->sfunc(), scalar_func) ? scalar_func : aggregate->sfunc();
+            shared_ptr<scalar_function> finalfunc = same_signature(aggregate->finalfunc(), scalar_func) ? scalar_func : aggregate->finalfunc();
+            shared_ptr<scalar_function> reducefunc = same_signature(aggregate->reducefunc(), scalar_func) ? scalar_func : aggregate->reducefunc();
+            fit.second = ::make_shared<user_aggregate>(aggregate->name(), aggregate->initcond(), sfunc, reducefunc, finalfunc);
+        }
+    }
 }

 void functions::remove_function(const function_name& name, const std::vector<data_type>& arg_types) {
    with_udf_iter(name, arg_types, [] (functions::declared_t::iterator i) { _declared.erase(i); });
 }

-std::optional<function_name> functions::used_by_user_aggregate(const function_name& name) {
+std::optional<function_name> functions::used_by_user_aggregate(shared_ptr<user_function> func) {
    for (const shared_ptr<function>& fptr : _declared | boost::adaptors::map_values) {
        auto aggregate = dynamic_pointer_cast<user_aggregate>(fptr);
-        if (aggregate && (aggregate->sfunc().name() == name || (aggregate->has_finalfunc() && aggregate->finalfunc().name() == name))) {
+        if (aggregate && (same_signature(aggregate->sfunc(), func)
+            || (same_signature(aggregate->finalfunc(), func))
+            || (same_signature(aggregate->reducefunc(), func))))
+        {
            return aggregate->name();
        }
    }
    return {};
 }

+std::optional<function_name> functions::used_by_user_function(const ut_name& user_type) {
+    for (const shared_ptr<function>& fptr : _declared | boost::adaptors::map_values) {
+        for (auto& arg_type : fptr->arg_types()) {
+            if (arg_type->references_user_type(user_type.get_keyspace(), user_type.get_user_type_name())) {
+                return fptr->name();
+            }
+        }
+        if (fptr->return_type()->references_user_type(user_type.get_keyspace(), user_type.get_user_type_name())) {
+            return fptr->name();
+        }
+    }
+    return {};
+}
+
 lw_shared_ptr<column_specification>
 functions::make_arg_spec(const sstring& receiver_ks, const sstring& receiver_cf,
        const function& fun, size_t i) {
--- a/cql3/functions/functions.hh
+++ b/cql3/functions/functions.hh
@@ -71,7 +71,8 @@ public:
    static void add_function(shared_ptr<function>);
    static void replace_function(shared_ptr<function>);
    static void remove_function(const function_name& name, const std::vector<data_type>& arg_types);
-    static std::optional<function_name> used_by_user_aggregate(const function_name& name);
+    static std::optional<function_name> used_by_user_aggregate(shared_ptr<user_function>);
+    static std::optional<function_name> used_by_user_function(const ut_name& user_type);
 private:
    template <typename F>
    static void with_udf_iter(const function_name& name, const std::vector<data_type>& arg_types, F&& f);
--- a/cql3/functions/user_aggregate.hh
+++ b/cql3/functions/user_aggregate.hh
@@ -37,14 +37,14 @@ public:
    virtual sstring element_type() const override { return "aggregate"; }
    virtual std::ostream& describe(std::ostream& os) const override;

-    const scalar_function& sfunc() const {
-        return *_sfunc;
+    seastar::shared_ptr<scalar_function> sfunc() const {
+        return _sfunc;
    }
-    const scalar_function& reducefunc() const {
-        return *_reducefunc;
+    seastar::shared_ptr<scalar_function> reducefunc() const {
+        return _reducefunc;
    }
-    const scalar_function& finalfunc() const {
-        return *_finalfunc;
+    seastar::shared_ptr<scalar_function> finalfunc() const {
+        return _finalfunc;
    }
    const bytes_opt& initcond() const {
        return _initcond;
--- a/cql3/query_options.cc
+++ b/cql3/query_options.cc
@@ -135,12 +135,21 @@ void query_options::prepare(const std::vector<lw_shared_ptr<column_specification
    ordered_values.reserve(specs.size());
    for (auto&& spec : specs) {
        auto& spec_name = spec->name->text();
+        bool found_value_for_name = false;
        for (size_t j = 0; j < names.size(); j++) {
            if (names[j] == spec_name) {
                ordered_values.emplace_back(_value_views[j]);
+                found_value_for_name = true;
                break;
            }
        }
+
+        // No bound value was found with the name `spec_name`.
+        // This means that the user forgot to include a bound value with that name.
+        if (!found_value_for_name) {
+            throw exceptions::invalid_request_exception(
+                format("Missing value for bind marker with name: {}", spec_name));
+        }
    }
    _value_views = std::move(ordered_values);
 }
--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -22,6 +22,7 @@
 #include "db/config.hh"
 #include "data_dictionary/data_dictionary.hh"
 #include "hashers.hh"
+#include "utils/error_injection.hh"

 namespace cql3 {

@@ -600,6 +601,14 @@ query_processor::get_statement(const sstring_view& query, const service::client_
 std::unique_ptr<raw::parsed_statement>
 query_processor::parse_statement(const sstring_view& query) {
    try {
+        {
+            const char* error_injection_key = "query_processor-parse_statement-test_failure";
+            utils::get_local_injector().inject(error_injection_key, [&]() {
+                if (query.find(error_injection_key) != sstring_view::npos) {
+                    throw std::runtime_error(error_injection_key);
+                }
+            });
+        }
        auto statement = util::do_with_parser(query,  std::mem_fn(&cql3_parser::CqlParser::query));
        if (!statement) {
            throw exceptions::syntax_exception("Parsing failed");
--- a/cql3/selection/field_selector.hh
+++ b/cql3/selection/field_selector.hh
@@ -80,7 +80,7 @@ public:

    virtual sstring assignment_testable_source_context() const override {
        auto&& name = _type->field_name(_field);
-        auto sname = sstring(reinterpret_cast<const char*>(name.begin(), name.size()));
+        auto sname = std::string_view(reinterpret_cast<const char*>(name.data()), name.size());
        return format("{}.{}", _selected, sname);
    }

--- a/cql3/statements/cas_request.cc
+++ b/cql3/statements/cas_request.cc
@@ -120,6 +120,9 @@ std::optional<mutation> cas_request::apply(foreign_ptr<lw_shared_ptr<query::resu

 const update_parameters::prefetch_data::row* cas_request::find_old_row(const cas_row_update& op) const {
    static const clustering_key empty_ckey = clustering_key::make_empty();
+    if (_key.empty()) {
+        throw exceptions::invalid_request_exception("Empty partition key range");
+    }
    const partition_key& pkey = _key.front().start()->value().key().value();
    // We must ignore statement clustering column restriction when
    // choosing a row to check the conditions. If there is no
@@ -131,6 +134,9 @@ const update_parameters::prefetch_data::row* cas_request::find_old_row(const cas
    //   CREATE TABLE t(p int, c int, s int static, v int, PRIMARY KEY(p, c));
    //   INSERT INTO t(p, s) VALUES(1, 1);
    //   UPDATE t SET v=1 WHERE p=1 AND c=1 IF s=1;
+    if (op.ranges.empty()) {
+        throw exceptions::invalid_request_exception("Empty clustering range");
+    }
    const clustering_key& ckey = op.ranges.front().start() ?  op.ranges.front().start()->value() : empty_ckey;
    auto row = _rows.find_row(pkey, ckey);
    if (row == nullptr && !ckey.is_empty() &&
--- a/cql3/statements/describe_statement.cc
+++ b/cql3/statements/describe_statement.cc
@@ -178,7 +178,13 @@ future<std::vector<description>> function(replica::database& db, const sstring&

    auto udfs = boost::copy_range<std::vector<shared_ptr<const keyspace_element>>>(fs | boost::adaptors::transformed([] (const auto& f) {
        return dynamic_pointer_cast<const functions::user_function>(f.second);
+    }) | boost::adaptors::filtered([] (const auto& f) {
+        return f != nullptr;
    }));
+    if (udfs.empty()) {
+        throw exceptions::invalid_request_exception(format("Function '{}' not found in keyspace '{}'", name, ks));
+    }
+
    co_return co_await generate_descriptions(db, udfs, true);
 }

@@ -191,13 +197,19 @@ future<std::vector<description>> functions(replica::database& db,const sstring&

 future<std::vector<description>> aggregate(replica::database& db, const sstring& ks, const sstring& name) {
    auto fs = functions::functions::find(functions::function_name(ks, name));
-    if(fs.empty()) {
+    if (fs.empty()) {
        throw exceptions::invalid_request_exception(format("Aggregate '{}' not found in keyspace '{}'", name, ks));
    }

    auto udas = boost::copy_range<std::vector<shared_ptr<const keyspace_element>>>(fs | boost::adaptors::transformed([] (const auto& f) {
        return dynamic_pointer_cast<const functions::user_aggregate>(f.second);
+    }) | boost::adaptors::filtered([] (const auto& f) {
+        return f != nullptr;
    }));
+    if (udas.empty()) {
+        throw exceptions::invalid_request_exception(format("Aggregate '{}' not found in keyspace '{}'", name, ks));
+    }
+
    co_return co_await generate_descriptions(db, udas, true);
 }

--- a/cql3/statements/drop_function_statement.cc
+++ b/cql3/statements/drop_function_statement.cc
@@ -35,7 +35,7 @@ drop_function_statement::prepare_schema_mutations(query_processor& qp, api::time
        if (!user_func) {
            throw exceptions::invalid_request_exception(format("'{}' is not a user defined function", func));
        }
-        if (auto aggregate = functions::functions::used_by_user_aggregate(user_func->name()); bool(aggregate)) {
+        if (auto aggregate = functions::functions::used_by_user_aggregate(user_func)) {
            throw exceptions::invalid_request_exception(format("Cannot delete function {}, as it is used by user-defined aggregate {}", func, *aggregate));
        }
        m = co_await qp.get_migration_manager().prepare_function_drop_announcement(user_func, ts);
--- a/cql3/statements/drop_type_statement.cc
+++ b/cql3/statements/drop_type_statement.cc
@@ -10,6 +10,7 @@
 #include "cql3/statements/drop_type_statement.hh"
 #include "cql3/statements/prepared_statement.hh"
 #include "cql3/query_processor.hh"
+#include "cql3/functions/functions.hh"

 #include "boost/range/adaptor/map.hpp"

@@ -109,6 +110,9 @@ void drop_type_statement::validate_while_executing(query_processor& qp) const {
            }
        }

+        if (auto&& fun_name = functions::functions::used_by_user_function(_name)) {
+            throw exceptions::invalid_request_exception(format("Cannot drop user type {}.{} as it is still used by function {}", keyspace, type->get_name_as_string(), *fun_name));
+        }
    } catch (data_dictionary::no_such_keyspace& e) {
        throw exceptions::invalid_request_exception(format("Cannot drop type in unknown keyspace {}", keyspace()));
    }
--- a/cql3/statements/modification_statement.cc
+++ b/cql3/statements/modification_statement.cc
@@ -17,6 +17,7 @@
 #include "cql3/util.hh"
 #include "validation.hh"
 #include "db/consistency_level_validations.hh"
+#include <optional>
 #include <seastar/core/shared_ptr.hh>
 #include <boost/range/adaptor/transformed.hpp>
 #include <boost/range/adaptor/map.hpp>
@@ -95,8 +96,9 @@ bool modification_statement::is_timestamp_set() const {
    return attrs->is_timestamp_set();
 }

-gc_clock::duration modification_statement::get_time_to_live(const query_options& options) const {
-    return gc_clock::duration(attrs->get_time_to_live(options));
+std::optional<gc_clock::duration> modification_statement::get_time_to_live(const query_options& options) const {
+    std::optional<int32_t> ttl = attrs->get_time_to_live(options);
+    return ttl ? std::make_optional<gc_clock::duration>(*ttl) : std::nullopt;
 }

 future<> modification_statement::check_access(query_processor& qp, const service::client_state& state) const {
@@ -305,6 +307,10 @@ modification_statement::execute_with_condition(query_processor& qp, service::que
        throw exceptions::invalid_request_exception(format("Unrestricted partition key in a conditional {}",
                    type.is_update() ? "update" : "deletion"));
    }
+    if (ranges.empty()) {
+        throw exceptions::invalid_request_exception(format("Unrestricted clustering key in a conditional {}",
+                    type.is_update() ? "update" : "deletion"));
+    }

    auto request = seastar::make_shared<cas_request>(s, std::move(keys));
    // cas_request can be used for batches as well single statements; Here we have just a single
--- a/cql3/statements/modification_statement.hh
+++ b/cql3/statements/modification_statement.hh
@@ -130,7 +130,7 @@ public:

    bool is_timestamp_set() const;

-    gc_clock::duration get_time_to_live(const query_options& options) const;
+    std::optional<gc_clock::duration> get_time_to_live(const query_options& options) const;

    virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;

--- a/cql3/statements/schema_altering_statement.cc
+++ b/cql3/statements/schema_altering_statement.cc
@@ -120,7 +120,10 @@ schema_altering_statement::execute(query_processor& qp, service::query_state& st
    }

    return execute0(qp, state, options).then([this, &state, internal](::shared_ptr<messages::result_message> result) {
-        auto permissions_granted_fut = internal
+        // We don't want to grant the permissions to the supposed creator, even if the statement succeeded, when it's an internal query
+        // or when the query did not actually create the item, i.e. the query is bounced to another shard or it's an IF NOT EXISTS
+        // query where the item already exists.
+        auto permissions_granted_fut = internal || !result->is_schema_change()
                ? make_ready_future<>()
                : grant_permissions_to_creator(state.get_client_state());
        return permissions_granted_fut.then([result = std::move(result)] {
--- a/cql3/update_parameters.hh
+++ b/cql3/update_parameters.hh
@@ -93,7 +93,7 @@ public:
    };
    // Note: value (mutation) only required to contain the rows we are interested in
 private:
-    const gc_clock::duration _ttl;
+    const std::optional<gc_clock::duration> _ttl;
    // For operations that require a read-before-write, stores prefetched cell values.
    // For CAS statements, stores values of conditioned columns.
    // Is a reference to an outside prefetch_data container since a CAS BATCH statement
@@ -106,7 +106,7 @@ public:
    const query_options& _options;

    update_parameters(const schema_ptr schema_, const query_options& options,
-            api::timestamp_type timestamp, gc_clock::duration ttl, const prefetch_data& prefetched)
+            api::timestamp_type timestamp, std::optional<gc_clock::duration> ttl, const prefetch_data& prefetched)
        : _ttl(ttl)
        , _prefetched(prefetched)
        , _timestamp(timestamp)
@@ -127,11 +127,7 @@ public:
    }

    atomic_cell make_cell(const abstract_type& type, const raw_value_view& value, atomic_cell::collection_member cm = atomic_cell::collection_member::no) const {
-        auto ttl = _ttl;
-
-        if (ttl.count() <= 0) {
-            ttl = _schema->default_time_to_live();
-        }
+        auto ttl = this->ttl();

        return value.with_value([&] (const FragmentedView auto& v) {
            if (ttl.count() > 0) {
@@ -143,11 +139,7 @@ public:
    };

    atomic_cell make_cell(const abstract_type& type, const managed_bytes_view& value, atomic_cell::collection_member cm = atomic_cell::collection_member::no) const {
-        auto ttl = _ttl;
-
-        if (ttl.count() <= 0) {
-            ttl = _schema->default_time_to_live();
-        }
+        auto ttl = this->ttl();

        if (ttl.count() > 0) {
            return atomic_cell::make_live(type, _timestamp, value, _local_deletion_time + ttl, ttl, cm);
@@ -169,7 +161,7 @@ public:
    }

    gc_clock::duration ttl() const {
-        return _ttl.count() > 0 ? _ttl : _schema->default_time_to_live();
+        return _ttl.value_or(_schema->default_time_to_live());
    }

    gc_clock::time_point expiry() const {
--- a/data_dictionary/data_dictionary.cc
+++ b/data_dictionary/data_dictionary.cc
@@ -216,7 +216,7 @@ keyspace_metadata::keyspace_metadata(std::string_view name,
                        std::move(strategy_options),
                        durable_writes,
                        std::move(cf_defs),
-                        user_types_metadata{},
+                        std::move(user_types),
                        storage_options{}) { }

 keyspace_metadata::keyspace_metadata(std::string_view name,
@@ -254,6 +254,11 @@ keyspace_metadata::new_keyspace(std::string_view name,
    return ::make_lw_shared<keyspace_metadata>(name, strategy_name, options, durables_writes, cf_defs, user_types_metadata{}, storage_opts);
 }

+lw_shared_ptr<keyspace_metadata>
+keyspace_metadata::new_keyspace(const keyspace_metadata& ksm) {
+    return new_keyspace(ksm.name(), ksm.strategy_name(), ksm.strategy_options(), ksm.durable_writes(), std::vector<schema_ptr>{}, ksm.get_storage_options());
+}
+
 void keyspace_metadata::add_user_type(const user_type ut) {
    _user_types.add_type(ut);
 }
--- a/data_dictionary/keyspace_metadata.hh
+++ b/data_dictionary/keyspace_metadata.hh
@@ -55,6 +55,8 @@ public:
                 bool durables_writes,
                 std::vector<schema_ptr> cf_defs = std::vector<schema_ptr>{},
                 storage_options storage_opts = {});
+    static lw_shared_ptr<keyspace_metadata>
+    new_keyspace(const keyspace_metadata& ksm);
    void validate(const locator::topology&) const;
    const sstring& name() const {
        return _name;
--- a/db/chained_delegating_reader.hh
+++ b/db/chained_delegating_reader.hh
@@ -59,7 +59,7 @@ public:
        }

        _end_of_stream = false;
-        forward_buffer_to(pr.start());
+        clear_buffer();
        return _underlying->fast_forward_to(std::move(pr));
    }

--- a/db/commitlog/commitlog.cc
+++ b/db/commitlog/commitlog.cc
@@ -1671,9 +1671,9 @@ future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager:

        align = f.disk_write_dma_alignment();
        auto is_overwrite = false;
+        auto existing_size = f.known_size();

        if ((flags & open_flags::dsync) != open_flags{}) {
-            auto existing_size = f.known_size();
            is_overwrite = true;
            // would be super nice if we just could mmap(/dev/zero) and do sendto
            // instead of this, but for now we must do explicit buffer writes.
@@ -1683,8 +1683,6 @@ future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager:
            if (existing_size > max_size) {
                co_await f.truncate(max_size);
            } else if (existing_size < max_size) {
-                totals.total_size_on_disk += (max_size - existing_size);
-
                clogger.trace("Pre-writing {} of {} KB to segment {}", (max_size - existing_size)/1024, max_size/1024, filename);

                // re-open without o_dsync for pre-alloc. The reason/rationale
@@ -1732,6 +1730,12 @@ future<db::commitlog::segment_manager::sseg_ptr> db::commitlog::segment_manager:
            co_await f.truncate(max_size);
        }

+        // #12810 - we did not update total_size_on_disk unless o_dsync was
+        // on, so we kept running with total == 0 -> a free-for-all in creating new segments.
+        // Always update total_size_on_disk. The subtraction wraps around iff existing_size > max_size.
+        // That is ok.
+        totals.total_size_on_disk += (max_size - existing_size);
+
        if (cfg.extensions && !cfg.extensions->commitlog_file_extensions().empty()) {
            for (auto * ext : cfg.extensions->commitlog_file_extensions()) {
                auto nf = co_await ext->wrap_file(filename, f, flags);
@@ -2116,6 +2120,9 @@ future<> db::commitlog::segment_manager::do_pending_deletes() {
    clogger.debug("Discarding segments {}", ftd);

    for (auto& [f, mode] : ftd) {
+        // `f.remove_file()` resets known_size to 0, so remember the size here,
+        // in order to subtract it from total_size_on_disk accurately.
+        auto size = f.known_size();
        try {
            if (f) {
                co_await f.close();
@@ -2132,7 +2139,6 @@ future<> db::commitlog::segment_manager::do_pending_deletes() {
                }
            }

-            auto size = f.known_size();
            auto usage = totals.total_size_on_disk;
            auto next_usage = usage - size;

@@ -2165,7 +2171,7 @@ future<> db::commitlog::segment_manager::do_pending_deletes() {
        // or had such an exception that we consider the file dead
        // anyway. In either case we _remove_ the file size from
        // footprint, because it is no longer our problem.
-        totals.total_size_on_disk -= f.known_size();
+        totals.total_size_on_disk -= size;
    }

    // #8376 - if we had an error in recycling (disk rename?), and no elements
--- a/db/config.cc
+++ b/db/config.cc
@@ -418,6 +418,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , commitlog_segment_size_in_mb(this, "commitlog_segment_size_in_mb", value_status::Used, 64,
        "Sets the size of the individual commitlog file segments. A commitlog segment may be archived, deleted, or recycled after all its data has been flushed to SSTables. This amount of data can potentially include commitlog segments from every table in the system. The default size is usually suitable for most commitlog archiving, but if you want a finer granularity, 8 or 16 MB is reasonable. See Commit log archive configuration.\n"
        "Related information: Commit log archive configuration")
+    , schema_commitlog_segment_size_in_mb(this, "schema_commitlog_segment_size_in_mb", value_status::Used, 32,
+        "Sets the size of the individual schema commitlog file segments. The segment size puts a limit on the mutation size that can be written at once, and some schema mutation writes are much larger than average.\n"
+        "Related information: Commit log archive configuration")
    /* Note: does not exist on the listing page other than in above comment, wtf? */
    , commitlog_sync_period_in_ms(this, "commitlog_sync_period_in_ms", value_status::Used, 10000,
        "Controls how long the system waits for other writes before performing a sync in \"periodic\" mode.")
@@ -826,6 +829,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , enable_sstables_mc_format(this, "enable_sstables_mc_format", value_status::Unused, true, "Enable SSTables 'mc' format to be used as the default file format.  Deprecated, please use \"sstable_format\" instead.")
    , enable_sstables_md_format(this, "enable_sstables_md_format", value_status::Unused, true, "Enable SSTables 'md' format to be used as the default file format.  Deprecated, please use \"sstable_format\" instead.")
    , sstable_format(this, "sstable_format", value_status::Used, "me", "Default sstable file format", {"md", "me"})
+    , table_digest_insensitive_to_expiry(this, "table_digest_insensitive_to_expiry", liveness::MustRestart, value_status::Used, true,
+            "When enabled, per-table schema digest calculation ignores empty partitions.")
    , enable_dangerous_direct_import_of_cassandra_counters(this, "enable_dangerous_direct_import_of_cassandra_counters", value_status::Used, false, "Only turn this option on if you want to import tables from Cassandra containing counters, and you are SURE that no counters in that table were created in a version earlier than Cassandra 2.1."
        " It is not enough to have ever since upgraded to newer versions of Cassandra. If you EVER used a version earlier than 2.1 in the cluster where these SSTables come from, DO NOT TURN ON THIS OPTION! You will corrupt your data. You have been warned.")
    , enable_shard_aware_drivers(this, "enable_shard_aware_drivers", value_status::Used, true, "Enable native transport drivers to use connection-per-shard for better performance")
@@ -909,6 +914,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , force_schema_commit_log(this, "force_schema_commit_log", value_status::Used, false,
        "Use separate schema commit log unconditionally rater than after restart following discovery of cluster-wide support for it.")
    , task_ttl_seconds(this, "task_ttl_in_seconds", liveness::LiveUpdate, value_status::Used, 10, "Time for which information about finished task stays in memory.")
+    , nodeops_watchdog_timeout_seconds(this, "nodeops_watchdog_timeout_seconds", liveness::LiveUpdate, value_status::Used, 120, "Time in seconds after which node operations abort when not hearing from the coordinator")
+    , nodeops_heartbeat_interval_seconds(this, "nodeops_heartbeat_interval_seconds", liveness::LiveUpdate, value_status::Used, 10, "Period of heartbeat ticks in node operations")
    , cache_index_pages(this, "cache_index_pages", liveness::LiveUpdate, value_status::Used, false,
        "Keep SSTable index pages in the global cache after a SSTable read. Expected to improve performance for workloads with big partitions, but may degrade performance for workloads with small partitions.")
    , x_log2_compaction_groups(this, "x_log2_compaction_groups", value_status::Used, 0, "Controls static number of compaction groups per table per shard. For X groups, set the option to log (base 2) of X. Example: Value of 3 implies 8 groups.")
--- a/db/config.hh
+++ b/db/config.hh
@@ -175,6 +175,7 @@ public:
    named_value<uint32_t> failure_detector_timeout_in_ms;
    named_value<sstring> commitlog_sync;
    named_value<uint32_t> commitlog_segment_size_in_mb;
+    named_value<uint32_t> schema_commitlog_segment_size_in_mb;
    named_value<uint32_t> commitlog_sync_period_in_ms;
    named_value<uint32_t> commitlog_sync_batch_window_in_ms;
    named_value<int64_t> commitlog_total_space_in_mb;
@@ -331,6 +332,7 @@ public:
    named_value<bool> enable_sstables_mc_format;
    named_value<bool> enable_sstables_md_format;
    named_value<sstring> sstable_format;
+    named_value<bool> table_digest_insensitive_to_expiry;
    named_value<bool> enable_dangerous_direct_import_of_cassandra_counters;
    named_value<bool> enable_shard_aware_drivers;
    named_value<bool> enable_ipv6_dns_lookup;
@@ -388,6 +390,8 @@ public:
    named_value<bool> force_schema_commit_log;

    named_value<uint32_t> task_ttl_seconds;
+    named_value<uint32_t> nodeops_watchdog_timeout_seconds;
+    named_value<uint32_t> nodeops_heartbeat_interval_seconds;

    named_value<bool> cache_index_pages;

@@ -401,6 +405,10 @@ public:
    named_value<uint64_t> wasm_udf_yield_fuel;
    named_value<uint64_t> wasm_udf_total_fuel;
    named_value<size_t> wasm_udf_memory_limit;
+    // wasm_udf_reserved_memory is static because the options in db::config
+    // are parsed using seastar::app_template, while this option is used for
+    // configuring the Seastar memory subsystem.
+    static constexpr size_t wasm_udf_reserved_memory = 50 * 1024 * 1024;

    seastar::logging_settings logging_settings(const log_cli::options&) const;

--- a/db/large_data_handler.cc
+++ b/db/large_data_handler.cc
@@ -7,6 +7,7 @@
 */

 #include <seastar/core/print.hh>
+#include <seastar/core/coroutine.hh>
 #include "db/system_keyspace.hh"
 #include "db/large_data_handler.hh"
 #include "sstables/sstables.hh"
@@ -55,11 +56,11 @@ void large_data_handler::start() {
 }

 future<> large_data_handler::stop() {
-    if (!running()) {
-        return make_ready_future<>();
+    if (running()) {
+        _running = false;
+        large_data_logger.info("Waiting for {} background handlers", max_concurrency - _sem.available_units());
+        co_await _sem.wait(max_concurrency);
    }
-    _running = false;
-    return _sem.wait(max_concurrency);
 }

 void large_data_handler::plug_system_keyspace(db::system_keyspace& sys_ks) noexcept {
--- a/db/schema_features.hh
+++ b/db/schema_features.hh
@@ -24,6 +24,10 @@ enum class schema_feature {
    PER_TABLE_PARTITIONERS,
    SCYLLA_KEYSPACES,
    SCYLLA_AGGREGATES,
+
+    // When enabled, schema_mutations::digest() will skip empty mutations (with only tombstones),
+    // so that the digest remains the same after schema tables are compacted.
+    TABLE_DIGEST_INSENSITIVE_TO_EXPIRY,
 };

 using schema_features = enum_set<super_enum<schema_feature,
@@ -33,7 +37,8 @@ using schema_features = enum_set<super_enum<schema_feature,
    schema_feature::CDC_OPTIONS,
    schema_feature::PER_TABLE_PARTITIONERS,
    schema_feature::SCYLLA_KEYSPACES,
-    schema_feature::SCYLLA_AGGREGATES
+    schema_feature::SCYLLA_AGGREGATES,
+    schema_feature::TABLE_DIGEST_INSENSITIVE_TO_EXPIRY
    >>;

 }
--- a/db/schema_tables.cc
+++ b/db/schema_tables.cc
@@ -93,15 +93,18 @@ static bool is_extra_durable(const sstring& ks_name, const sstring& cf_name) {
 /** system.schema_* tables used to store keyspace/table/type attributes prior to C* 3.0 */
 namespace db {

-schema_ctxt::schema_ctxt(const db::config& cfg, std::shared_ptr<data_dictionary::user_types_storage> uts)
-    : _extensions(cfg.extensions())
+schema_ctxt::schema_ctxt(const db::config& cfg, std::shared_ptr<data_dictionary::user_types_storage> uts, 
+                         const gms::feature_service& features, replica::database* db)
+    : _db(db)
+    , _features(features)
+    , _extensions(cfg.extensions())
    , _murmur3_partitioner_ignore_msb_bits(cfg.murmur3_partitioner_ignore_msb_bits())
    , _schema_registry_grace_period(cfg.schema_registry_grace_period())
    , _user_types(std::move(uts))
 {}

-schema_ctxt::schema_ctxt(const replica::database& db)
-    : schema_ctxt(db.get_config(), db.as_user_types_storage())
+schema_ctxt::schema_ctxt(replica::database& db)
+    : schema_ctxt(db.get_config(), db.as_user_types_storage(), db.features(), &db)
 {}

 schema_ctxt::schema_ctxt(distributed<replica::database>& db)
@@ -148,7 +151,8 @@ static future<> merge_tables_and_views(distributed<service::storage_proxy>& prox
    std::map<table_id, schema_mutations>&& tables_before,
    std::map<table_id, schema_mutations>&& tables_after,
    std::map<table_id, schema_mutations>&& views_before,
-    std::map<table_id, schema_mutations>&& views_after);
+    std::map<table_id, schema_mutations>&& views_after,
+    bool reload);

 struct [[nodiscard]] user_types_to_drop final {
    seastar::noncopyable_function<future<> ()> drop;
@@ -161,7 +165,7 @@ static future<user_types_to_drop> merge_types(distributed<service::storage_proxy
 static future<> merge_functions(distributed<service::storage_proxy>& proxy, schema_result before, schema_result after);
 static future<> merge_aggregates(distributed<service::storage_proxy>& proxy, schema_result before, schema_result after, schema_result scylla_before, schema_result scylla_after);

-static future<> do_merge_schema(distributed<service::storage_proxy>&, std::vector<mutation>, bool do_flush);
+static future<> do_merge_schema(distributed<service::storage_proxy>&, std::vector<mutation>, bool do_flush, bool reload);

 using computed_columns_map = std::unordered_map<bytes, column_computation_ptr>;
 static computed_columns_map get_computed_columns(const schema_mutations& sm);
@@ -941,18 +945,18 @@ future<> update_schema_version_and_announce(sharded<db::system_keyspace>& sys_ks
 * @throws ConfigurationException If one of metadata attributes has invalid value
 * @throws IOException If data was corrupted during transportation or failed to apply fs operations
 */
-future<> merge_schema(sharded<db::system_keyspace>& sys_ks, distributed<service::storage_proxy>& proxy, gms::feature_service& feat, std::vector<mutation> mutations)
+future<> merge_schema(sharded<db::system_keyspace>& sys_ks, distributed<service::storage_proxy>& proxy, gms::feature_service& feat, std::vector<mutation> mutations, bool reload)
 {
    if (this_shard_id() != 0) {
        // mutations must be applied on the owning shard (0).
        co_await smp::submit_to(0, [&, fmuts = freeze(mutations)] () mutable -> future<> {
-            return merge_schema(sys_ks, proxy, feat, unfreeze(fmuts));
+            return merge_schema(sys_ks, proxy, feat, unfreeze(fmuts), reload);
        });
        co_return;
    }
    co_await with_merge_lock([&] () mutable -> future<> {
        bool flush_schema = proxy.local().get_db().local().get_config().flush_schema_tables_after_modification();
-        co_await do_merge_schema(proxy, std::move(mutations), flush_schema);
+        co_await do_merge_schema(proxy, std::move(mutations), flush_schema, reload);
        co_await update_schema_version_and_announce(sys_ks, proxy, feat.cluster_schema_features());
    });
 }
@@ -1095,7 +1099,7 @@ future<> store_column_mapping(distributed<service::storage_proxy>& proxy, schema
 // and or filesystem calls, e.g. fsync.
 constexpr size_t max_concurrent = 8;

-static future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations, bool do_flush)
+static future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations, bool do_flush, bool reload)
 {
    slogger.trace("do_merge_schema: {}", mutations);
    schema_ptr s = keyspaces();
@@ -1110,6 +1114,12 @@ static future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std:
        delete_schema_version(mutation);
    }

+    if (reload) {
+        for (auto&& ks : proxy.local().get_db().local().get_non_system_keyspaces()) {
+            keyspaces.emplace(ks);
+        }
+    }
+
    // current state of the schema
    auto&& old_keyspaces = co_await read_schema_for_keyspaces(proxy, KEYSPACES, keyspaces);
    auto&& old_column_families = co_await read_tables_for_keyspaces(proxy, keyspaces, tables());
@@ -1145,18 +1155,16 @@ static future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std:
    auto types_to_drop = co_await merge_types(proxy, std::move(old_types), std::move(new_types));
    co_await merge_tables_and_views(proxy,
        std::move(old_column_families), std::move(new_column_families),
-        std::move(old_views), std::move(new_views));
+        std::move(old_views), std::move(new_views), reload);
    co_await merge_functions(proxy, std::move(old_functions), std::move(new_functions));
    co_await merge_aggregates(proxy, std::move(old_aggregates), std::move(new_aggregates), std::move(old_scylla_aggregates), std::move(new_scylla_aggregates));
    co_await types_to_drop.drop();

-    co_await proxy.local().get_db().invoke_on_all([&] (replica::database& db) -> future<> {
-        // it is safe to drop a keyspace only when all nested ColumnFamilies where deleted
-        for (auto keyspace_to_drop : keyspaces_to_drop) {
-            db.drop_keyspace(keyspace_to_drop);
-            co_await db.get_notifier().drop_keyspace(keyspace_to_drop);
-        }
-    });
+    auto& sharded_db = proxy.local().get_db();
+    // it is safe to drop a keyspace only when all nested ColumnFamilies were deleted
+    for (auto keyspace_to_drop : keyspaces_to_drop) {
+        co_await replica::database::drop_keyspace_on_all_shards(sharded_db, keyspace_to_drop);
+    }
 }

 future<lw_shared_ptr<query::result_set>> extract_scylla_specific_keyspace_info(distributed<service::storage_proxy>& proxy, const schema_result_value_type& partition) {
@@ -1205,19 +1213,18 @@ future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& p
        slogger.info("Altering keyspace {}", key);
        altered.emplace_back(key);
    }
-    co_await proxy.local().get_db().invoke_on_all([&] (replica::database& db) -> future<> {
-        for (auto&& val : created) {
-            auto scylla_specific_rs = co_await extract_scylla_specific_keyspace_info(proxy, val);
-            auto ksm = create_keyspace_from_schema_partition(val, std::move(scylla_specific_rs));
-            co_await db.create_keyspace(ksm, proxy.local().get_erm_factory());
-            co_await db.get_notifier().create_keyspace(ksm);
-        }
-        {
-            for (auto& name : altered) {
-                co_await db.update_keyspace(proxy, name);
-            };
-        }
-    });
+    auto& sharded_db = proxy.local().get_db();
+    for (auto&& val : created) {
+        auto scylla_specific_rs = co_await extract_scylla_specific_keyspace_info(proxy, val);
+        auto ksm = create_keyspace_from_schema_partition(val, std::move(scylla_specific_rs));
+        co_await replica::database::create_keyspace_on_all_shards(sharded_db, proxy, *ksm);
+    }
+    for (auto& name : altered) {
+        auto v = co_await db::schema_tables::read_schema_partition_for_keyspace(proxy, db::schema_tables::KEYSPACES, name);
+        auto scylla_specific_rs = co_await db::schema_tables::extract_scylla_specific_keyspace_info(proxy, v);
+        auto tmp_ksm = db::schema_tables::create_keyspace_from_schema_partition(v, scylla_specific_rs);
+        co_await replica::database::update_keyspace_on_all_shards(sharded_db, proxy, *tmp_ksm);
+    }
    co_return dropped;
 }

@@ -1252,6 +1259,7 @@ enum class schema_diff_side {
 static schema_diff diff_table_or_view(distributed<service::storage_proxy>& proxy,
    std::map<table_id, schema_mutations>&& before,
    std::map<table_id, schema_mutations>&& after,
+    bool reload,
    noncopyable_function<schema_ptr (schema_mutations sm, schema_diff_side)> create_schema)
 {
    schema_diff d;
@@ -1272,6 +1280,13 @@ static schema_diff diff_table_or_view(distributed<service::storage_proxy>& proxy
        slogger.info("Altering {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
        d.altered.emplace_back(schema_diff::altered_schema{s_before, s});
    }
+    if (reload) {
+        for (auto&& key: diff.entries_in_common) {
+            auto s = create_schema(std::move(after.at(key)), schema_diff_side::right);
+            slogger.info("Reloading {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
+            d.altered.emplace_back(schema_diff::altered_schema {s, s});
+        }
+    }
    return d;
 }

@@ -1284,12 +1299,13 @@ static future<> merge_tables_and_views(distributed<service::storage_proxy>& prox
    std::map<table_id, schema_mutations>&& tables_before,
    std::map<table_id, schema_mutations>&& tables_after,
    std::map<table_id, schema_mutations>&& views_before,
-    std::map<table_id, schema_mutations>&& views_after)
+    std::map<table_id, schema_mutations>&& views_after,
+    bool reload)
 {
-    auto tables_diff = diff_table_or_view(proxy, std::move(tables_before), std::move(tables_after), [&] (schema_mutations sm, schema_diff_side) {
+    auto tables_diff = diff_table_or_view(proxy, std::move(tables_before), std::move(tables_after), reload, [&] (schema_mutations sm, schema_diff_side) {
        return create_table_from_mutations(proxy, std::move(sm));
    });
-    auto views_diff = diff_table_or_view(proxy, std::move(views_before), std::move(views_after), [&] (schema_mutations sm, schema_diff_side side) {
+    auto views_diff = diff_table_or_view(proxy, std::move(views_before), std::move(views_after), reload, [&] (schema_mutations sm, schema_diff_side side) {
        // The view schema mutation should be created with reference to the base table schema because we definitely know it by now.
        // If we don't do it we are leaving a window where write commands to this schema are illegal.
        // There are 3 possibilities:
@@ -2216,15 +2232,15 @@ std::vector<mutation> make_create_aggregate_mutations(schema_features features,
    mutation& m = p.first;
    clustering_key& ckey = p.second;

-    data_type state_type = aggregate->sfunc().arg_types()[0];
+    data_type state_type = aggregate->sfunc()->arg_types()[0];
    if (aggregate->has_finalfunc()) {
-        m.set_clustered_cell(ckey, "final_func", aggregate->finalfunc().name().name, timestamp);
+        m.set_clustered_cell(ckey, "final_func", aggregate->finalfunc()->name().name, timestamp);
    }
    if (aggregate->initcond()) {
        m.set_clustered_cell(ckey, "initcond", state_type->deserialize(*aggregate->initcond()).to_parsable_string(), timestamp);
    }
    m.set_clustered_cell(ckey, "return_type", aggregate->return_type()->as_cql3_type().to_string(), timestamp);
-    m.set_clustered_cell(ckey, "state_func", aggregate->sfunc().name().name, timestamp);
+    m.set_clustered_cell(ckey, "state_func", aggregate->sfunc()->name().name, timestamp);
    m.set_clustered_cell(ckey, "state_type", state_type->as_cql3_type().to_string(), timestamp);
    std::vector<mutation> muts = {m};

@@ -2233,7 +2249,7 @@ std::vector<mutation> make_create_aggregate_mutations(schema_features features,
        auto sa_p = get_mutation(sa_schema, *aggregate);
        mutation& sa_mut = sa_p.first;
        clustering_key& sa_ckey = sa_p.second;
-        sa_mut.set_clustered_cell(sa_ckey, "reduce_func", aggregate->reducefunc().name().name, timestamp);
+        sa_mut.set_clustered_cell(sa_ckey, "reduce_func", aggregate->reducefunc()->name().name, timestamp);
        sa_mut.set_clustered_cell(sa_ckey, "state_type", state_type->as_cql3_type().to_string(), timestamp);

        muts.emplace_back(sa_mut);
@@ -2964,7 +2980,7 @@ schema_ptr create_table_from_mutations(const schema_ctxt& ctxt, schema_mutations
    if (version) {
        builder.with_version(*version);
    } else {
-        builder.with_version(sm.digest());
+        builder.with_version(sm.digest(ctxt.features().cluster_schema_features()));
    }

    if (auto partitioner = sm.partitioner()) {
@@ -3195,7 +3211,7 @@ view_ptr create_view_from_mutations(const schema_ctxt& ctxt, schema_mutations sm
    if (version) {
        builder.with_version(*version);
    } else {
-        builder.with_version(sm.digest());
+        builder.with_version(sm.digest(ctxt.features().cluster_schema_features()));
    }

    auto base_id = table_id(row.get_nonnull<utils::UUID>("base_table_id"));
--- a/db/schema_tables.hh
+++ b/db/schema_tables.hh
@@ -14,6 +14,7 @@
 #include "schema_fwd.hh"
 #include "schema_features.hh"
 #include "hashing.hh"
+#include "gms/feature_service.hh"
 #include "schema_mutations.hh"
 #include "types/map.hh"
 #include "query-result-set.hh"
@@ -66,8 +67,8 @@ class config;

 class schema_ctxt {
 public:
-    schema_ctxt(const config&, std::shared_ptr<data_dictionary::user_types_storage> uts);
-    schema_ctxt(const replica::database&);
+    schema_ctxt(const config&, std::shared_ptr<data_dictionary::user_types_storage> uts, const gms::feature_service&, replica::database* = nullptr);
+    schema_ctxt(replica::database&);
    schema_ctxt(distributed<replica::database>&);
    schema_ctxt(distributed<service::storage_proxy>&);

@@ -87,7 +88,17 @@ public:
        return *_user_types;
    }

+    const gms::feature_service& features() const {
+        return _features;
+    }
+
+    replica::database* get_db() const {
+        return _db;
+    }
+
 private:
+    replica::database* _db;
+    const gms::feature_service& _features;
    const db::extensions& _extensions;
    const unsigned _murmur3_partitioner_ignore_msb_bits;
    const uint32_t _schema_registry_grace_period;
@@ -128,6 +139,7 @@ schema_ptr indexes();
 schema_ptr tables();
 schema_ptr scylla_tables(schema_features features = schema_features::full());
 schema_ptr views();
+schema_ptr types();
 schema_ptr computed_columns();
 // Belongs to the "system" keyspace
 schema_ptr scylla_table_schema_history();
@@ -184,7 +196,7 @@ future<mutation> read_keyspace_mutation(distributed<service::storage_proxy>&, co
 // Must be called on shard 0.
 future<semaphore_units<>> hold_merge_lock() noexcept;

-future<> merge_schema(sharded<db::system_keyspace>& sys_ks, distributed<service::storage_proxy>& proxy, gms::feature_service& feat, std::vector<mutation> mutations);
+future<> merge_schema(sharded<db::system_keyspace>& sys_ks, distributed<service::storage_proxy>& proxy, gms::feature_service& feat, std::vector<mutation> mutations, bool reload);

 // Recalculates the local schema version.
 //
--- a/db/size_estimates_virtual_reader.cc
+++ b/db/size_estimates_virtual_reader.cc
@@ -295,7 +295,7 @@ future<> size_estimates_mutation_reader::fast_forward_to(const dht::partition_ra
 }

 future<> size_estimates_mutation_reader::fast_forward_to(position_range pr) {
-    forward_buffer_to(pr.start());
+    clear_buffer();
    _end_of_stream = false;
    if (_partition_reader) {
        return _partition_reader->fast_forward_to(std::move(pr));
--- a/db/system_keyspace.cc
+++ b/db/system_keyspace.cc
@@ -2276,7 +2276,10 @@ public:
        add_partition(mutation_sink, "trace_probability", format("{:.2}", tracing::tracing::get_local_tracing_instance().get_trace_probability()));
        co_await add_partition(mutation_sink, "memory", [this] () {
            struct stats {
-                uint64_t total = 0;
+                // take the pre-reserved memory into account, as seastar only returns
+                // the stats of memory managed by the seastar allocator, but we instruct
+                // it to reserve additional memory for the system.
+                uint64_t total = db::config::wasm_udf_reserved_memory;
                uint64_t free = 0;
                static stats reduce(stats a, stats b) { return stats{a.total + b.total, a.free + b.free}; }
            };
@@ -3344,11 +3347,11 @@ mutation system_keyspace::make_group0_history_state_id_mutation(
        using namespace std::chrono;
        assert(*gc_older_than >= gc_clock::duration{0});

-        auto ts_millis = duration_cast<milliseconds>(microseconds{ts});
-        auto gc_older_than_millis = duration_cast<milliseconds>(*gc_older_than);
-        assert(gc_older_than_millis < ts_millis);
+        auto ts_micros = microseconds{ts};
+        auto gc_older_than_micros = duration_cast<microseconds>(*gc_older_than);
+        assert(gc_older_than_micros < ts_micros);

-        auto tomb_upper_bound = utils::UUID_gen::min_time_UUID(ts_millis - gc_older_than_millis);
+        auto tomb_upper_bound = utils::UUID_gen::min_time_UUID(ts_micros - gc_older_than_micros);
        // We want to delete all entries with IDs smaller than `tomb_upper_bound`
        // but the deleted range is of the form (x, +inf) since the schema is reversed.
        auto range = query::clustering_range::make_starting_with({
--- a/db/view/build_progress_virtual_reader.hh
+++ b/db/view/build_progress_virtual_reader.hh
@@ -172,7 +172,7 @@ class build_progress_virtual_reader {
        }

        virtual future<> fast_forward_to(position_range range) override {
-            forward_buffer_to(range.start());
+            clear_buffer();
            _end_of_stream = false;
            return _underlying.fast_forward_to(std::move(range));
        }
@@ -197,7 +197,7 @@ public:
            streamed_mutation::forwarding fwd,
            mutation_reader::forwarding fwd_mr) {
        return flat_mutation_reader_v2(std::make_unique<build_progress_reader>(
-                std::move(s),
+                s,
                std::move(permit),
                _db.find_column_family(s->ks_name(), system_keyspace::v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
                range,
--- a/db/view/row_locking.cc
+++ b/db/view/row_locking.cc
@@ -85,29 +85,25 @@ future<row_locker::lock_holder>
 row_locker::lock_ck(const dht::decorated_key& pk, const clustering_key_prefix& cpk, bool exclusive, db::timeout_clock::time_point timeout, stats& stats) {
    mylog.debug("taking shared lock on partition {}, and {} lock on row {} in it", pk, (exclusive ? "exclusive" : "shared"), cpk);
    auto tracker = latency_stats_tracker(exclusive ? stats.exclusive_row : stats.shared_row);
+    auto ck = cpk;
+    // Create a two-level lock entry for the partition if it doesn't exist already.
    auto i = _two_level_locks.try_emplace(pk, this).first;
+    // The two-level lock entry we've just created is guaranteed to be kept alive as long as it's locked.
+    // Initiating read locking in the background below ensures that even if the two-level lock is currently
+    // write-locked, releasing the write-lock will synchronously engage any waiting
+    // locks and will keep the entry alive.
    future<lock_type::holder> lock_partition = i->second._partition_lock.hold_read_lock(timeout);
-    auto j = i->second._row_locks.find(cpk);
-    if (j == i->second._row_locks.end()) {
-        // Not yet locked, need to create the lock. This makes a copy of cpk.
-        try {
-            j = i->second._row_locks.emplace(cpk, lock_type()).first;
-        } catch(...) {
-            // If this emplace() failed, e.g., out of memory, we fail. We
-            // could do nothing - the partition lock we already started
-            // taking will be unlocked automatically after being locked.
-            // But it's better form to wait for the work we started, and it
-            // will also allow us to remove the hash-table row we added.
-            return lock_partition.then([ex = std::current_exception()] (auto lock) {
-                // The lock is automatically released when "lock" goes out of scope.
-                // TODO: unlock (lock = {}) now, search for the partition in the
-                // hash table (we know it's still there, because we held the lock until
-                // now) and remove the unused lock from the hash table if still unused.
-                return make_exception_future<row_locker::lock_holder>(std::current_exception());
-            });
+    return lock_partition.then([this, pk = &i->first, row_locks = &i->second._row_locks, ck = std::move(ck), exclusive, tracker = std::move(tracker), timeout] (auto lock1) mutable {
+        auto j = row_locks->find(ck);
+        if (j == row_locks->end()) {
+            // Not yet locked, need to create the lock.
+            j = row_locks->emplace(std::move(ck), lock_type()).first;
        }
-    }
-    return lock_partition.then([this, pk = &i->first, cpk = &j->first, &row_lock = j->second, exclusive, tracker = std::move(tracker), timeout] (auto lock1) mutable {
+        auto* cpk = &j->first;
+        auto& row_lock = j->second;
+        // Like the two-level lock entry above, the row_lock entry we've just created
+        // is guaranteed to be kept alive as long as it's locked.
+        // Initiating read/write locking in the background below ensures that.
        auto lock_row = exclusive ? row_lock.hold_write_lock(timeout) : row_lock.hold_read_lock(timeout);
        return lock_row.then([this, pk, cpk, exclusive, tracker = std::move(tracker), lock1 = std::move(lock1)] (auto lock2) mutable {
            lock1.release();
--- a/db/view/view.cc
+++ b/db/view/view.cc
@@ -1676,7 +1676,7 @@ future<> mutate_MV(
            auto mut_ptr = remote_endpoints.empty() ? std::make_unique<frozen_mutation>(std::move(mut.fm)) : std::make_unique<frozen_mutation>(mut.fm);
            tracing::trace(tr_state, "Locally applying view update for {}.{}; base token = {}; view token = {}",
                    mut.s->ks_name(), mut.s->cf_name(), base_token, view_token);
-            local_view_update = service::get_local_storage_proxy().mutate_locally(mut.s, *mut_ptr, tr_state, db::commitlog::force_sync::no).then_wrapped(
+            local_view_update = service::get_local_storage_proxy().mutate_mv_locally(mut.s, *mut_ptr, tr_state, db::commitlog::force_sync::no).then_wrapped(
                    [s = mut.s, &stats, &cf_stats, tr_state, base_token, view_token, my_address, mut_ptr = std::move(mut_ptr),
                            units = sem_units.split(sem_units.count())] (future<>&& f) {
                --stats.writes;
@@ -1825,6 +1825,8 @@ future<> view_builder::start(service::migration_manager& mm) {
            (void)_build_step.trigger();
            return make_ready_future<>();
        });
+    }).handle_exception_type([] (const seastar::sleep_aborted& e) {
+        vlogger.debug("start aborted: {}", e.what());
    }).handle_exception([] (std::exception_ptr eptr) {
        vlogger.error("start failed: {}", eptr);
        return make_ready_future<>();
@@ -2523,32 +2525,33 @@ update_backlog node_update_backlog::add_fetch(unsigned shard, update_backlog bac
    return std::max(backlog, _max.load(std::memory_order_relaxed));
 }

-future<bool> check_view_build_ongoing(db::system_distributed_keyspace& sys_dist_ks, const sstring& ks_name, const sstring& cf_name) {
-    return sys_dist_ks.view_status(ks_name, cf_name).then([] (std::unordered_map<locator::host_id, sstring>&& view_statuses) {
-        return boost::algorithm::any_of(view_statuses | boost::adaptors::map_values, [] (const sstring& view_status) {
-            return view_status == "STARTED";
+future<bool> check_view_build_ongoing(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata& tm, const sstring& ks_name,
+        const sstring& cf_name) {
+    using view_statuses_type = std::unordered_map<locator::host_id, sstring>;
+    return sys_dist_ks.view_status(ks_name, cf_name).then([&tm] (view_statuses_type&& view_statuses) {
+        return boost::algorithm::any_of(view_statuses, [&tm] (const view_statuses_type::value_type& view_status) {
+            // Only consider status of known hosts.
+            return view_status.second == "STARTED" && tm.get_endpoint_for_host_id(view_status.first);
        });
    });
 }

-future<bool> check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const replica::table& t, streaming::stream_reason reason) {
+future<bool> check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata& tm, const replica::table& t,
+        streaming::stream_reason reason) {
    if (is_internal_keyspace(t.schema()->ks_name())) {
        return make_ready_future<bool>(false);
    }
    if (reason == streaming::stream_reason::repair && !t.views().empty()) {
        return make_ready_future<bool>(true);
    }
-    return do_with(t.views(), [&sys_dist_ks] (auto& views) {
+    return do_with(t.views(), [&sys_dist_ks, &tm] (auto& views) {
        return map_reduce(views,
-                [&sys_dist_ks] (const view_ptr& view) { return check_view_build_ongoing(sys_dist_ks, view->ks_name(), view->cf_name()); },
+                [&sys_dist_ks, &tm] (const view_ptr& view) { return check_view_build_ongoing(sys_dist_ks, tm, view->ks_name(), view->cf_name()); },
                false,
                std::logical_or<bool>());
    });
 }

-const size_t view_updating_consumer::buffer_size_soft_limit{1 * 1024 * 1024};
-const size_t view_updating_consumer::buffer_size_hard_limit{2 * 1024 * 1024};
-
 void view_updating_consumer::do_flush_buffer() {
    _staging_reader_handle.pause();

@@ -2571,6 +2574,10 @@ void view_updating_consumer::do_flush_buffer() {
 }

 void view_updating_consumer::flush_builder() {
+    _buffer.emplace_back(_mut_builder->flush());
+}
+
+void view_updating_consumer::end_builder() {
    _mut_builder->consume_end_of_partition();
    if (auto mut_opt = _mut_builder->consume_end_of_stream()) {
        _buffer.emplace_back(std::move(*mut_opt));
@@ -2579,11 +2586,9 @@ void view_updating_consumer::flush_builder() {
 }

 void view_updating_consumer::maybe_flush_buffer_mid_partition() {
-    if (_buffer_size >= buffer_size_hard_limit) {
+    if (_buffer_size >= _buffer_size_hard_limit) {
        flush_builder();
-        auto dk = _buffer.back().decorated_key();
        do_flush_buffer();
-        consume_new_partition(dk);
    }
 }

--- a/db/view/view_update_checks.hh
+++ b/db/view/view_update_checks.hh
@@ -22,9 +22,13 @@ class system_distributed_keyspace;

 }

+namespace locator {
+class token_metadata;
+}
+
 namespace db::view {

-future<bool> check_view_build_ongoing(db::system_distributed_keyspace& sys_dist_ks, const sstring& ks_name, const sstring& cf_name);
-future<bool> check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const replica::table& t, streaming::stream_reason reason);
+future<bool> check_needs_view_update_path(db::system_distributed_keyspace& sys_dist_ks, const locator::token_metadata& tm, const replica::table& t,
+        streaming::stream_reason reason);

 }
--- a/db/view/view_update_generator.cc
+++ b/db/view/view_update_generator.cc
@@ -157,11 +157,11 @@ future<> view_update_generator::start() {
                            service::get_local_streaming_priority(),
                            nullptr,
                            ::mutation_reader::forwarding::no);
+                    auto close_sr = deferred_close(staging_sstable_reader);

                    inject_failure("view_update_generator_consume_staging_sstable");
                    auto result = staging_sstable_reader.consume_in_thread(view_updating_consumer(s, std::move(permit), *t, sstables, _as, staging_sstable_reader_handle),
                        dht::incremental_owned_ranges_checker::make_partition_filter(_db.get_keyspace_local_ranges(s->ks_name())));
-                    staging_sstable_reader.close().get();
                    if (result == stop_iteration::yes) {
                        break;
                    }
--- a/db/view/view_updating_consumer.hh
+++ b/db/view/view_updating_consumer.hh
@@ -33,8 +33,17 @@ public:
    // We prefer flushing on partition boundaries, so at the end of a partition,
    // we flush on reaching the soft limit. Otherwise we continue accumulating
    // data. We flush mid-partition if we reach the hard limit.
-    static const size_t buffer_size_soft_limit;
-    static const size_t buffer_size_hard_limit;
+    static constexpr size_t buffer_size_soft_limit_default = 1 * 1024 * 1024;
+    static constexpr size_t buffer_size_hard_limit_default = 2 * 1024 * 1024;
+private:
+    size_t _buffer_size_soft_limit = buffer_size_soft_limit_default;
+    size_t _buffer_size_hard_limit = buffer_size_hard_limit_default;
+public:
+    // Meant only for usage in tests.
+    void set_buffer_size_limit_for_testing_purposes(size_t sz) {
+        _buffer_size_soft_limit = sz;
+        _buffer_size_hard_limit = sz;
+    }

 private:
    schema_ptr _schema;
@@ -49,6 +58,7 @@ private:
 private:
    void do_flush_buffer();
    void flush_builder();
+    void end_builder();
    void maybe_flush_buffer_mid_partition();

 public:
@@ -71,7 +81,11 @@ public:

    void consume_new_partition(const dht::decorated_key& dk) {
        _mut_builder.emplace(_schema);
-        _mut_builder->consume_new_partition(dk);
+        // Further accounting is inaccurate as we base it on the consumed
+        // mutation-fragments, not on their final form in the mutation.
+        // This is good enough, as long as the difference is small and mostly
+        // constant (per fragment).
+        _buffer_size += _mut_builder->consume_new_partition(dk).memory_usage(*_schema);
    }

    void consume(tombstone t) {
@@ -113,8 +127,8 @@ public:
        if (_as->abort_requested()) {
            return stop_iteration::yes;
        }
-        flush_builder();
-        if (_buffer_size >= buffer_size_soft_limit) {
+        end_builder();
+        if (_buffer_size >= _buffer_size_soft_limit) {
            do_flush_buffer();
        }
        return stop_iteration::no;
--- a/direct_failure_detector/failure_detector.cc
+++ b/direct_failure_detector/failure_detector.cc
@@ -478,7 +478,15 @@ static future<bool> ping_with_timeout(pinger::endpoint_id id, clock::timepoint_t

    auto f = pinger.ping(id, timeout_as);
    auto sleep_and_abort = [] (clock::timepoint_t timeout, abort_source& timeout_as, clock& c) -> future<> {
-        co_await c.sleep_until(timeout, timeout_as);
+        co_await c.sleep_until(timeout, timeout_as).then_wrapped([&timeout_as] (auto&& f) {
+            // Avoid throwing if sleep was aborted.
+            if (f.failed() && timeout_as.abort_requested()) {
+                // Expected (if ping() resolved first or we were externally aborted).
+                f.ignore_ready_future();
+                return make_ready_future<>();
+            }
+            return f;
+        });
        if (!timeout_as.abort_requested()) {
            // We resolved before `f`. Abort the operation.
            timeout_as.request_abort();
@@ -501,8 +509,6 @@ static future<bool> ping_with_timeout(pinger::endpoint_id id, clock::timepoint_t
    // Wait on the sleep as well (it should return shortly, being aborted) so we don't discard the future.
    try {
        co_await std::move(sleep_and_abort);
-    } catch (const sleep_aborted&) {
-        // Expected (if `f` resolved first or we were externally aborted).
    } catch (...) {
        // There should be no other exceptions, but just in case... log it and discard,
        // we want to propagate exceptions from `f`, not from sleep.
--- a/dist/common/scripts/scylla_coredump_setup
+++ b/dist/common/scripts/scylla_coredump_setup
@@ -42,7 +42,8 @@ if __name__ == '__main__':
        if systemd_unit.available('systemd-coredump@.service'):
            dropin = '''
 [Service]
-TimeoutStartSec=infinity
+RuntimeMaxSec=infinity
+TimeoutSec=infinity
 '''[1:-1]
            os.makedirs('/etc/systemd/system/systemd-coredump@.service.d', exist_ok=True)
            with open('/etc/systemd/system/systemd-coredump@.service.d/timeout.conf', 'w') as f:
--- a/dist/common/scripts/scylla_fstrim_setup
+++ b/dist/common/scripts/scylla_fstrim_setup
@@ -16,7 +16,7 @@ if __name__ == '__main__':
    if os.getuid() > 0:
        print('Requires root permission.')
        sys.exit(1)
-    systemd_unit('scylla-fstrim.timer').unmask()
    systemd_unit('scylla-fstrim.timer').enable()
+    systemd_unit('scylla-fstrim.timer').start()
    if is_redhat_variant() or is_arch() or is_suse_variant():
        systemd_unit('fstrim.timer').disable()
--- a/dist/common/scripts/scylla_kernel_check
+++ b/dist/common/scripts/scylla_kernel_check
@@ -25,7 +25,7 @@ if __name__ == '__main__':
    run('dd if=/dev/zero of=/var/tmp/kernel-check.img bs=1M count=128', shell=True, check=True, stdout=DEVNULL, stderr=DEVNULL)
    run('mkfs.xfs /var/tmp/kernel-check.img', shell=True, check=True, stdout=DEVNULL, stderr=DEVNULL)
    run('mount /var/tmp/kernel-check.img /var/tmp/mnt -o loop', shell=True, check=True, stdout=DEVNULL, stderr=DEVNULL)
-    ret = run('iotune --fs-check --evaluation-directory /var/tmp/mnt', shell=True).returncode
+    ret = run('iotune --fs-check --evaluation-directory /var/tmp/mnt --default-log-level error', shell=True).returncode
    run('umount /var/tmp/mnt', shell=True, check=True)
    shutil.rmtree('/var/tmp/mnt')
    os.remove('/var/tmp/kernel-check.img')
--- a/dist/common/scripts/scylla_raid_setup
+++ b/dist/common/scripts/scylla_raid_setup
@@ -9,6 +9,7 @@

 import os
 import argparse
+import distutils.util
 import pwd
 import grp
 import sys
@@ -37,11 +38,14 @@ if __name__ == '__main__':
                        help='force constructing RAID when only one disk is specified')
    parser.add_argument('--raid-level', default='0',
                        help='specify RAID level')
-    parser.add_argument('--online-discard', default=True,
+    parser.add_argument('--online-discard', default="True",
                        help='Enable XFS online discard (trim SSD cells after file deletion)')

    args = parser.parse_args()

+    # Allow args.online_discard to be used as a boolean value
+    args.online_discard = distutils.util.strtobool(args.online_discard)
+
    root = args.root.rstrip('/')
    if args.volume_role == 'all':
        mount_at=root
@@ -125,9 +129,12 @@ if __name__ == '__main__':
                procs.append(proc)
    for proc in procs:
        proc.wait()
+    for disk in disks:
+        run(f'wipefs -a {disk}', shell=True, check=True)
    if raid:
        run('udevadm settle', shell=True, check=True)
        run('mdadm --create --verbose --force --run {raid} --level={level} -c1024 --raid-devices={nr_disk} {disks}'.format(raid=fsdev, level=args.raid_level, nr_disk=len(disks), disks=args.disks.replace(',', ' ')), shell=True, check=True)
+        run(f'wipefs -a {fsdev}', shell=True, check=True)
        run('udevadm settle', shell=True, check=True)

    major_minor = os.stat(fsdev).st_rdev
@@ -138,7 +145,7 @@ if __name__ == '__main__':
    # and it also cannot be smaller than the sector size.
    block_size = max(1024, sector_size)
    run('udevadm settle', shell=True, check=True)
-    run(f'mkfs.xfs -b size={block_size} {fsdev} -f -K', shell=True, check=True)
+    run(f'mkfs.xfs -b size={block_size} {fsdev} -K', shell=True, check=True)
    run('udevadm settle', shell=True, check=True)

    if is_debian_variant():
--- a/dist/redhat/scylla.spec
+++ b/dist/redhat/scylla.spec
@@ -7,7 +7,7 @@ Group:          Applications/Databases
 License:        AGPLv3
 URL:            http://www.scylladb.com/
 Source0:        %{reloc_pkg}
-Requires:       %{product}-server = %{version} %{product}-conf = %{version} %{product}-python3 = %{version} %{product}-kernel-conf = %{version} %{product}-jmx = %{version} %{product}-tools = %{version} %{product}-tools-core = %{version} %{product}-node-exporter = %{version}
+Requires:       %{product}-server = %{version}-%{release} %{product}-conf = %{version}-%{release} %{product}-python3 = %{version}-%{release} %{product}-kernel-conf = %{version}-%{release} %{product}-jmx = %{version}-%{release} %{product}-tools = %{version}-%{release} %{product}-tools-core = %{version}-%{release} %{product}-node-exporter = %{version}-%{release}
 Obsoletes:	scylla-server < 1.1

 %global _debugsource_template %{nil}
@@ -54,7 +54,7 @@ Group:          Applications/Databases
 Summary:        The Scylla database server
 License:        AGPLv3
 URL:            http://www.scylladb.com/
-Requires:       %{product}-conf  = %{version} %{product}-python3 = %{version}
+Requires:       %{product}-conf  = %{version}-%{release} %{product}-python3 = %{version}-%{release}
 Conflicts:      abrt
 AutoReqProv:    no

--- a/docs/_utils/redirects.yaml
+++ b/docs/_utils/redirects.yaml
@@ -1,6 +1,77 @@
 ### a dictionary of redirections
 #old path: new path

+# removing the Enterprise upgrade guides from the Open Source documentation
+
+/stable/upgrade/upgrade-enterprise/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-ubuntu.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-ubuntu.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-debian.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-debian.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-image.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/upgrade-guide-from-2021.1-to-2022.1-image.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/metric-update-2021.1-to-2022.1.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.1-to-2022.1/metric-update-2021.1-to-2022.1.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/upgrade-guide-from-2020.1-to-2021.1-rpm.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/upgrade-guide-from-2020.1-to-2021.1-rpm.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/upgrade-guide-from-2020.1-to-2021.1-ubuntu-16-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/upgrade-guide-from-2020.1-to-2021.1-ubuntu-16-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/upgrade-guide-from-2020.1-to-2021.1-ubuntu-18-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/upgrade-guide-from-2020.1-to-2021.1-ubuntu-18-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/upgrade-guide-from-2020.1-to-2021.1-debian.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/upgrade-guide-from-2020.1-to-2021.1-debian.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/metric-update-2020.1-to-2021.1.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.1-to-2021.1/metric-update-2020.1-to-2021.1.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/upgrade-guide-from-2019.1-to-2020.1-rpm.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/upgrade-guide-from-2019.1-to-2020.1-rpm.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/upgrade-guide-from-2019.1-to-2020.1-ubuntu-16-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/upgrade-guide-from-2019.1-to-2020.1-ubuntu-16-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/upgrade-guide-from-2019.1-to-2020.1-ubuntu-18-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/upgrade-guide-from-2019.1-to-2020.1-ubuntu-18-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/upgrade-guide-from-2019.1-to-2020.1-debian.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/upgrade-guide-from-2019.1-to-2020.1-debian.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/metric-update-2019.1-to-2020.1.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.1-to-2020.1/metric-update-2019.1-to-2020.1.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.1-to-2019.1/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.1-to-2019.1/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.1-to-2019.1/upgrade-guide-from-2018.1-to-2019.1-rpm.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.1-to-2019.1/upgrade-guide-from-2018.1-to-2019.1-rpm.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.1-to-2019.1/upgrade-guide-from-2018.1-to-2019.1-ubuntu-16-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.1-to-2019.1/upgrade-guide-from-2018.1-to-2019.1-ubuntu-16-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.1-to-2019.1/metric-update-2018.1-to-2019.1.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.1-to-2019.1/metric-update-2018.1-to-2019.1.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.1-to-2018.1/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.1-to-2018.1/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.1-to-2018.1/upgrade-guide-from-2017.1-to-2018.1-rpm.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.1-to-2018.1/upgrade-guide-from-2017.1-to-2018.1-rpm.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.1-to-2018.1/upgrade-guide-from-2017.1-to-2018.1-ubuntu.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.1-to-2018.1/upgrade-guide-from-2017.1-to-2018.1-ubuntu.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.1-to-2018.1/upgrade-guide-from-2017.1-to-2018.1-debian.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.1-to-2018.1/upgrade-guide-from-2017.1-to-2018.1-debian.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.1-to-2018.1/metric-update-2017.1-to-2018.1.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.1-to-2018.1/metric-update-2017.1-to-2018.1.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-ubuntu-14-to-16.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-ubuntu-14-to-16.html
+/stable/getting-started/install-scylla/unified-installer.html#unified-installed-upgrade: https://enterprise.docs.scylladb.com/stable/getting-started/install-scylla/unified-installer.html#unified-installed-upgrade
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/upgrade-guide-from-2022.x.y-to-2022.x.z-image.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/upgrade-guide-from-2022.x.y-to-2022.x.z-image.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/upgrade-guide-from-2022.x.y-to-2022.x.z-rpm.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/upgrade-guide-from-2022.x.y-to-2022.x.z-rpm.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/upgrade-guide-from-2022.x.y-to-2022.x.z-ubuntu-18-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/upgrade-guide-from-2022.x.y-to-2022.x.z-ubuntu-18-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/upgrade-guide-from-2022.x.y-to-2022.x.z-ubuntu-20-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/upgrade-guide-from-2022.x.y-to-2022.x.z-ubuntu-20-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/upgrade-guide-from-2022.x.y-to-2022.x.z-debian-10.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2022.x.y-to-2022.x.z/upgrade-guide-from-2022.x.y-to-2022.x.z-debian-10.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-rpm.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-rpm.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-ubuntu-16-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-ubuntu-16-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-ubuntu-18-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-ubuntu-18-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-ubuntu-20-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-ubuntu-20-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-debian-9.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-debian-9.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-debian-10.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2021.x.y-to-2021.x.z/upgrade-guide-from-2021.x.y-to-2021.x.z-debian-10.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/upgrade-guide-from-2020.x.y-to-2020.x.z-rpm.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/upgrade-guide-from-2020.x.y-to-2020.x.z-rpm.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/upgrade-guide-from-2020.x.y-to-2020.x.z-ubuntu-16-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/upgrade-guide-from-2020.x.y-to-2020.x.z-ubuntu-16-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/upgrade-guide-from-2020.x.y-to-2020.x.z-ubuntu-18-04.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/upgrade-guide-from-2020.x.y-to-2020.x.z-ubuntu-18-04.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/upgrade-guide-from-2020.x.y-to-2020.x.z-debian-9.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/upgrade-guide-from-2020.x.y-to-2020.x.z-debian-9.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/upgrade-guide-from-2020.x.y-to-2020.x.z-debian-10.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2020.x.y-to-2020.x.z/upgrade-guide-from-2020.x.y-to-2020.x.z-debian-10.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.x.y-to-2019.x.z/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.x.y-to-2019.x.z/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.x.y-to-2019.x.z/upgrade-guide-from-2019.x.y-to-2019.x.z-rpm.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.x.y-to-2019.x.z/upgrade-guide-from-2019.x.y-to-2019.x.z-rpm.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.x.y-to-2019.x.z/upgrade-guide-from-2019.x.y-to-2019.x.z-ubuntu.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.x.y-to-2019.x.z/upgrade-guide-from-2019.x.y-to-2019.x.z-ubuntu.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.x.y-to-2019.x.z/upgrade-guide-from-2019.x.y-to-2019.x.z-debian.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2019.x.y-to-2019.x.z/upgrade-guide-from-2019.x.y-to-2019.x.z-debian.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.x.y-to-2018.x.z/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.x.y-to-2018.x.z/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.x.y-to-2018.x.z/upgrade-guide-from-2018.x.y-to-2018.x.z-rpm.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.x.y-to-2018.x.z/upgrade-guide-from-2018.x.y-to-2018.x.z-rpm.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.x.y-to-2018.x.z/upgrade-guide-from-2018.x.y-to-2018.x.z-ubuntu.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.x.y-to-2018.x.z/upgrade-guide-from-2018.x.y-to-2018.x.z-ubuntu.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.x.y-to-2018.x.z/upgrade-guide-from-2018.x.y-to-2018.x.z-debian.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2018.x.y-to-2018.x.z/upgrade-guide-from-2018.x.y-to-2018.x.z-debian.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.x.y-to-2017.x.z/index.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.x.y-to-2017.x.z/index.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.x.y-to-2017.x.z/upgrade-guide-from-2017.x.y-to-2017.x.z-rpm.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.x.y-to-2017.x.z/upgrade-guide-from-2017.x.y-to-2017.x.z-rpm.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.x.y-to-2017.x.z/upgrade-guide-from-2017.x.y-to-2017.x.z-ubuntu.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.x.y-to-2017.x.z/upgrade-guide-from-2017.x.y-to-2017.x.z-ubuntu.html
+/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.x.y-to-2017.x.z/upgrade-guide-from-2017.x.y-to-2017.x.z-debian.html: https://enterprise.docs.scylladb.com/stable/upgrade/upgrade-enterprise/upgrade-guide-from-2017.x.y-to-2017.x.z/upgrade-guide-from-2017.x.y-to-2017.x.z-debian.html
+
+# removing the Enterprise-only content from the Open Source documentation
+
+/stable/using-scylla/workload-prioritization: https://enterprise.docs.scylladb.com/stable/using-scylla/workload-prioritization.html
+/stable/operating-scylla/security/encryption-at-rest: https://enterprise.docs.scylladb.com/stable/operating-scylla/security/encryption-at-rest.html
+/stable/operating-scylla/security/ldap-authentication: https://enterprise.docs.scylladb.com/stable/operating-scylla/security/ldap-authentication.html
+/stable/operating-scylla/security/ldap-authorization: https://enterprise.docs.scylladb.com/stable/operating-scylla/security/ldap-authorization.html
+/stable/operating-scylla/security/auditing: https://enterprise.docs.scylladb.com/stable/operating-scylla/security/auditing.html
+
 # unifying the Ubunut upgrade guide for different Ubuntu versions: from 5.0 to 2022.1

 /stable/upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/upgrade-guide-from-5.0-to-2022.1-ubuntu-18-04.html: /stable/upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/upgrade-guide-from-5.0-to-2022.1-ubuntu.html
@@ -1112,14 +1183,14 @@ tls-ssl/index.html: /stable/operating-scylla/security
 /using-scylla/integrations/integration_kairos/index.html: /stable/using-scylla/integrations/integration-kairos
 /upgrade/ami_upgrade/index.html: /stable/upgrade/ami-upgrade

-/scylla-cloud/cloud-setup/gcp-vpc-peering/index.html: /stable/scylla-cloud/cloud-setup/GCP/gcp-vpc-peering
-/scylla-cloud/cloud-setup/GCP/gcp-vcp-peering/index.html: /stable/scylla-cloud/cloud-setup/GCP/gcp-vpc-peering
+/scylla-cloud/cloud-setup/gcp-vpc-peering/index.html: https://cloud.docs.scylladb.com/stable/cloud-setup/gcp-vpc-peering.html
+/scylla-cloud/cloud-setup/GCP/gcp-vcp-peering/index.html: https://cloud.docs.scylladb.com/stable/cloud-setup/gcp-vpc-peering.html

 # move scylla cloud for AWS to dedicated directory
-/scylla-cloud/cloud-setup/aws-vpc-peering/index.html: /stable/scylla-cloud/cloud-setup/AWS/aws-vpc-peering
-/scylla-cloud/cloud-setup/cloud-prom-proxy/index.html: /stable/scylla-cloud/cloud-setup/AWS/cloud-prom-proxy
-/scylla-cloud/cloud-setup/outposts/index.html: /stable/scylla-cloud/cloud-setup/AWS/outposts
-/scylla-cloud/cloud-setup/scylla-cloud-byoa/index.html: /stable/scylla-cloud/cloud-setup/AWS/scylla-cloud-byoa
+/scylla-cloud/cloud-setup/aws-vpc-peering/index.html: https://cloud.docs.scylladb.com/stable/cloud-setup/aws-vpc-peering.html
+/scylla-cloud/cloud-setup/cloud-prom-proxy/index.html: https://cloud.docs.scylladb.com/stable/monitoring/cloud-prom-proxy.html
+/scylla-cloud/cloud-setup/outposts/index.html: https://cloud.docs.scylladb.com/stable/cloud-setup/outposts.html
+/scylla-cloud/cloud-setup/scylla-cloud-byoa/index.html: https://cloud.docs.scylladb.com/stable/cloud-setup/scylla-cloud-byoa.html
 /scylla-cloud/cloud-services/scylla_cloud_costs/index.html: /stable/scylla-cloud/cloud-services/scylla-cloud-costs
 /scylla-cloud/cloud-services/scylla_cloud_managin_versions/index.html: /stable/scylla-cloud/cloud-services/scylla-cloud-managin-versions
 /scylla-cloud/cloud-services/scylla_cloud_support_alerts_sla/index.html: /stable/scylla-cloud/cloud-services/scylla-cloud-support-alerts-sla
--- a/docs/alternator/compatibility.md
+++ b/docs/alternator/compatibility.md
@@ -161,6 +161,10 @@ events appear in the Streams API as normal deletions - without the
 distinctive marker on deletions which are really expirations.
 See <https://github.com/scylladb/scylla/issues/5060>.

+<!--- REMOVE IN FUTURE VERSIONS - Remove the note below in version 5.3/2023.1 -->
+
+> **Note** This feature is experimental in versions earlier than ScyllaDB Open Source 5.2 and ScyllaDB Enterprise 2022.2.
+
 ---


--- a/docs/architecture/raft.rst
+++ b/docs/architecture/raft.rst
@@ -5,7 +5,7 @@ Raft Consensus Algorithm in ScyllaDB
 Introduction
 --------------
 ScyllaDB was originally designed, following Apache Cassandra, to use gossip for topology and schema updates and the Paxos consensus algorithm for
-strong data consistency (:doc:`LWT </using-scylla/lwt>`). To achieve stronger consistency without performance penalty, ScyllaDB 5.x has turned to Raft - a consensus algorithm designed as an alternative to both gossip and Paxos.
+strong data consistency (:doc:`LWT </using-scylla/lwt>`). To achieve stronger consistency without performance penalty, ScyllaDB has turned to Raft - a consensus algorithm designed as an alternative to both gossip and Paxos.

 Raft is a consensus algorithm that implements a distributed, consistent, replicated log across members (nodes). Raft implements consensus by first electing a distinguished leader, then giving the leader complete responsibility for managing the replicated log. The leader accepts log entries from clients, replicates them on other servers, and tells servers when it is safe to apply log entries to their state machines.

@@ -13,9 +13,9 @@ Raft uses a heartbeat mechanism to trigger a leader election. All servers start

 Leader selection is described in detail in the `Raft paper <https://raft.github.io/raft.pdf>`_.

-ScyllaDB 5.x may use Raft to maintain schema updates in every node (see below). Any schema update, like ALTER, CREATE or DROP TABLE, is first committed as an entry in the replicated Raft log, and, once stored on most replicas, applied to all nodes **in the same order**, even in the face of a node or network failures.
+ScyllaDB can use Raft to maintain schema updates in every node (see below). Any schema update, like ALTER, CREATE or DROP TABLE, is first committed as an entry in the replicated Raft log, and, once stored on most replicas, applied to all nodes **in the same order**, even in the face of a node or network failures.

-Following ScyllaDB 5.x releases will use Raft to guarantee consistent topology updates similarly.
+Upcoming ScyllaDB releases will use Raft to guarantee consistent topology updates similarly.

 .. _raft-quorum-requirement:

@@ -26,90 +26,55 @@ Raft requires at least a quorum of nodes in a cluster to be available. If multip
 and the quorum is lost, the cluster is unavailable for schema updates. See :ref:`Handling Failures <raft-handling-failures>`
 for information on how to handle failures.

-
-Upgrade Considerations for ScyllaDB 5.0 and Later
-==================================================
-
 Note that when you have a two-DC cluster with the same number of nodes in each DC, the cluster will lose the quorum if one
 of the DCs is down.
 **We recommend configuring three DCs per cluster to ensure that the cluster remains available and operational when one DC is down.**

+.. _enabling-raft-existing-cluster:
+
 Enabling Raft
 ---------------

-Enabling Raft in ScyllaDB 5.0 and 5.1
-=====================================
-
-.. warning::
-  In ScyllaDB 5.0 and 5.1, Raft is an experimental feature.
-
-It is not possible to enable Raft in an existing cluster in ScyllaDB 5.0 and 5.1.
-In order to have a Raft-enabled cluster in these versions, you must create a new cluster with Raft enabled from the start.
-
-.. warning::
-
-   **Do not** use Raft in production clusters in ScyllaDB 5.0 and 5.1. Such clusters won't be able to correctly upgrade to ScyllaDB 5.2.
-
-   Use Raft only for testing and experimentation in clusters which can be thrown away.
-
-.. warning::
-    Once enabled, Raft cannot be disabled on your cluster. The cluster nodes will fail to restart if you remove the Raft feature.
-
-When creating a new cluster, add ``raft`` to the list of experimental features in your ``scylla.yaml`` file:
-
-.. code-block:: yaml
-
-    experimental_features:
-     - raft
-
-.. _enabling-raft-existing-cluster:
-
-Enabling Raft in ScyllaDB 5.2 and further
-=========================================
-
-.. TODO include enterprise versions in this documentation
-
 .. note::
-  In ScyllaDB 5.2, Raft is Generally Available and can be safely used for consistent schema management.
-  In ScyllaDB 5.3 it will become enabled by default.
-  In further versions it will be mandatory.
+  In ScyllaDB 5.2 and ScyllaDB Enterprise 2023.1 Raft is Generally Available and can be safely used for consistent schema management.
+  In further versions, it will be mandatory.

-ScyllaDB 5.2 and later comes equipped with a procedure that can setup Raft-based consistent cluster management in an existing cluster. We refer to this as the **internal Raft upgrade procedure** (do not confuse with the :doc:`ScyllaDB version upgrade procedure </upgrade/upgrade-opensource/upgrade-guide-from-5.1-to-5.2/upgrade-guide-from-5.1-to-5.2-generic>`).
+ScyllaDB Open Source 5.2 and later, and ScyllaDB Enterprise 2023.1 and later come equipped with a procedure that can set up Raft-based consistent cluster management in an existing cluster. We refer to this as the **Raft upgrade procedure** (do not confuse with the :doc:`ScyllaDB version upgrade procedure </upgrade/index/>`).

 .. warning::
    Once enabled, Raft cannot be disabled on your cluster. The cluster nodes will fail to restart if you remove the Raft feature.

-To enable Raft in an existing cluster in Scylla 5.2 and beyond:
+To enable Raft in an existing cluster, you need to enable the ``consistent_cluster_management`` option in the ``scylla.yaml`` file 
+for **each node** in the cluster: 

-* ensure that the schema is synchronized in the cluster by executing :doc:`nodetool describecluster </operating-scylla/nodetool-commands/describecluster>` on each node and ensuring that the schema version is the same on all nodes,
-* then perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>`, updating the ``scylla.yaml`` file for **each node** in the cluster before restarting it to enable the ``consistent_cluster_management`` flag:
+#. Ensure that the schema is synchronized in the cluster by executing :doc:`nodetool describecluster </operating-scylla/nodetool-commands/describecluster>` on each node and ensuring that the schema version is the same on all nodes.
+#. Perform a :doc:`rolling restart </operating-scylla/procedures/config-change/rolling-restart/>`, updating the ``scylla.yaml`` file for **each node** in the cluster before restarting it to enable the ``consistent_cluster_management`` option:

-.. code-block:: yaml
+    .. code-block:: yaml

-   consistent_cluster_management: true
+       consistent_cluster_management: true

-When all the nodes in the cluster and updated and restarted, the cluster will start the **internal Raft upgrade procedure**.
-**You must then verify** that the internal Raft upgrade procedure has finished successfully. Refer to the :ref:`next section <verify-raft-procedure>`.
+When all the nodes in the cluster are updated and restarted, the cluster will start the **Raft upgrade procedure**.
+**You must then verify** that the Raft upgrade procedure has finished successfully. Refer to the :ref:`next section <verify-raft-procedure>`.

-You can also enable the ``consistent_cluster_management`` flag while performing :doc:`rolling upgrade from 5.1 to 5.2 </upgrade/upgrade-opensource/upgrade-guide-from-5.1-to-5.2/upgrade-guide-from-5.1-to-5.2-generic>`: update ``scylla.yaml`` before restarting each node. The internal Raft upgrade procedure will start as soon as the last node was upgraded and restarted. As above, this requires :ref:`verifying <verify-raft-procedure>` that this internal procedure successfully finishes.
+Alternatively, you can enable the ``consistent_cluster_management`` option when you are:

-Finally, you can enable the ``consistent_cluster_management`` flag when creating a new cluster. This does not use the internal Raft upgrade procedure; instead, Raft is functioning in the cluster and managing schema right from the start.
+* Performing a rolling upgrade from version 5.1 to 5.2 or version 2022.x to 2023.1 by updating ``scylla.yaml`` before restarting each node. The Raft upgrade procedure will start as soon as the last node was upgraded and restarted. As above, this requires :ref:`verifying <verify-raft-procedure>` that the procedure successfully finishes.
+* Creating a new cluster. This does not use the Raft upgrade procedure; instead, Raft is functioning in the cluster and managing schema right from the start.

 Until all nodes are restarted with ``consistent_cluster_management: true``, it is still possible to turn this option back off. Once enabled on every node, it must remain turned on (or the node will refuse to restart).

 .. _verify-raft-procedure:

-Verifying that the internal Raft upgrade procedure finished successfully
+Verifying that the Raft upgrade procedure finished successfully
 ========================================================================

-.. versionadded:: 5.2
-
-The internal Raft upgrade procedure starts as soon as every node in the cluster restarts with ``consistent_cluster_management`` flag enabled in ``scylla.yaml``.
+The Raft upgrade procedure starts as soon as every node in the cluster restarts with the ``consistent_cluster_management`` flag enabled in ``scylla.yaml``.

 .. TODO: update the above sentence once 5.3 and later are released.

 The procedure requires **full cluster availability** to correctly setup the Raft algorithm; after the setup finishes, Raft can proceed with only a majority of nodes, but this initial setup is an exception.
-An unlucky event, such as a hardware failure, may cause one of your nodes to fail. If this happens before the internal Raft upgrade procedure finishes, the procedure will get stuck and your intervention will be required.
+An unlucky event, such as a hardware failure, may cause one of your nodes to fail. If this happens before the Raft upgrade procedure finishes, the procedure will get stuck and your intervention will be required.

 To verify that the procedure finishes, look at the log of every Scylla node (using ``journalctl _COMM=scylla``). Search for the following patterns:

@@ -204,8 +169,6 @@ If some nodes are **dead and irrecoverable**, you'll need to perform a manual re
 Verifying that Raft is enabled
 ===============================

-.. versionadded:: 5.2
-
 You can verify that Raft is enabled on your cluster by performing the following query on each node:

 .. code-block:: sql
@@ -224,7 +187,7 @@ The query should return:

 on every node.

-If the query returns 0 rows, or ``value`` is ``synchronize`` or ``use_pre_raft_procedures``, it means that the cluster is in the middle of the internal Raft upgrade procedure; consult the :ref:`relevant section <verify-raft-procedure>`.
+If the query returns 0 rows, or ``value`` is ``synchronize`` or ``use_pre_raft_procedures``, it means that the cluster is in the middle of the Raft upgrade procedure; consult the :ref:`relevant section <verify-raft-procedure>`.

 If ``value`` is ``recovery``, it means that the cluster is in the middle of the manual recovery procedure. The procedure must be finished. Consult :ref:`the section about Raft recovery <recover-raft-procedure>`.

@@ -276,12 +239,8 @@ Examples
     - Schema updates are possible and safe.
     - Try restarting the node. If the node is dead, :doc:`replace it with a new node </operating-scylla/procedures/cluster-management/replace-dead-node/>`.
   * - 2 nodes
-     - Cluster is not fully operational. The data is available for reads and writes, but schema changes are impossible.
+     - Data is available for reads and writes, schema changes are impossible.
     - Restart at least 1 of the 2 nodes that are down to regain quorum. If you can’t recover at least 1 of the 2 nodes, consult the :ref:`manual Raft recovery section <recover-raft-procedure>`.
-   * - 1 datacenter
-     - Cluster is not fully operational. The data is available for reads and writes, but schema changes are impossible.
-     - When the DC comes back online, restart the nodes. If the DC does not come back online and nodes are lost, consult the :ref:`manual Raft recovery section <recover-raft-procedure>`.
-

 .. list-table:: Cluster B: 2 datacenters, 6  nodes (3 nodes per DC)
   :widths: 20 40 40
@@ -294,10 +253,10 @@ Examples
     - Schema updates are possible and safe.
     - Try restarting the node(s). If the node is dead, :doc:`replace it with a new node </operating-scylla/procedures/cluster-management/replace-dead-node/>`.
   * - 3 nodes
-     - Cluster is not fully operational. The data is available for reads and writes, but schema changes are impossible.
+     - Data is available for reads and writes, schema changes are impossible.
     - Restart 1 of the 3 nodes that are down to regain quorum. If you can’t recover at least 1 of the 3 failed nodes, consult the :ref:`manual Raft recovery section <recover-raft-procedure>`.
   * - 1DC
-     - Cluster is not fully operational. The data is available for reads and writes, but schema changes are impossible.
+     - Data is available for reads and writes, schema changes are impossible.
     - When the DCs come back online, restart the nodes. If the DC fails to come back online and the nodes are lost, consult the :ref:`manual Raft recovery section <recover-raft-procedure>`.


@@ -315,7 +274,7 @@ Examples
     - Schema updates are possible and safe.
     - When the DC comes back online, try restarting the nodes in the cluster. If the nodes are dead, :doc:`add 3 new nodes in a new region </operating-scylla/procedures/cluster-management/add-dc-to-existing-dc/>`.
   * - 2 DCs
-     - Cluster is not fully operational. The data is available for reads and writes, but schema changes are impossible.
+     - Data is available for reads and writes, schema changes are impossible.
     - When the DCs come back online, restart the nodes. If at least one DC fails to come back online and the nodes are lost, consult the :ref:`manual Raft recovery section <recover-raft-procedure>`.

 .. _recover-raft-procedure:
@@ -323,26 +282,24 @@ Examples
 Raft manual recovery procedure
 ==============================

-.. versionadded:: 5.2
-
 The manual Raft recovery procedure applies to the following situations:

-* :ref:`The internal Raft upgrade procedure <verify-raft-procedure>` got stuck because one of your nodes failed in the middle of the procedure and is irrecoverable,
+* :ref:`The Raft upgrade procedure <verify-raft-procedure>` got stuck because one of your nodes failed in the middle of the procedure and is irrecoverable,
 * or the cluster was running Raft but a majority of nodes (e.g. 2 our of 3) failed and are irrecoverable. Raft cannot progress unless a majority of nodes is available.

 .. warning::

   Perform the manual recovery procedure **only** if you're dealing with **irrecoverable** nodes. If it is possible to restart your nodes, do that instead of manual recovery.

-.. warning::
+.. note::

   Before proceeding, make sure that the irrecoverable nodes are truly dead, and not, for example, temporarily partitioned away due to a network failure. If it is possible for the 'dead' nodes to come back to life, they might communicate and interfere with the recovery procedure and cause unpredictable problems.

   If you have no means of ensuring that these irrecoverable nodes won't come back to life and communicate with the rest of the cluster, setup firewall rules or otherwise isolate your alive nodes to reject any communication attempts from these dead nodes.

-During the manual recovery procedure you'll enter a special ``RECOVERY`` mode, remove all faulty nodes (using the standard :doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>`), delete the internal Raft data, and restart the cluster. This will cause the cluster to perform the internal Raft upgrade procedure again, initializing the Raft algorithm from scratch. The manual recovery procedure is applicable both to clusters which were not running Raft in the past and then had Raft enabled, and to clusters which were bootstrapped using Raft.
+During the manual recovery procedure you'll enter a special ``RECOVERY`` mode, remove all faulty nodes (using the standard :doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>`), delete the internal Raft data, and restart the cluster. This will cause the cluster to perform the Raft upgrade procedure again, initializing the Raft algorithm from scratch. The manual recovery procedure is applicable both to clusters which were not running Raft in the past and then had Raft enabled, and to clusters which were bootstrapped using Raft.

-.. warning::
+.. note::

   Entering ``RECOVERY`` mode requires a node restart. Restarting an additional node while some nodes are already dead may lead to unavailability of data queries (assuming that you haven't lost it already). For example, if you're using the standard RF=3, CL=QUORUM setup, and you're recovering from a stuck of upgrade procedure because one of your nodes is dead, restarting another node will cause temporary data query unavailability (until the node finishes restarting). Prepare your service for downtime before proceeding.

@@ -393,4 +350,3 @@ Learn More About Raft
 * `Making Schema Changes Safe with Raft <https://www.scylladb.com/presentations/making-schema-changes-safe-with-raft/>`_ - A Scylla Summit talk by Konstantin Osipov (register for access)
 * `The Future of Consensus in ScyllaDB 5.0 and Beyond <https://www.scylladb.com/presentations/the-future-of-consensus-in-scylladb-5-0-and-beyond/>`_ - A Scylla Summit talk by Tomasz Grabiec (register for access)

-
--- a/docs/architecture/ringarchitecture/index.rst
+++ b/docs/architecture/ringarchitecture/index.rst
@@ -70,9 +70,7 @@ You can use the ``nodetool`` command to describe different aspects of your nodes

 ``$ nodetool ring <keyspace>``

-Outputs all tokens of a node, and displays the token ring information_.  It produces output as follows for a single datacenter:
-
-.. _information: /operating-scylla/nodetool-commands/ring/
+Outputs all tokens of a node, and displays the :doc:`token ring information </operating-scylla/nodetool-commands/ring>`.  It produces output as follows for a single datacenter:

 .. code-block:: shell

--- a/docs/cql/ddl.rst
+++ b/docs/cql/ddl.rst
@@ -746,9 +746,7 @@ CDC options

 .. versionadded:: 3.2 Scylla Open Source

-The following options are to be used with Change Data Capture. Available as an experimental feature from Scylla Open Source 3.2. 
-To use this feature, you must enable the :ref:`experimental tag <yaml_enabling_experimental_features>` in the scylla.yaml.
-
+The following options can be used with Change Data Capture.

 +---------------------------+-----------------+------------------------------------------------------------------------------------------------------------------------+
 | option                    |  default        | description                                                                                                            |
@@ -823,7 +821,8 @@ The ``tombstone_gc`` option allows you to prevent data resurrection. With the ``
 are only removed after :term:`repair` is performed. Unlike  ``gc_grace_seconds``, ``tombstone_gc`` has no time constraints - when 
 the ``repair`` mode is on, tombstones garbage collection will wait until repair is run. 

-The ``tombstone_gc`` option can be enabled using ``ALTER TABLE`` and ``CREATE TABLE``. For example:
+You can enable the after-repair tombstone GC by setting the ``repair`` mode using 
+``ALTER TABLE`` or ``CREATE TABLE``. For example:

 .. code-block:: cql

@@ -833,10 +832,6 @@ The ``tombstone_gc`` option can be enabled using ``ALTER TABLE`` and ``CREATE TA

    ALTER TABLE ks.cf WITH tombstone_gc = {'mode':'repair'} ;

-.. note::
-  The ``tombstone_gc`` option was added in ScyllaDB 5.0 as an experimental feature, and it is disabled by default. 
-  You need to explicitly specify the ``repair`` mode table property to enable the feature. 
-
 The following modes are available:

 .. list-table::
@@ -846,7 +841,7 @@ The following modes are available:
   * - Mode
     - Description
   * - ``timeout``
-     - Tombstone GC is performed after the wait time specified with ``gc_grace_seconds``. Default in ScyllaDB 5.0.
+     - Tombstone GC is performed after the wait time specified with ``gc_grace_seconds`` (default).
   * - ``repair``
     - Tombstone GC is performed after repair is run.
   * - ``disabled``
--- a/docs/cql/dml.rst
+++ b/docs/cql/dml.rst
@@ -609,7 +609,7 @@ of eventual consistency on an event of a timestamp collision:

 ``INSERT`` statements happening concurrently at different cluster
 nodes proceed without coordination. Eventually cell values
-supplied by a statement with the highest timestamp will prevail.
+supplied by a statement with the highest timestamp will prevail (see :ref:`update ordering <update-ordering>`).

 Unless a timestamp is provided by the client, Scylla will automatically
 generate a timestamp with microsecond precision for each
@@ -618,7 +618,7 @@ by the same node are unique. Timestamps assigned at different
 nodes are not guaranteed to be globally unique.
 With a steadily high write rate timestamp collision
 is not unlikely. If it happens, i.e. two ``INSERTS`` have the same
-timestamp, the lexicographically bigger value prevails:
+timestamp, a conflict resolution algorithm determines which of the inserted cells prevails (see :ref:`update ordering <update-ordering>`).

 Please refer to the :ref:`UPDATE <update-parameters>` section for more information on the :token:`update_parameter`.

@@ -726,8 +726,8 @@ Similarly to ``INSERT``, ``UPDATE`` statement happening concurrently at differen
 cluster nodes proceed without coordination. Cell values
 supplied by a statement with the highest timestamp will prevail.
 If two ``UPDATE`` statements or ``UPDATE`` and ``INSERT``
-statements have the same timestamp,
-lexicographically bigger value prevails.
+statements have the same timestamp, a conflict resolution algorithm determines which cell prevails
+(see :ref:`update ordering <update-ordering>`).

 Regarding the :token:`assignment`:

@@ -768,7 +768,7 @@ parameters:
  Scylla ensures that query timestamps created by the same coordinator node are unique (even across different shards
  on the same node). However, timestamps assigned at different nodes are not guaranteed to be globally unique.
  Note that with a steadily high write rate, timestamp collision is not unlikely. If it happens, e.g. two INSERTS
-  have the same timestamp, conflicting cell values are compared and the cells with the lexicographically bigger value prevail.
+  have the same timestamp, a conflict resolution algorithm determines which of the inserted cells prevails (see :ref:`update ordering <update-ordering>` for more information).
 - ``TTL``: specifies an optional Time To Live (in seconds) for the inserted values. If set, the inserted values are
  automatically removed from the database after the specified time. Note that the TTL concerns the inserted values, not
  the columns themselves. This means that any subsequent update of the column will also reset the TTL (to whatever TTL
@@ -778,6 +778,55 @@ parameters:
 - ``TIMEOUT``: specifies a timeout duration for the specific request.
  Please refer to the :ref:`SELECT <using-timeout>` section for more information.

+.. _update-ordering:
+
+Update ordering
+~~~~~~~~~~~~~~~
+
+:ref:`INSERT <insert-statement>`, :ref:`UPDATE <update-statement>`, and :ref:`DELETE <delete_statement>`
+operations are ordered by their ``TIMESTAMP``.
+
+Ordering of such changes is done at the cell level, where each cell carries a write ``TIMESTAMP``,
+other attributes related to its expiration when it has a non-zero time-to-live (``TTL``),
+and the cell value.
+
+The fundamental rule for ordering cells that insert, update, or delete data in a given row and column
+is that the cell with the highest timestamp wins.
+
+However, it is possible that multiple such cells will carry the same ``TIMESTAMP``.
+There could be several reasons for ``TIMESTAMP`` collision:
+
+* Benign collision can be caused by "replay" of a mutation, e.g., due to client retry, or due to internal processes.
+  In such cases, the cells are equivalent, and any of them can be selected arbitrarily.
+* ``TIMESTAMP`` collisions might normally be caused by parallel queries that are served
+  by different coordinator nodes. The coordinators might calculate the same write ``TIMESTAMP``
+  based on their local time in microseconds.
+* Collisions might also happen with user-provided timestamps if the application does not guarantee
+  unique timestamps with the ``USING TIMESTAMP`` parameter (see :ref:`Update parameters <update-parameters>` for more information).
+
+As said above, in the replay case, ordering of cells should not matter, as they carry the same value
+and same expiration attributes, so picking any of them will reach the same result.
+However, other ``TIMESTAMP`` conflicts must be resolved in a consistent way by all nodes.
+Otherwise, if nodes would have picked an arbitrary cell in case of a conflict and they would
+reach different results, reading from different replicas would detect the inconsistency and trigger
+read-repair that will generate yet another cell that would still conflict with the existing cells,
+with no guarantee for convergence.
+
+Therefore, Scylla implements an internal, consistent conflict-resolution algorithm
+that orders cells with conflicting ``TIMESTAMP`` values based on other properties, like:
+
+* whether the cell is a tombstone or a live cell,
+* whether the cell has an expiration time,
+* the cell ``TTL``,
+* and finally, what value the cell carries.
+
+The conflict-resolution algorithm is documented in `Scylla's internal documentation <https://github.com/scylladb/scylladb/blob/master/docs/dev/timestamp-conflict-resolution.md>`_
+and it may be subject to change.
+
+Reliable serialization can be achieved using unique write ``TIMESTAMP``
+and by using :doc:`Lightweight Transactions (LWT) </using-scylla/lwt>` to ensure atomicity of
+:ref:`INSERT <insert-statement>`, :ref:`UPDATE <update-statement>`, and :ref:`DELETE <delete_statement>`.
+
 .. _delete_statement:

 DELETE
@@ -817,7 +866,7 @@ For more information on the :token:`update_parameter` refer to the :ref:`UPDATE
 In a ``DELETE`` statement, all deletions within the same partition key are applied atomically,
 meaning either all columns mentioned in the statement are deleted or none.
 If ``DELETE`` statement has the same timestamp as ``INSERT`` or
-``UPDATE`` of the same primary key, delete operation prevails.
+``UPDATE`` of the same primary key, delete operation prevails (see :ref:`update ordering <update-ordering>`).

 A ``DELETE`` operation can be conditional through the use of an ``IF`` clause, similar to ``UPDATE`` and ``INSERT``
 statements. Each such ``DELETE`` gets a globally unique timestamp.
--- a/docs/dev/timestamp-conflict-resolution.md
+++ b/docs/dev/timestamp-conflict-resolution.md
@@ -0,0 +1,37 @@
+# Timestamp conflict resolution
+
+The fundamental rule for ordering cells that insert, update, or delete data in a given row and column
+is that the cell with the highest timestamp wins.
+
+However, it is possible that multiple such cells will carry the same `TIMESTAMP`.
+In this case, conflicts must be resolved in a consistent way by all nodes.
+Otherwise, if nodes would have picked an arbitrary cell in case of a conflict and they would
+reach different results, reading from different replicas would detect the inconsistency and trigger
+read-repair that will generate yet another cell that would still conflict with the existing cells,
+with no guarantee for convergence.
+
+The first tie-breaking rule when two cells have the same write timestamp is that
+dead cells win over live cells; and if both cells are deleted, the one with the later deletion time prevails.
+
+If both cells are alive, their expiration time is examined.
+Cells that are written with a non-zero TTL (either implicit, as determined by
+the table's default TTL, or explicit, `USING TTL`) are due to expire
+TTL seconds after the time they were written (as determined by the coordinator,
+and rounded to 1 second resolution). That time is the cell's expiration time.
+When cells expire, they become tombstones, shadowing any data written with a write timestamp
+less than or equal to the timestamp of the expiring cell.
+Therefore, cells that have an expiration time win over cells with no expiration time.
+
+If both cells have an expiration time, the one with the latest expiration time wins;
+and if they have the same expiration time (in whole second resolution),
+their write time is derived from the expiration time less the original time-to-live value
+and the one that was written at a later time prevails.
+
+Finally, if both cells are live and have no expiration, or have the same expiration time and time-to-live,
+the cell with the lexicographically bigger value prevails.
+
+Note that when multiple columns are INSERTed or UPDATEd using the same timestamp,
+SELECTing those columns might return a result that mixes cells from either upsert.
+This may happen when both upserts have no expiration time, or both their expiration time and TTL are the
+same, respectively (in whole second resolution). In such a case, cell selection would be based on the cell values
+in each column, independently of each other.
--- a/docs/getting-started/index.rst
+++ b/docs/getting-started/index.rst
@@ -25,7 +25,7 @@ Getting Started
  :id: "getting-started"
  :class: my-panel

-  * `Install ScyllaDB (Binary Packages, Docker, or EC2) <https://www.scylladb.com/download/>`_ - Links to the ScyllaDB Download Center
+  * `Install ScyllaDB (Binary Packages, Docker, or EC2) <https://www.scylladb.com/download/#core>`_ - Links to the ScyllaDB Download Center
  
  * :doc:`Configure ScyllaDB </getting-started/system-configuration/>`
  * :doc:`Run ScyllaDB in a Shared Environment </getting-started/scylla-in-a-shared-environment>`
--- a/docs/getting-started/install-scylla/index.rst
+++ b/docs/getting-started/install-scylla/index.rst
@@ -20,7 +20,7 @@ Install ScyllaDB

  Keep your versions up-to-date. The two latest versions are supported. Also always install the latest patches for your version. 

-  * Download and install ScyllaDB Server, Drivers and Tools in `Scylla Download Center <https://www.scylladb.com/download/#server/>`_
+  * Download and install ScyllaDB Server, Drivers and Tools in `ScyllaDB Download Center <https://www.scylladb.com/download/#core>`_
  * :doc:`ScyllaDB Web Installer for Linux <scylla-web-installer>`
  * :doc:`ScyllaDB Unified Installer (relocatable executable) <unified-installer>`
  * :doc:`Air-gapped Server Installation <air-gapped-install>`
--- a/docs/getting-started/install-scylla/scylla-web-installer.rst
+++ b/docs/getting-started/install-scylla/scylla-web-installer.rst
@@ -4,7 +4,7 @@ ScyllaDB Web Installer for Linux

 ScyllaDB Web Installer is a platform-agnostic installation script you can run with ``curl`` to install ScyllaDB on Linux.

-See `ScyllaDB Download Center <https://www.scylladb.com/download/#server>`_ for information on manually installing ScyllaDB with platform-specific installation packages.
+See `ScyllaDB Download Center <https://www.scylladb.com/download/#core>`_ for information on manually installing ScyllaDB with platform-specific installation packages.

 Prerequisites
 --------------
--- a/docs/getting-started/os-support.rst
+++ b/docs/getting-started/os-support.rst
@@ -25,11 +25,7 @@ ScyllaDB Open Source

 .. note:: 

-    Recommended OS and ScyllaDB AMI/Image OS for ScyllaDB Open Source:
-
-       - Ubuntu 20.04 for versions 4.6 and later.
-       - CentOS 7 for versions earlier than 4.6.
-
+    The recommended OS for ScyllaDB Open Source is Ubuntu 22.04.

 +----------------------------+----------------------------------+-----------------------------+---------+-------+
 | Linux Distributions        |       Ubuntu                     |    Debian                   | CentOS /| Rocky/|
@@ -37,6 +33,8 @@ ScyllaDB Open Source
 +----------------------------+------+------+------+------+------+------+------+-------+-------+---------+-------+
 | ScyllaDB Version / Version | 14.04| 16.04| 18.04|20.04 |22.04 | 8    | 9    |  10   |  11   | 7       |   8   |
 +============================+======+======+======+======+======+======+======+=======+=======+=========+=======+
+|   5.2                      | |x|  | |x|  | |x|  | |v|  | |v|  | |x|  | |x|  | |v|   | |v|   | |v|     | |v|   |
+----------------------------+------+------+------+------+------+------+------+-------+-------+---------+-------+
 |   5.1                      | |x|  | |x|  | |v|  | |v|  | |v|  | |x|  | |x|  | |v|   | |v|   | |v|     | |v|   |
 +----------------------------+------+------+------+------+------+------+------+-------+-------+---------+-------+
 |   5.0                      | |x|  | |x|  | |v|  | |v|  | |v|  | |x|  | |x|  | |v|   | |v|   | |v|     | |v|   |
@@ -63,17 +61,18 @@ ScyllaDB Open Source
 +----------------------------+------+------+------+------+------+------+------+-------+-------+---------+-------+


-All releases are available as a Docker container, EC2 AMI, and a GCP image (GCP image from version 4.3).
+All releases are available as a Docker container, EC2 AMI, and a GCP image (GCP image from version 4.3). Since 
+version 5.2, the ScyllaDB AMI/Image OS for ScyllaDB Open Source is based on Ubuntu 22.04.
+


 ScyllaDB Enterprise
 --------------------

 .. note:: 
-   Recommended OS and ScyllaDB AMI/Image OS for ScyllaDB Enterprise:

-    - Ubuntu 20.04 for versions 2021.1 and later.
-    - CentOS 7 for versions earlier than 2021.1.
+    The recommended OS for ScyllaDB Enterprise is Ubuntu 22.04.
+

 +----------------------------+-----------------------------------+---------------------------+--------+-------+
 | Linux Distributions        |  Ubuntu                           | Debian                    | CentOS/| Rocky/|
@@ -81,11 +80,13 @@ ScyllaDB Enterprise
 +----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
 | ScyllaDB Version / Version | 14.04| 16.04| 18.04| 20.04| 22.04 | 8    | 9    | 10   | 11   |  7     | 8     |
 +============================+======+======+======+======+=======+======+======+======+======+========+=======+
+|   2023.1                   | |x|  | |x|  | |x|  | |v|  | |v|   | |x|  | |x|  | |v|  | |v|  | |v|    | |v|   |
+----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
 |   2022.2                   | |x|  | |x|  | |v|  | |v|  | |v|   | |x|  | |x|  | |v|  | |v|  | |v|    | |v|   |
 +----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
-|   2022.1                   | |x|  | |x|  | |v|  | |v|  | |x|   | |x|  | |x|  | |v|  | |v|  | |v|    | |v|   |
+|   2022.1                   | |x|  | |x|  | |v|  | |v|  | |v|   | |x|  | |x|  | |v|  | |v|  | |v|    | |v|   |
 +----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
-|   2021.1                   | |x|  | |v|  | |v|  | |v|  | |x|   | |x|  | |v|  | |v|  | |x|  | |v|    | |v|   |
+|   2021.1                   | |x|  | |v|  | |v|  | |v|  | |v|   | |x|  | |v|  | |v|  | |x|  | |v|    | |v|   |
 +----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
 |   2020.1                   | |x|  | |v|  | |v|  |  |x| | |x|   | |x|  | |v|  | |v|  | |x|  | |v|    | |v|   |
 +----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
@@ -95,4 +96,5 @@ ScyllaDB Enterprise
 +----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+


-All releases are available as a Docker container, EC2 AMI, and a GCP image (GCP image from version 2021.1).
+All releases are available as a Docker container, EC2 AMI, and a GCP image (GCP image from version 2021.1). Since 
+version 2023.1, the ScyllaDB AMI/Image OS for ScyllaDB Enterprise is based on Ubuntu 22.04.
--- a/docs/getting-started/system-requirements.rst
+++ b/docs/getting-started/system-requirements.rst
@@ -325,7 +325,7 @@ Storage: each instance can support  maximum of 24 local SSD of 375 GB partitions
 Microsoft Azure
 ---------------

-The `Lsv2-series <https://azure.microsoft.com/en-us/blog/announcing-the-general-availability-of-lsv2-series-azure-virtual-machines/>`_  features high throughput, low latency, and directly mapped local NVMe storage. The Lsv2 VMs run on the AMD EPYCTM 7551 processor with an all-core boost of 2.55GHz.
+The `Lsv3-series <https://learn.microsoft.com/en-us/azure/virtual-machines/lsv3-series/>`_  of Azure Virtual Machines (Azure VMs) features high-throughput, low-latency, directly mapped local NVMe storage. These VMs run on the 3rd Generation Intel® Xeon® Platinum 8370C (Ice Lake) processor in a hyper-threaded configuration.


 .. list-table::
@@ -336,32 +336,32 @@ The `Lsv2-series <https://azure.microsoft.com/en-us/blog/announcing-the-general-
     - vCPU
     - Mem (GB)
     - Storage
-   * - L8s_v2
+   * - Standard_L8s_v3
     - 8
     - 64
     - 1 x 1.92 TB
-   * - L16s_v2
+   * - Standard_L16s_v3
     - 16
     - 128
     - 2 x 1.92 TB
-   * - L32s_v2
+   * - Standard_L32s_v3
     - 32
     - 256
     - 4 x 1.92 TB
-   * - L48s_v2
+   * - Standard_L48s_v3
     - 48
     - 384
     - 6 x 1.92 TB     
-   * - L64s_v2
+   * - Standard_L64s_v3
     - 64
     - 512
     - 8 x  1.92 TB
-   * - L80s_v2
+   * - Standard_L80s_v3
     - 80
     - 640
     - 10 x 1.92 TB
       
-More on Azure Lsv2 instances `here <https://azure.microsoft.com/en-us/blog/announcing-the-general-availability-of-lsv2-series-azure-virtual-machines/>`_
+More on Azure Lsv3 instances `here <https://learn.microsoft.com/en-us/azure/virtual-machines/lsv3-series/>`_

 Oracle Cloud Infrastructure (OCI)
 ----------------------------------------
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,173 +1,53 @@
-:full-width:
-:hide-version-warning:
-:hide-pre-content:
-:hide-post-content:
-:hide-sidebar:
-:hide-secondary-sidebar:
-:landing:
+:orphan:

-.. title:: Welcome to ScyllaDB Documentation
+====================================
+ScyllaDB Open Source Documentation
+====================================

-.. hero-box::
-  :title: Welcome to ScyllaDB Documentation
-  :image: /_static/img/mascots/scylla-docs.svg
-  :search_box:
+.. meta::
+   :title: ScyllaDB Open Source Documentation
+   :description: ScyllaDB Open Source Documentation
+   :keywords: ScyllaDB Open Source, Scylla Open Source, Scylla docs, ScyllaDB documentation, Scylla Documentation

-  The most up-to-date documents for the fastest, best performing, high availability NoSQL database.
+About This User Guide
+-----------------------

-.. raw:: html
+ScyllaDB is a distributed NoSQL wide-column database for data-intensive apps that require 
+high performance and low latency.

-  <div class="landing__content landing__content">
+This user guide covers topics related to ScyllaDB Open Source - an open-source project that allows you to evaluate 
+experimental features, review the `source code <https://github.com/scylladb/scylladb>`_, and add your contributions
+to the project.

-.. raw:: html
+For topics related to other ScyllaDB flavors, see the documentation for `ScyllaDB Enterprise <https://enterprise.docs.scylladb.com/>`_ and 
+`ScyllaDB Cloud <https://cloud.docs.scylladb.com/>`_.

-  <div class="topics-grid topics-grid--scrollable grid-container full">
+Documentation Highlights
+--------------------------

-  <div class="grid-x grid-margin-x hs">
+* :doc:`Install ScyllaDB Open Source </getting-started/install-scylla/index>`
+* :doc:`Configure ScyllaDB Open Source </getting-started/system-configuration/>`
+* :doc:`Cluster Management Procedures </operating-scylla/procedures/cluster-management/index>`
+* :doc:`Upgrade ScyllaDB Open Source </upgrade/index>`
+* :doc:`CQL Reference </cql/index>`
+* :doc:`ScyllaDB Drivers </using-scylla/drivers/index>`

-.. topic-box::
-  :title: New to ScyllaDB? Start here!
-  :link: https://cloud.docs.scylladb.com/stable/scylladb-basics/
-  :class: large-4
-  :anchor: ScyllaDB Basics
+ScyllaDB Community
+--------------------------

-  Learn the essentials of ScyllaDB.
+Join the ScyllaDB Open Source community:

+* Contribute to the ScyllaDB Open Source `project <https://github.com/scylladb/scylladb>`_.
+* Join the `ScyllaDB Community Forum <https://forum.scylladb.com/>`_.
+* Join our `Slack Channel <https://slack.scylladb.com/>`_.
+* Sign up for the `scylladb-users <https://groups.google.com/d/forum/scylladb-users>`_ Google group.

-.. topic-box::
-  :title: Let us manage your DB
-  :link: https://cloud.docs.scylladb.com
-  :class: large-4
-  :anchor: ScyllaDB Cloud Documentation
+Learn How to Use ScyllaDB
+---------------------------

-  Simplify application development with ScyllaDB Cloud - a fully managed database-as-a-service.
-
-.. topic-box::
-  :title: Manage your own DB
-  :link: getting-started
-  :class: large-4
-  :anchor: ScyllaDB Open Source and Enterprise Documentation
-
-  Deploy and manage your database in your own environment.
-
-
-.. raw:: html
-
-  </div></div>
-
-.. raw:: html
-
-  <div class="topics-grid topics-grid--products">
-
-      <h2 class="topics-grid__title">Our Products</h2>
-
-      <div class="grid-container full">
-          <div class="grid-x grid-margin-x">
-
-.. topic-box::
-  :title: ScyllaDB Enterprise
-  :link: getting-started
-  :image: /_static/img/mascots/scylla-enterprise.svg
-  :class: topic-box--product,large-3,small-6
-
-  ScyllaDB’s most stable high-performance enterprise-grade NoSQL database.
-
-.. topic-box::
-  :title: ScyllaDB Open Source
-  :link: getting-started
-  :image: /_static/img/mascots/scylla-opensource.svg
-  :class: topic-box--product,large-3,small-6
-
-  A high-performance NoSQL database with a close-to-the-hardware, shared-nothing approach.
-
-.. topic-box::
-  :title: ScyllaDB Cloud
-  :link: https://cloud.docs.scylladb.com
-  :image: /_static/img/mascots/scylla-cloud.svg
-  :class: topic-box--product,large-3,small-6
-
-  A fully managed NoSQL database as a service powered by ScyllaDB Enterprise.
-
-.. topic-box::
-  :title: ScyllaDB Alternator
-  :link: https://docs.scylladb.com/stable/alternator/alternator.html
-  :image: /_static/img/mascots/scylla-alternator.svg
-  :class: topic-box--product,large-3,small-6
-
-  Open source Amazon DynamoDB-compatible API.
-
-.. topic-box::
-  :title: ScyllaDB Monitoring Stack
-  :link: https://monitoring.docs.scylladb.com
-  :image: /_static/img/mascots/scylla-monitor.svg
-  :class: topic-box--product,large-3,small-6
-
-  Complete open source monitoring solution for your ScyllaDB clusters.
-
-.. topic-box::
-  :title: ScyllaDB Manager
-  :link: https://manager.docs.scylladb.com
-  :image: /_static/img/mascots/scylla-manager.svg
-  :class: topic-box--product,large-3,small-6
-
-  Hassle-free ScyllaDB NoSQL database management for scale-out clusters.
-
-.. topic-box::
-  :title: ScyllaDB Drivers
-  :link: https://docs.scylladb.com/stable/using-scylla/drivers/
-  :image: /_static/img/mascots/scylla-drivers.svg
-  :class: topic-box--product,large-3,small-6
-
-  Shard-aware drivers for superior performance. 
-
-.. topic-box::
-  :title: ScyllaDB Operator
-  :link: https://operator.docs.scylladb.com
-  :image: /_static/img/mascots/scylla-enterprise.svg
-  :class: topic-box--product,large-3,small-6
-
-  Easily run and manage your ScyllaDB cluster on Kubernetes.
-
-.. raw:: html
-
-  </div></div></div>
-
-.. raw:: html
-
-  <div class="topics-grid">
-
-      <h2 class="topics-grid__title">Learn More About ScyllaDB</h2>
-      <p class="topics-grid__text"></p>
-      <div class="grid-container full">
-          <div class="grid-x grid-margin-x">
-
-.. topic-box::
-  :title: Attend ScyllaDB University
-  :link: https://university.scylladb.com/
-  :image: /_static/img/mascots/scylla-university.png
-  :class: large-6,small-12
-  :anchor: Find a Class
-
-  | Register to take a *free* class at ScyllaDB University.
-  | There are several learning paths to choose from.
-
-.. topic-box::
-  :title: Register for a Webinar
-  :link: https://www.scylladb.com/resources/webinars/
-  :image: /_static/img/mascots/scylla-with-computer-2.png
-  :class: large-6,small-12
-  :anchor: Find a Webinar
-
-  | You can either participate in a live webinar or see a recording on demand.
-  | There are several webinars to choose from.
-
-.. raw:: html
-
-  </div></div></div>
-
-.. raw:: html
-
-  </div>
+You can learn to use ScyllaDB by taking **free courses** at `ScyllaDB University <https://university.scylladb.com/>`_. 
+In addition, you can read our `blog <https://www.scylladb.com/blog/>`_ and attend ScyllaDB's 
+`webinars, workshops, and conferences <https://www.scylladb.com/company/events/>`_.

 .. toctree::
  :hidden:
@@ -179,7 +59,6 @@
  architecture/index
  troubleshooting/index
  kb/index
-  ScyllaDB University <https://university.scylladb.com/>
  faq
  Contribute to ScyllaDB <contribute>
  glossary
--- a/docs/operating-scylla/admin-tools/scylla-sstable.rst
+++ b/docs/operating-scylla/admin-tools/scylla-sstable.rst
@@ -17,7 +17,7 @@ This tool is similar to SStableDump_, with notable differences:
 * Expanded scope: this tool supports much more than dumping SStable data components (see `Supported Operations`_).
 * More flexible on how schema is obtained and where SStables are located: SStableDump_ only supports dumping SStables located in their native data directory. To dump an SStable, one has to clone the entire ScyllaDB data directory tree, including system table directories and even config files. ``scylla sstable`` can dump sstables from any path with multiple choices on how to obtain the schema, see Schema_.

-Currently, SStableDump_ works better on production systems as it automatically loads the schema from the system tables, unlike ``scylla sstable``, which has to be provided with the schema explicitly. On the other hand ``scylla sstable`` works better for off-line investigations, as it can be used with as little as just a schema definition file and a single sstable. In the future we plan on closing this gap -- adding support for automatic schema-loading for ``scylla sstable`` too -- and completely supplant SStableDump_ with ``scylla sstable``.
+``scylla sstable`` was developed to supplant SStableDump_ as a ScyllaDB-native tool, better tailored for the needs of ScyllaDB.

 .. _SStableDump: /operating-scylla/admin-tools/sstabledump

@@ -35,14 +35,33 @@ You can specify more than one SStable.

 Schema
 ------
+
 All operations need a schema to interpret the SStables with.
-Currently, there are two ways to obtain the schema:
+This tool tries to auto-detect the location of the ScyllaDB data directories and the name of the table the SStable belongs to.
+If the SStable is located in a ScyllaDB data directory, it works out-of-the-box, without any additional input from the user.
+If the SStable is located at an external path, you need to specify the names of the keyspace and table to which the SStable belongs. In addition, some hints as to where the ScyllaDB data directory is located may also be required.

+The schema can be obtained in the following ways:
+
+* Auto-detected - If the SStable is located in the table's directory within the ScyllaDB data directory.
+* ``--keyspace=KEYSPACE --table=TABLE`` - If the SStable is located at an external location, but the ScyllaDB data directory or the config file are located at the standard location. The tool also reads the ``SCYLLA_CONF`` and ``SCYLLA_HOME`` environment variables to try to locate the configuration file.
 * ``--schema-file FILENAME`` - Read the schema definition from a file.
-* ``--system-schema KEYSPACE.TABLE`` - Use the known definition of built-in tables (only works for system tables).
+* ``--system-schema --keyspace=KEYSPACE --table=TABLE`` - Use the known definition of built-in tables (only works for system tables).
+* ``--scylla-data-dir SCYLLA_DATA_DIR_PATH --keyspace=KEYSPACE --table=TABLE`` - Read the schema tables from the data directory at the provided location, needs the keyspace and table name to be provided with ``--keyspace`` and ``--table``.
+* ``--scylla-yaml-file SCYLLA_YAML_FILE_PATH --keyspace=KEYSPACE --table=TABLE`` - Read the schema tables from the data directory path obtained from the configuration, needs the keyspace and table name to be provided with ``--keyspace`` and ``--table``.

-By default, the tool uses the first method: ``--schema-file schema.cql``; i.e. it assumes there is a schema file named ``schema.cql`` in the working directory.
-If this fails, it will exit with an error.
+By default (no schema-related options are provided), the tool will try the following sequence:
+
+* Try to load schema from ``schema.cql``.
+* Try to deduce the ScyllaDB data directory path and table names from the SStable path.
+* Try to load the schema from the ScyllaDB directory located at the standard location (``/var/lib/scylla``). For this to succeed, the table name has to be provided via ``--keyspace`` and ``--table``.
+* Try to load the schema from the ScyllaDB directory path obtained from config at the standard location (``./conf/scylla.yaml``). ``SCYLLA_CONF`` and ``SCYLLA_HOME`` environment variables are also checked. For this to succeed, the table name has to be provided via ``--keyspace`` and ``--table``.
+
+The tool stops after the first successful attempt. If none of the above succeed, an error message will be printed.
+A user-provided schema in ``schema.cql`` (if present) always takes precedence over other methods. This is deliberate, to allow manually overriding the schema to be used.
+
+schema.cql
+^^^^^^^^^^

 The schema file should contain all definitions needed to interpret data belonging to the table.

@@ -72,7 +91,7 @@ Note:
 * The schema file doesn't have to be called ``schema.cql``, this is just the default name. Any file name is supported (with any extension).

 Dropped columns
-^^^^^^^^^^^^^^^
+~~~~~~~~~~~~~~~

 The examined sstable might have columns which were dropped from the schema definition. In this case providing the up-do-date schema will not be enough, the tool will fail when attempting to process a cell for the dropped column.
 Dropped columns can be provided to the tool in the form of insert statements into the ``system_schema.dropped_columns`` system table, in the schema definition file. Example:
@@ -282,34 +301,6 @@ The content is dumped in JSON, using the following schema:
        },
        "pos": Uint64
    }
-    )",
-                dump_index_operation},
-    /* dump-compression-info */
-        {"dump-compression-info",
-                "Dump content of sstable compression info(s)",
-    R"(
-    Dumps the content of the compression-info component. Contains compression
-    parameters and maps positions into the uncompressed data to that into compressed
-    data. Note that compression happens over chunks with configurable size, so to
-    get data at a position in the middle of a compressed chunk, the entire chunk has
-    to be decompressed.
-    For more information about the sstable components and the format itself, visit
-    https://docs.scylladb.com/architecture/sstable/.
-
-    The content is dumped in JSON, using the following schema:
-
-    $ROOT := { "$sstable_path": $SSTABLE, ... }
-
-    $SSTABLE := {
-        "name": String,
-        "options": {
-            "$option_name": String,
-            ...
-        },
-        "chunk_len": Uint,
-        "data_len": Uint64,
-        "offsets": [Uint64, ...]
-    }

 dump-compression-info
 ^^^^^^^^^^^^^^^^^^^^^
--- a/docs/operating-scylla/admin-tools/sstable2json.rst
+++ b/docs/operating-scylla/admin-tools/sstable2json.rst
@@ -4,9 +4,7 @@ SSTable2json

 This tool allows you to converts SSTable into a JSON format file.
 SSTable2json supported when using Scylla 2.x or lower version.
-In newer versions, the tool is named SSTabledump_.
-
-.. _SSTabledump: /operating-scylla/admin-tools/sstabledump
+In newer versions, the tool is named :doc:`SSTabledump </operating-scylla/admin-tools/sstabledump>`.

 .. note:: 

--- a/docs/operating-scylla/admin-tools/sstabledump.rst
+++ b/docs/operating-scylla/admin-tools/sstabledump.rst
@@ -3,11 +3,9 @@ SSTabledump

 This tool allows you to converts SSTable into a JSON format file.
 SSTabledump supported when using Scylla 3.0, Scylla Enterprise 2019.1, and newer versions.
-In older versions, the tool is named SSTable2json_.
-If you need more flexibility or want to dump more than just the data-component, see scylla-sstable_.
+In older versions, the tool is named :doc:`SSTable2json </operating-scylla/admin-tools/sstable2json>`.
+If you need more flexibility or want to dump more than just the data-component, see :doc:`scylla-sstable </operating-scylla/admin-tools/scylla-sstable>`.

-.. _SSTable2json: /operating-scylla/admin-tools/sstable2json
-.. _scylla-sstable: /operating-scylla/admin-tools/scylla-sstable

 Use the full path to the data file when executing the command.

--- a/docs/operating-scylla/admin.rst
+++ b/docs/operating-scylla/admin.rst
@@ -91,12 +91,17 @@ The :code:`scylla-server` file contains configuration related to starting up the

 .. include:: /operating-scylla/scylla-yaml.inc

+.. _admin-compression:
+
 Compression
 -----------

 In Scylla, you can configure compression at rest and compression in transit.
 For compression in transit, you can configure compression between nodes or between the client and the node.

+
+.. _admin-client-node-compression:
+
 Client - Node Compression
 ^^^^^^^^^^^^^^^^^^^^^^^^^^

--- a/docs/operating-scylla/nodetool-commands/repair.rst
+++ b/docs/operating-scylla/nodetool-commands/repair.rst
@@ -41,14 +41,6 @@ Scylla nodetool repair command supports the following options:

     nodetool repair -et 90874935784
     nodetool repair --end-token 90874935784
-
- ``-seq``, ``--sequential`` Use *-seq* to carry out a sequential repair.
-
-  For example, a sequential repair of all keyspaces on a node:
-
-  ::
-
-     nodetool repair -seq
     
 - ``-hosts`` ``--in-hosts`` syncs the **repair master** data subset only between a list of nodes, using host ID or Address. The list *must* include the **repair master**.

--- a/docs/operating-scylla/nodetool-commands/snapshot.rst
+++ b/docs/operating-scylla/nodetool-commands/snapshot.rst
@@ -108,10 +108,7 @@ Each of the snapshots is a **hardlink** to to the SSTable directory.
 Additional Resources
 ^^^^^^^^^^^^^^^^^^^^

-* `Backup your data`_ 
-* `Scylla Snapshots`_
-
-.. _`Backup your data`: /operating-scylla/procedures/backup-restore/backup
-.. _`Scylla Snapshots`: /kb/snapshots
+* :doc:`Backup your data </operating-scylla/procedures/backup-restore/backup>`
+* :doc:`Scylla Snapshots </kb/snapshots>`

 .. include:: /rst_include/apache-copyrights.rst
--- a/docs/operating-scylla/procedures/backup-restore/backup.rst
+++ b/docs/operating-scylla/procedures/backup-restore/backup.rst
@@ -17,12 +17,8 @@ The backup includes two procedures. These are:
 Full Backup - Snapshots
 =======================

-Snapshots are taken using `nodetool snapshot`_. First, the command flushes the MemTables from memory to SSTables on disk, and afterward, it creates a hard link for each SSTable in each keyspace.
-With time, SSTables are compacted, but the hard link keeps a copy of each file. This takes up an increasing amount of disk space. It is important to clear space by `clean unnecessary snapshots`_.
-
-.. _`nodetool snapshot`: /operating-scylla/nodetool-commands/snapshot
-
-.. _`clean unnecessary snapshots`: /operating-scylla/procedures/backup-restore/delete_snapshot
+Snapshots are taken using :doc:`nodetool snapshot </operating-scylla/nodetool-commands/snapshot>`. First, the command flushes the MemTables from memory to SSTables on disk, and afterward, it creates a hard link for each SSTable in each keyspace.
+With time, SSTables are compacted, but the hard link keeps a copy of each file. This takes up an increasing amount of disk space. It is important to clear space by :doc:`cleaning unnecessary snapshots </operating-scylla/procedures/backup-restore/delete-snapshot>`.

 **Procedure**

@@ -77,8 +73,6 @@ Incremental Backup
 Additional Resources
 ====================

-* `Scylla Snapshots`_
+* :doc:`Scylla Snapshots </kb/snapshots>`


-.. _`Scylla Snapshots`: /kb/snapshots
-
--- a/docs/operating-scylla/procedures/cassandra-to-scylla-migration-process.rst
+++ b/docs/operating-scylla/procedures/cassandra-to-scylla-migration-process.rst
@@ -133,9 +133,7 @@ Procedure

 See the full code example `here <https://github.com/scylladb/scylla-code-samples/tree/master/dual_writes>`_

-3. On each Apache Cassandra node, take a snapshot for every keyspace using the `nodetool snapshot`_ command. This will flush all SSTables to disk and generate a ``snapshots`` folder with an epoch timestamp for each underlying table in that keyspace. 
-
-.. _`nodetool snapshot`: /operating-scylla/nodetool-commands/snapshot
+3. On each Apache Cassandra node, take a snapshot for every keyspace using the :doc:`nodetool snapshot </operating-scylla/nodetool-commands/snapshot>` command. This will flush all SSTables to disk and generate a ``snapshots`` folder with an epoch timestamp for each underlying table in that keyspace. 

   Folder path post snapshot: ``/var/lib/cassandra/data/keyspace/table-[uuid]/snapshots/[epoch_timestamp]/``

--- a/docs/operating-scylla/procedures/cluster-management/_common/prereq.rst
+++ b/docs/operating-scylla/procedures/cluster-management/_common/prereq.rst
@@ -3,6 +3,7 @@
 * endpoint_snitch - ``grep endpoint_snitch /etc/scylla/scylla.yaml``
 * Scylla version - ``scylla --version``
 * Authenticator - ``grep authenticator /etc/scylla/scylla.yaml``
+* consistent_cluster_management - ``grep consistent_cluster_management /etc/scylla/scylla.yaml``

 .. Note:: 

--- a/docs/operating-scylla/procedures/cluster-management/add-dc-to-existing-dc.rst
+++ b/docs/operating-scylla/procedures/cluster-management/add-dc-to-existing-dc.rst
@@ -119,6 +119,7 @@ Add New DC
   * **listen_address** - IP address that Scylla used to connect to the other Scylla nodes in the cluster.
   * **endpoint_snitch** - Set the selected snitch.
   * **rpc_address** - Address for client connections (Thrift, CQL).
+   * **consistent_cluster_management** - set to the same value as used by your existing nodes.

   The parameters ``seeds``, ``cluster_name`` and ``endpoint_snitch`` need to match the existing cluster.

@@ -200,6 +201,11 @@ Add New DC

 #. If you are using Scylla Monitoring, update the `monitoring stack <https://monitoring.docs.scylladb.com/stable/install/monitoring_stack.html#configure-scylla-nodes-from-files>`_ to monitor it. If you are using Scylla Manager, make sure you install the `Manager Agent <https://manager.docs.scylladb.com/stable/install-scylla-manager-agent.html>`_ and Manager can access the new DC.

+Handling Failures
+=================
+
+If one of the new nodes starts bootstrapping but then fails in the middle e.g. due to a power loss, you can retry bootstrap (by restarting the node). If you don't want to retry, or the node refuses to boot on subsequent attempts, consult the :doc:`Handling Membership Change Failures document</operating-scylla/procedures/cluster-management/handling-membership-change-failures>`.
+
 Configure the Client not to Connect to the New DC
 -------------------------------------------------

--- a/docs/operating-scylla/procedures/cluster-management/add-node-to-cluster.rst
+++ b/docs/operating-scylla/procedures/cluster-management/add-node-to-cluster.rst
@@ -54,6 +54,8 @@ Procedure

    * **seeds** - Specifies the IP address of an existing node in the cluster. The new node will use this IP to connect to the cluster and learn the cluster topology and state.

+    * **consistent_cluster_management** - set to the same value as used by your existing nodes.
+
   .. note:: 

       In earlier versions of ScyllaDB, seed nodes assisted in gossip. Starting with Scylla Open Source 4.3 and Scylla Enterprise 2021.1, the seed concept in gossip has been removed. If you are using an earlier version of ScyllaDB, you need to configure the seeds parameter in the following way:
@@ -117,3 +119,8 @@ Procedure
       You don't need to restart the Scylla service after modifying the seeds list in ``scylla.yaml``.

 #. If you are using Scylla Monitoring, update the `monitoring stack <https://monitoring.docs.scylladb.com/stable/install/monitoring_stack.html#configure-scylla-nodes-from-files>`_ to monitor it. If you are using Scylla Manager, make sure you install the `Manager Agent <https://manager.docs.scylladb.com/stable/install-scylla-manager-agent.html>`_, and Manager can access it.
+
+Handling Failures
+=================
+
+If the node starts bootstrapping but then fails in the middle e.g. due to a power loss, you can retry bootstrap (by restarting the node). If you don't want to retry, or the node refuses to boot on subsequent attempts, consult the :doc:`Handling Membership Change Failures document</operating-scylla/procedures/cluster-management/handling-membership-change-failures>`.
--- a/docs/operating-scylla/procedures/cluster-management/create-cluster-multidc.rst
+++ b/docs/operating-scylla/procedures/cluster-management/create-cluster-multidc.rst
@@ -70,6 +70,7 @@ the file can be found under ``/etc/scylla/``
 - **listen_address** - IP address that the Scylla use to connect to other Scylla nodes in the cluster
 - **endpoint_snitch** - Set the selected snitch
 - **rpc_address** - Address for client connection (Thrift, CQLSH)
+- **consistent_cluster_management** - ``true`` by default, can be set to ``false`` if you don't want to use Raft for consistent schema management in this cluster (will be mandatory in later versions). Check the :doc:`Raft in ScyllaDB document</architecture/raft/>` to learn more.

 3. In the ``cassandra-rackdc.properties`` file, edit the rack and data center information. 
 The file can be found under ``/etc/scylla/``.
--- a/docs/operating-scylla/procedures/cluster-management/create-cluster.rst
+++ b/docs/operating-scylla/procedures/cluster-management/create-cluster.rst
@@ -26,6 +26,7 @@ The file can be found under ``/etc/scylla/``
 - **listen_address** - IP address that Scylla used to connect to other Scylla nodes in the cluster
 - **endpoint_snitch** - Set the selected snitch
 - **rpc_address** - Address for client connection (Thrift, CQL)
+- **consistent_cluster_management** - ``true`` by default, can be set to ``false`` if you don't want to use Raft for consistent schema management in this cluster (will be mandatory in later versions). Check the :doc:`Raft in ScyllaDB document</architecture/raft/>` to learn more.

 3. This step needs to be done **only** if you are using the **GossipingPropertyFileSnitch**. If not, skip this step.
 In the ``cassandra-rackdc.properties`` file, edit the parameters listed below.
--- a/docs/operating-scylla/procedures/cluster-management/ec2-dc.rst
+++ b/docs/operating-scylla/procedures/cluster-management/ec2-dc.rst
@@ -63,6 +63,7 @@ Perform the following steps for each node in the new cluster:
     * **rpc_address** - Address for client connection (Thrift, CQL).
     * **broadcast_address** - The IP address a node tells other nodes in the cluster to contact it by.
     * **broadcast_rpc_address** - Default: unset. The RPC address to broadcast to drivers and other Scylla nodes. It cannot be set to 0.0.0.0. If left blank, it will be set to the value of ``rpc_address``. If ``rpc_address`` is set to 0.0.0.0, ``broadcast_rpc_address`` must be explicitly configured.
+     * **consistent_cluster_management** - ``true`` by default, can be set to ``false`` if you don't want to use Raft for consistent schema management in this cluster (will be mandatory in later versions). Check the :doc:`Raft in ScyllaDB document</architecture/raft/>` to learn more.

 #. After you have installed and configured Scylla and edited ``scylla.yaml`` file on all the nodes, start the node specified with the ``seeds`` parameter. Then start the rest of the nodes in your cluster, one at a time, using
   ``sudo systemctl start scylla-server``.
--- a/docs/operating-scylla/procedures/cluster-management/handling-membership-change-failures.rst
+++ b/docs/operating-scylla/procedures/cluster-management/handling-membership-change-failures.rst
@@ -0,0 +1,204 @@
+Handling Cluster Membership Change Failures
+*******************************************
+
+A failure, such as a loss of power, may happen in the middle of a cluster membership change (that is, bootstrap, decommission, removenode, or replace). If that happens, you should ensure that the cluster is brought back to a consistent state as soon as possible. Further membership changes might be impossible until you do so.
+
+For example, a node that crashed in the middle of decommission might leave the cluster in a state where it considers the node to still be a member, but the node itself will refuse to restart and communicate with the cluster. This particular case is very unlikely - it requires a specifically timed crash to happen, after the data streaming phase of decommission finishes but before the node commits that it left. But if it happens, you won't be able to bootstrap other nodes (they will try to contact the partially-decommissioned node and fail) until you remove the remains of the node that crashed.
+
+---------------------------
+Handling a Failed Bootstrap
+---------------------------
+
+If a failure happens when trying to bootstrap a new node to the cluster, you can try bootstrapping the node again by restarting it.
+
+If the failure persists or you decided that you don't want to bootstrap the node anymore, follow the instructions in the :ref:`cleaning up after a failed membership change <cleaning-up-after-change>` section to remove the remains of the bootstrapping node. You can then clear the node's data directories and attempt to bootstrap it again.
+
+------------------------------
+Handling a Failed Decommission
+------------------------------
+
+There are two cases.
+
+Most likely the failure happened during the data repair/streaming phase - before the node tried to leave the token ring. Look for a log message containing "leaving token ring" in the logs of the node that you tried to decommission. For example:
+
+.. code-block:: console
+
+    INFO  2023-03-14 13:08:38,323 [shard 0] storage_service - decommission[5b2e752e-964d-4f36-871f-254491f4e8cc]: leaving token ring
+
+If the message is **not** present, the failure happened before the node tried to leave the token ring. In that case you can simply restart the node and attempt to decommission it again.
+
+If the message is present, the node attempted to leave the token ring, but it might have left the cluster only partially before the failure. **Do not try to restart the node**. Instead, you must make sure that the node is dead and remove any leftovers using the :doc:`removenode operation </operating-scylla/nodetool-commands/removenode/>`. See :ref:`cleaning up after a failed membership change <cleaning-up-after-change>`. Trying to restart the node after such a failure results in unpredictable behavior - it may restart normally, it may refuse to restart, or it may even try to rebootstrap.
+
+If you don't have access to the node's logs anymore, assume the second case (the node might have attempted to leave the token ring). **Do not try to restart the node**. Instead, follow the :ref:`cleaning up after a failed membership change <cleaning-up-after-change>` section.
+
+----------------------------
+Handling a Failed Removenode
+----------------------------
+
+Simply retry the removenode operation.
+
+If you somehow lost the host ID of the node that you tried to remove, follow the instructions in :ref:`cleaning up after a failed membership change <cleaning-up-after-change>`.
+
+--------------------------
+Handling a Failed Replace
+--------------------------
+
+Replace is a special case of bootstrap, but the bootstrapping node tries to take the place of another dead node. You can retry a failed replace operation by restarting the replacing node.
+
+If the failure persists or you decided that you don't want to perform the replace anymore, follow the instructions in the :ref:`cleaning up after a failed membership change <cleaning-up-after-change>` section to remove the remains of the replacing node. You can then clear the node's data directories and attempt to replace again. Alternatively, you can remove the dead node which you initially tried to replace using :doc:`removenode </operating-scylla/nodetool-commands/removenode/>`, and perform a regular bootstrap.
+
+.. _cleaning-up-after-change:
+
+--------------------------------------------
+Cleaning up after a Failed Membership Change
+--------------------------------------------
+
+After a failed membership change, the cluster may contain remains of a node that tried to leave or join - other nodes may consider the node a member, possibly in a transitioning state. It is important to remove any such "ghost" members. Their presence may reduce the cluster's availability or performance, or prevent further membership changes.
+
+You need to determine the host IDs of any potential ghost members, then remove them using the :doc:`removenode operation </operating-scylla/nodetool-commands/removenode/>`. Note that after a failed replace, there may be two different host IDs that you'll want to find and run ``removenode`` on: the new replacing node and the old node that you tried to replace. (Or you can remove the new node only, then try to replace the old node again.)
+
+Step One: Determining Host IDs of Ghost Members
+===============================================
+
+* After a failed bootstrap, you need to determine the host ID of the node that tried to bootstrap, if it managed to generate a host ID (it might not have chosen the host ID yet if it failed very early in the procedure, in which case there's nothing to remove). Look for a message containing ``system_keyspace - Setting local host id to`` in the node's logs, which will contain the node's host ID. For example: ``system_keyspace - Setting local host id to f180b78b-6094-434d-8432-7327f4d4b38d``. If you don't have access to the node's logs, read the generic method below.
+* After a failed decommission, you need to determine the host ID of the node that tried to decommission. You can search the node's logs as in the failed bootstrap case (see above), or you can use the generic method below.
+* After a failed removenode, you need to determine the host ID of the node that you tried to remove. You should already have it, since executing a removenode requires the host ID in the first place. But if you lost it somehow, read the generic method below.
+* After a failed replace, you need to determine the host ID of the replacing node. Search the node's logs as in the failed bootstrap case (see above), or you can use the generic method below. You may also want to determine the host ID of the replaced node - either to attempt replacing it again after removing the remains of the previous replacing node, or to remove it using :doc:`nodetool removenode </operating-scylla/nodetool-commands/removenode/>`. You should already have the host ID of the replaced node if you used the ``replace_node_first_boot`` option to perform the replace.
+
+If you cannot determine the ghost members' host ID using the suggestions above, use the method described below. The approach differs depending on whether Raft is enabled in your cluster.
+
+.. tabs::
+
+    .. group-tab:: Raft enabled
+
+        #. Make sure there are no ongoing membership changes.
+
+        #. Execute the following CQL query on one of your nodes to retrieve the Raft group 0 ID:
+
+            .. code-block:: cql
+
+                select value from system.scylla_local where key = 'raft_group0_id'
+
+            For example:
+
+            .. code-block:: cql
+
+                cqlsh> select value from system.scylla_local where key = 'raft_group0_id';
+
+                 value
+                --------------------------------------
+                 607fef80-c276-11ed-a6f6-3075f294cc65
+
+        #. Use the obtained Raft group 0 ID to query the set of all cluster members' host IDs (which includes the ghost members), by executing the following query:
+
+            .. code-block:: cql
+
+                select server_id from system.raft_state where group_id = <group0_id>
+
+            replace ``<group0_id>`` with the group 0 ID that you obtained. For example:
+
+            .. code-block:: cql
+
+                cqlsh> select server_id from system.raft_state where group_id = 607fef80-c276-11ed-a6f6-3075f294cc65;
+
+                 server_id
+                --------------------------------------
+                 26a9badc-6e96-4b86-a8df-5173e5ab47fe
+                 7991e7f5-692e-45a0-8ae5-438be5bc7c4f
+                 aff11c6d-fbe7-4395-b7ca-3912d7dba2c6
+
+        #. Execute the following CQL query to obtain the host IDs of all token ring members:
+
+            .. code-block:: cql
+
+                select peer, host_id, up from system.cluster_status;
+
+            For example:
+
+            .. code-block:: cql
+
+                cqlsh> select peer, host_id, up from system.cluster_status;
+
+                 peer      | host_id                              | up
+                -----------+--------------------------------------+-------
+                 127.0.0.3 |                                 null | False
+                 127.0.0.1 | 26a9badc-6e96-4b86-a8df-5173e5ab47fe |  True
+                 127.0.0.2 | 7991e7f5-692e-45a0-8ae5-438be5bc7c4f |  True
+
+            The output of this query is similar to the output of ``nodetool status``.
+
+            We included the ``up`` column to see which nodes are down and the ``peer`` column to see their IP addresses.
+
+            In this example, one of the nodes tried to decommission and crashed as soon as it left the token ring but before it left the Raft group. Its entry will show up in ``system.cluster_status`` queries with ``host_id = null``, like above, until the cluster is restarted.
+
+        #. A host ID belongs to a ghost member if:
+
+            * It appears in the ``system.raft_state`` query but not in the ``system.cluster_status`` query,
+            * Or it appears in the ``system.cluster_status`` query but does not correspond to any remaining node in your cluster.
+
+            In our example, the ghost member's host ID was ``aff11c6d-fbe7-4395-b7ca-3912d7dba2c6`` because it appeared in the ``system.raft_state`` query but not in the ``system.cluster_status`` query.
+
+            If you're unsure whether a given row in the ``system.cluster_status`` query corresponds to a node in your cluster, you can connect to each node in the cluster and execute ``select host_id from system.local`` (or search the node's logs) to obtain that node's host ID, collecting the host IDs of all nodes in your cluster. Then check if each host ID from the ``system.cluster_status`` query appears in your collected set; if not, it's a ghost member.
+
+            A good rule of thumb is to look at the members marked as down (``up = False`` in ``system.cluster_status``) - ghost members are eventually marked as down by the remaining members of the cluster. But remember that a real member might also be marked as down if it was shut down or partitioned away from the rest of the cluster. If in doubt, connect to each node and collect their host IDs, as described in the previous paragraph.
+
+    .. group-tab:: Raft disabled
+
+        #. Make sure there are no ongoing membership changes.
+
+        #. Execute the following CQL query on one of your nodes to obtain the host IDs of all token ring members:
+
+            .. code-block:: cql
+
+                select peer, host_id, up from system.cluster_status;
+
+            For example:
+
+            .. code-block:: cql
+
+                cqlsh> select peer, host_id, up from system.cluster_status;
+
+                 peer      | host_id                              | up
+                -----------+--------------------------------------+-------
+                 127.0.0.3 | 42405b3b-487e-4759-8590-ddb9bdcebdc5 | False
+                 127.0.0.1 | 4e3ee715-528f-4dc9-b10f-7cf294655a9e |  True
+                 127.0.0.2 | 225a80d0-633d-45d2-afeb-a5fa422c9bd5 |  True
+
+            The output of this query is similar to the output of ``nodetool status``.
+
+            We included the ``up`` column to see which nodes are down.
+
+            In this example, one of the 3 nodes tried to decommission but crashed while it was leaving the token ring. The node is in a partially left state and will refuse to restart, but other nodes still consider it as a normal member. We'll have to use ``removenode`` to clean up after it.
+
+        #. A host ID belongs to a ghost member if it appears in the ``system.cluster_status`` query but does not correspond to any remaining node in your cluster.
+
+            If you're unsure whether a given row in the ``system.cluster_status`` query corresponds to a node in your cluster, you can connect to each node in the cluster and execute ``select host_id from system.local`` (or search the node's logs) to obtain that node's host ID, collecting the host IDs of all nodes in your cluster. Then check if each host ID from the ``system.cluster_status`` query appears in your collected set; if not, it's a ghost member.
+
+            A good rule of thumb is to look at the members marked as down (``up = False`` in ``system.cluster_status``) - ghost members are eventually marked as down by the remaining members of the cluster. But remember that a real member might also be marked as down if it was shut down or partitioned away from the rest of the cluster. If in doubt, connect to each node and collect their host IDs, as described in the previous paragraph.
+
+            In our example, the ghost member's host ID is ``42405b3b-487e-4759-8590-ddb9bdcebdc5`` because it is the only member marked as down and we can verify that the other two rows appearing in ``system.cluster_status`` belong to the remaining 2 nodes in the cluster.
+
+In some cases, even after a failed topology change, there may be no ghost members left - for example, if a bootstrapping node crashed very early in the procedure or a decommissioning node crashed after it committed the membership change but before it finalized its own shutdown steps.
+
+If any ghost members are present, proceed to the next step.
+
+Step Two: Removing the Ghost Members
+====================================
+
+Given the host IDs of ghost members, you can remove them using ``removenode``; follow the :doc:`documentation for removenode operation </operating-scylla/nodetool-commands/removenode/>`.
+
+If you're executing ``removenode`` too quickly after a failed membership change, an error similar to the following might pop up:
+
+.. code-block:: console
+
+    nodetool: Scylla API server HTTP POST to URL '/storage_service/remove_node' failed: seastar::rpc::remote_verb_error (node_ops_cmd_check: Node 127.0.0.2 rejected node_ops_cmd=removenode_abort from node=127.0.0.1 with ops_uuid=0ba0a5ab-efbd-4801-a31c-034b5f55487c, pending_node_ops={b47523f2-de6a-4c38-8490-39127dba6b6a}, pending node ops is in progress)
+
+In that case simply wait for 2 minutes before trying ``removenode`` again.
+
+If ``removenode`` returns an error like:
+
+.. code-block:: console
+
+    nodetool: Scylla API server HTTP POST to URL '/storage_service/remove_node' failed: std::runtime_error (removenode[12e7e05b-d1ae-4978-b6a6-de0066aa80d8]: Host ID 42405b3b-487e-4759-8590-ddb9bdcebdc5 not found in the cluster)
+
+and you're sure that you're providing the correct Host ID, it means that the member was already removed and you don't have to clean up after it.
--- a/docs/operating-scylla/procedures/cluster-management/index.rst
+++ b/docs/operating-scylla/procedures/cluster-management/index.rst
@@ -25,6 +25,7 @@ Cluster Management Procedures
   Safely Shutdown Your Cluster <safe-shutdown>
   Safely Restart Your Cluster <safe-start>
   Cluster Membership Change <membership-changes>
+   Handling Membership Change Failures <handling-membership-change-failures>
   repair-based-node-operation

 .. panel-box::
@@ -80,6 +81,8 @@ Cluster Management Procedures

  * :doc:`Cluster Membership Change Notes </operating-scylla/procedures/cluster-management/membership-changes/>`

+  * :doc:`Handling Membership Change Failures </operating-scylla/procedures/cluster-management/handling-membership-change-failures>`
+
  * :ref:`Add Bigger Nodes to a Cluster <add-bigger-nodes-to-a-cluster>`

  * :doc:`Repair Based Node Operations (RBNO) </operating-scylla/procedures/cluster-management/repair-based-node-operation>`
--- a/docs/operating-scylla/procedures/cluster-management/remove-node.rst
+++ b/docs/operating-scylla/procedures/cluster-management/remove-node.rst
@@ -49,6 +49,11 @@ Removing a Running Node

   .. include:: /rst_include/clean-data-code.rst

+Handling Failures
+-----------------
+
+If ``nodetool decommission`` starts executing but then fails in the middle, e.g., due to a power loss, consult the :doc:`Handling Membership Change Failures document</operating-scylla/procedures/cluster-management/handling-membership-change-failures>`.
+
 ----------------------------
 Removing an Unavailable Node
 ----------------------------
@@ -81,7 +86,6 @@ the ``nodetool removenode`` operation will fail. To ensure successful operation
  ``nodetool removenode`` (not required when :doc:`Repair Based Node Operations (RBNO) <repair-based-node-operation>` for ``removenode`` 
  is enabled).

-
 Additional Information
 ----------------------
 * :doc:`Nodetool Reference </operating-scylla/nodetool>`
--- a/docs/operating-scylla/procedures/cluster-management/replace-dead-node-or-more.rst
+++ b/docs/operating-scylla/procedures/cluster-management/replace-dead-node-or-more.rst
@@ -25,6 +25,7 @@ Login to one of the nodes in the cluster with (UN) status, collect the following
 * seeds - ``cat /etc/scylla/scylla.yaml | grep seeds:``
 * endpoint_snitch - ``cat /etc/scylla/scylla.yaml | grep endpoint_snitch``
 * Scylla version - ``scylla --version``
+* consistent_cluster_management - ``grep consistent_cluster_management /etc/scylla/scylla.yaml``

 Procedure
 ---------
--- a/docs/operating-scylla/procedures/cluster-management/replace-dead-node.rst
+++ b/docs/operating-scylla/procedures/cluster-management/replace-dead-node.rst
@@ -66,6 +66,8 @@ Procedure

    - **rpc_address** - Address for client connection (Thrift, CQL)

+    - **consistent_cluster_management** - set to the same value as used by your existing nodes.
+
 #. Add the ``replace_node_first_boot`` parameter to the ``scylla.yaml`` config file on the new node. This line can be added to any place in the config file. After a successful node replacement, there is no need to remove it from the ``scylla.yaml`` file. (Note: The obsolete parameters "replace_address" and "replace_address_first_boot" are not supported and should not be used). The value of the ``replace_node_first_boot`` parameter should be the Host ID of the node to be replaced.

    For example (using the Host ID of the failed node from above):
@@ -150,6 +152,12 @@ Procedure
    .. note:: 
       When :doc:`Repair Based Node Operations (RBNO) <repair-based-node-operation>` for **replace** is enabled, there is no need to rerun repair.

+
+Handling Failures
+-----------------
+
+If the new node starts and begins the replace operation but then fails in the middle, e.g., due to a power loss, you can retry the replace (by restarting the node). If you don't want to retry, or the node refuses to boot on subsequent attempts, consult the :doc:`Handling Membership Change Failures document</operating-scylla/procedures/cluster-management/handling-membership-change-failures>`.
+
 ------------------------------
 Setup RAID Following a Restart
 ------------------------------
--- a/docs/operating-scylla/procedures/maintenance/repair.rst
+++ b/docs/operating-scylla/procedures/maintenance/repair.rst
@@ -15,6 +15,7 @@ As long as the cluster can satisfy the required consistency level (usually quoru

 * :doc:`Hinted Handoff </architecture/anti-entropy/hinted-handoff>`
 * :doc:`Read Repair </architecture/anti-entropy/read-repair>`
+* :doc:`Repair Based Node Operations </operating-scylla/procedures/cluster-management/repair-based-node-operation>`
 * Repair - described in the following sections

 Repair Overview
@@ -50,30 +51,6 @@ Row-level repair improves Scylla in two ways:
  * keeping the data in a temporary buffer.
  * using the cached data to calculate the checksum and send it to the replicas.

-Repair Base Operation
---------------------
-
-.. versionadded:: 4.0 Scylla Open Source (disabled)
-
-ScyllaDB has two mechanisms to synchronize data between nodes:
-
-* Streaming - used for cluster topology changes, such as adding or removing nodes.
-* Row Level Repair - an offline process that compares and syncs data between nodes .
-
-With *Repair Base Operation*, Scylla uses row-level repair as the unified underlying mechanism for repair operation **and** all node operations, e.g., bootstrap, decommission, remove node, replace node, rebuild node.
-
-This safer process makes the node operations resumable, syncing only the inconsistent data.
-Also, replaced nodes now accept writes, which means there is no longer a need to repair after replacing a node.
-
-**This feature is disabled by default.**
-
-
-You can enable or disable this feature with a configuration parameter in the *scylla.yaml*:
-
-.. code-block:: none
-
-   enable_repair_based_node_ops: [true|false]
-
 See also

 * `Scylla Manager documentation <https://manager.docs.scylladb.com/>`_
--- a/docs/operating-scylla/procedures/tips/benchmark-tips.rst
+++ b/docs/operating-scylla/procedures/tips/benchmark-tips.rst
@@ -198,7 +198,7 @@ By default ScyllaDB will try to use cache, but since the data won’t be used ag
 As a consequence it can lead to bad latency on operational workloads due to increased rate of cache misses.
 To prevent this problem, queries from analytical workloads can bypass the cache using the ‘bypass cache’ option.

-:ref:`Bypass Cache <select-statement>` is only available with Scylla Enterprise.
+See :ref:`Bypass Cache <bypass-cache>` for more information.

 Batching
 ========
--- a/docs/operating-scylla/procedures/tips/best-practices-scylla-on-docker.rst
+++ b/docs/operating-scylla/procedures/tips/best-practices-scylla-on-docker.rst
@@ -306,9 +306,8 @@ First, download the file locally to the node:

  sudo docker exec -it some-scylla.2.0.1 curl -o file.csv https://<url>.com/<path>/<path>/<file>.csv

-Once you have the ``.csv`` downloaded, you can use the CQL ``COPY FROM`` command as explained here_ to load the data into ScyllaDB.
+Once you have the ``.csv`` downloaded, you can use the CQL ``COPY FROM`` command as explained :doc:`here </cql/cqlsh>` to load the data into ScyllaDB.

-.. _here: /getting-started/cqlsh/

 Such a copy command might look like this:

--- a/docs/operating-scylla/security/security-checklist.rst
+++ b/docs/operating-scylla/security/security-checklist.rst
@@ -31,7 +31,11 @@ Encryption on Transit, Client to Node and Node to Node
 Encryption on Transit protects your communication against a 3rd interception on the network connection.
 Configure Scylla to use TLS/SSL for all the connections. Use TLS/SSL to encrypt communication between Scylla nodes and client applications.

-See:
+.. only:: enterprise
+
+    Starting with version 2023.1.1, you can run ScyllaDB Enterprise on FIPS-enabled Ubuntu, 
+    which uses FIPS 140-2 certified libraries (such as OpenSSL, GnuTLS, and more) and Linux 
+    kernel in FIPS mode.

 * :doc:`Encryption Data in Transit Client to Node </operating-scylla/security/client-node-encryption>`

--- a/docs/troubleshooting/error-messages/create-mv.rst
+++ b/docs/troubleshooting/error-messages/create-mv.rst
@@ -4,9 +4,8 @@ A Removed Node was not Removed Properly from the Seed Node List
 Phenonoma
 ^^^^^^^^^

-Failed to create `materialized view`_ after node was removed from the cluster. 
+Failed to create :doc:`materialized view </cql/mv>` after node was removed from the cluster. 

-.. _`materialized view`: /getting-started/mv/

 Error message:

@@ -27,9 +26,7 @@ How to Verify

 Scylla logs show the error message above.

-To verify that the node wasn't remove properly use the `nodetool gossipinfo`_ command
-
-.. _`nodetool gossipinfo`: /operating-scylla/nodetool-commands/gossipinfo/
+To verify that the node wasn't removed properly, use the :doc:`nodetool gossipinfo </operating-scylla/nodetool-commands/gossipinfo>` command

 For example:

--- a/docs/upgrade/_common/upgrade-guide-from-2022.x.y-to-2022.x.z-ubuntu-and-debian.rst
+++ b/docs/upgrade/_common/upgrade-guide-from-2022.x.y-to-2022.x.z-ubuntu-and-debian.rst
@@ -68,7 +68,7 @@ Gracefully stop the node

 .. code:: sh

-   sudo service scylla-enterprise-server stop
+   sudo service scylla-server stop

 Download and install the new release
 ------------------------------------
@@ -92,13 +92,13 @@ Start the node

 .. code:: sh

-   sudo service scylla-enterprise-server start
+   sudo service scylla-server start

 Validate
 --------
 1. Check cluster status with ``nodetool status`` and make sure **all** nodes, including the one you just upgraded, are in UN status.
 2. Use ``curl -X GET "http://localhost:10000/storage_service/scylla_release_version"`` to check the ScyllaDB version.
-3. Check scylla-enterprise-server log (by ``journalctl _COMM=scylla``) and ``/var/log/syslog`` to validate there are no errors.
+3. Check scylla-server log (by ``journalctl _COMM=scylla``) and ``/var/log/syslog`` to validate there are no errors.
 4. Check again after 2 minutes to validate no new issues are introduced.

 Once you are sure the node upgrade is successful, move to the next node in the cluster.
@@ -130,7 +130,7 @@ Gracefully shutdown ScyllaDB
 .. code:: sh

   nodetool drain
-   sudo service scylla-enterprise-server stop
+   sudo service scylla-server stop

 Downgrade to the previous release
 ----------------------------------
@@ -164,7 +164,7 @@ Start the node

 .. code:: sh

-   sudo service scylla-enterprise-server start
+   sudo service scylla-server start

 Validate
 --------
--- a/Show More
+++ b/Show More