doc: remove wrong image upgrade info (5.2-to-2023.1)

This commit removes the information about the recommended way of upgrading ScyllaDB images - by updating ScyllaDB and OS packages in one step. This upgrade procedure is not supported (it was implemented, but then reverted). Refs https://github.com/scylladb/scylladb/issues/15733 Closes scylladb/scylladb#21876 Fixes https://github.com/scylladb/scylla-enterprise/issues/5041 Fixes https://github.com/scylladb/scylladb/issues/21898 (cherry picked from commit 98860905d8)
repair: use find_column_family in insert_repair_meta
2024-12-12 15:27:24 +02:00 · 2024-08-14 22:20:38 +03:00 · 2024-08-14 20:15:50 +03:00 · 2024-08-07 10:52:39 +02:00 · 2024-08-05 16:28:19 +02:00 · 2024-08-02 11:00:08 +02:00
279 changed files with 5921 additions and 3567 deletions
--- a/.github/scripts/label_promoted_commits.py
+++ b/.github/scripts/label_promoted_commits.py
@@ -0,0 +1,87 @@
+from github import Github
+import argparse
+import re
+import sys
+import os
+
+try:
+    github_token = os.environ["GITHUB_TOKEN"]
+except KeyError:
+    print("Please set the 'GITHUB_TOKEN' environment variable")
+    sys.exit(1)
+
+
+def parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--repository', type=str, required=True,
+                        help='Github repository name (e.g., scylladb/scylladb)')
+    parser.add_argument('--commit_before_merge', type=str, required=True, help='Git commit ID to start labeling from ('
+                                                                               'newest commit).')
+    parser.add_argument('--commit_after_merge', type=str, required=True,
+                        help='Git commit ID to end labeling at (oldest '
+                             'commit, exclusive).')
+    parser.add_argument('--update_issue', type=bool, default=False, help='Set True to update issues when backport was '
+                                                                         'done')
+    parser.add_argument('--ref', type=str, required=True, help='PR target branch')
+    return parser.parse_args()
+
+
+def add_comment_and_close_pr(pr, comment):
+    if pr.state == 'open':
+        pr.create_issue_comment(comment)
+        pr.edit(state="closed")
+
+
+def mark_backport_done(repo, ref_pr_number, branch):
+    pr = repo.get_pull(int(ref_pr_number))
+    label_to_remove = f'backport/{branch}'
+    label_to_add = f'{label_to_remove}-done'
+    current_labels = [label.name for label in pr.get_labels()]
+    if label_to_remove in current_labels:
+        pr.remove_from_labels(label_to_remove)
+    if label_to_add not in current_labels:
+        pr.add_to_labels(label_to_add)
+
+
+def main():
+    # This script is triggered by a push event to either the master branch or a branch named branch-x.y (where x and y represent version numbers). Based on the pushed branch, the script performs the following actions:
+    # - When ref branch is `master`, it will add the `promoted-to-master` label, which we need later for the auto backport process
+    # - When ref branch is `branch-x.y` (which means we backported a patch), it will replace in the original PR the `backport/x.y` label with `backport/x.y-done` and will close the backport PR (Since GitHub close only the one referring to default branch)
+    args = parser()
+    pr_pattern = re.compile(r'Closes .*#([0-9]+)')
+    target_branch = re.search(r'branch-(\d+\.\d+)', args.ref)
+    g = Github(github_token)
+    repo = g.get_repo(args.repository, lazy=False)
+    commits = repo.compare(head=args.commit_after_merge, base=args.commit_before_merge)
+    processed_prs = set()
+    # Print commit information
+    for commit in commits.commits:
+        print(f'Commit sha is: {commit.sha}')
+        match = pr_pattern.search(commit.commit.message)
+        if match:
+            pr_number = int(match.group(1))
+            if pr_number in processed_prs:
+                continue
+            if target_branch:
+                pr = repo.get_pull(pr_number)
+                branch_name = target_branch[1]
+                refs_pr = re.findall(r'Refs (?:#|https.*?)(\d+)', pr.body)
+                if refs_pr:
+                    print(f'branch-{target_branch.group(1)}, pr number is: {pr_number}')
+                    # 1. change the backport label of the parent PR to note that
+                    #    we've merge the corresponding backport PR
+                    # 2. close the backport PR and leave a comment on it to note
+                    #    that it has been merged with a certain git commit,
+                    ref_pr_number = refs_pr[0]
+                    mark_backport_done(repo, ref_pr_number, branch_name)
+                    comment = f'Closed via {commit.sha}'
+                    add_comment_and_close_pr(pr, comment)
+            else:
+                print(f'master branch, pr number is: {pr_number}')
+                pr = repo.get_pull(pr_number)
+                pr.add_to_labels('promoted-to-master')
+            processed_prs.add(pr_number)
+
+
+if __name__ == "__main__":
+    main()
--- a/.github/workflows/add-label-when-promoted.yaml
+++ b/.github/workflows/add-label-when-promoted.yaml
@@ -0,0 +1,44 @@
+name: Check if commits are promoted
+
+on:
+  push:
+    branches:
+      - master
+      - branch-*.*
+
+env:
+  DEFAULT_BRANCH: 'master'
+
+jobs:
+  check-commit:
+    runs-on: ubuntu-latest
+    # The script labels, comments on and closes pull requests, hence the write permissions.
+    permissions:
+      pull-requests: write
+      issues: write
+    steps:
+      - name: Dump GitHub context
+        env:
+          GITHUB_CONTEXT: ${{ toJson(github) }}
+        run: echo "$GITHUB_CONTEXT"
+      # Check out the default branch (not the pushed ref), so the script below is taken from there.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ github.repository }}
+          ref: ${{ env.DEFAULT_BRANCH }}
+          fetch-depth: 0  # Fetch all history for all tags and branches
+
+      - name: Install dependencies
+        run: sudo apt-get install -y python3-github
+
+      # Event data is handed over through environment variables and quoted, instead of being
+      # expanded into the shell command, so a crafted ref name cannot be interpreted as shell syntax.
+      - name: Run python script
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMMIT_BEFORE_MERGE: ${{ github.event.before }}
+          COMMIT_AFTER_MERGE: ${{ github.event.after }}
+          REPOSITORY: ${{ github.repository }}
+          REF: ${{ github.ref }}
+        run: python .github/scripts/label_promoted_commits.py --commit_before_merge "$COMMIT_BEFORE_MERGE" --commit_after_merge "$COMMIT_AFTER_MERGE" --repository "$REPOSITORY" --ref "$REF"
--- a/2
+++ b/2
@@ -78,7 +78,7 @@ fi

 # Default scylla product/version tags
 PRODUCT=scylla
-VERSION=5.4.2
+VERSION=5.4.10

 if test -f version
 then
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -208,7 +208,10 @@ protected:
        sstring local_dc = topology.get_datacenter();
        std::unordered_set<gms::inet_address> local_dc_nodes = topology.get_datacenter_endpoints().at(local_dc);
        for (auto& ip : local_dc_nodes) {
-            if (_gossiper.is_alive(ip)) {
+            // Note that it's not enough for the node to be is_alive() - a
+            // node joining the cluster is also "alive" but not responsive to
+            // requests. We need the node to be in normal state. See #19694.
+            if (_gossiper.is_normal(ip)) {
                rjson::push_back(results, rjson::from_string(ip.to_sstring()));
            }
        }
--- a/api/api-doc/raft.json
+++ b/api/api-doc/raft.json
@@ -0,0 +1,43 @@
+{
+   "apiVersion":"0.0.1",
+   "swaggerVersion":"1.2",
+   "basePath":"{{Protocol}}://{{Host}}",
+   "resourcePath":"/raft",
+   "produces":[
+      "application/json"
+   ],
+   "apis":[
+      {
+         "path":"/raft/trigger_snapshot/{group_id}",
+         "operations":[
+            {
+               "method":"POST",
+               "summary":"Triggers snapshot creation and log truncation for the given Raft group",
+               "type":"string",
+               "nickname":"trigger_snapshot",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"group_id",
+                     "description":"The ID of the group which should get snapshotted",
+                     "required":true,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"path"
+                  },
+                  {
+                     "name":"timeout",
+                     "description":"Timeout in seconds after which the endpoint returns a failure. If not provided, 60s is used.",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"long",
+                     "paramType":"query"
+                  }
+               ]
+            }
+         ]
+      }
+   ]
+}
--- a/api/api.cc
+++ b/api/api.cc
@@ -31,6 +31,7 @@
 #include "api/config.hh"
 #include "task_manager.hh"
 #include "task_manager_test.hh"
+#include "raft.hh"

 logging::logger apilog("api");

@@ -294,6 +295,18 @@ future<> set_server_task_manager_test(http_context& ctx) {

 #endif

+future<> set_server_raft(http_context& ctx, sharded<service::raft_group_registry>& raft_gr) { // registers the "raft" API entry and calls set_raft() on the HTTP server's routes
+    auto rb = std::make_shared<api_registry_builder>(ctx.api_doc);
+    return ctx.http_server.set_routes([rb, &ctx, &raft_gr] (routes& r) { // rb is copied into the callback; ctx and raft_gr are captured by reference
+        rb->register_function(r, "raft", "The Raft API");
+        set_raft(ctx, r, raft_gr); // set_raft() is not defined in this hunk; presumably declared in the newly included raft.hh
+    });
+}
+
+future<> unset_server_raft(http_context& ctx) { // counterpart of set_server_raft(): runs unset_raft() on the HTTP server's routes
+    return ctx.http_server.set_routes([&ctx] (routes& r) { unset_raft(ctx, r); });
+}
+
 void req_params::process(const request& req) {
    // Process mandatory parameters
    for (auto& [name, ent] : params) {
@@ -301,7 +314,7 @@ void req_params::process(const request& req) {
            continue;
        }
        try {
-            ent.value = req.param[name];
+            ent.value = req.get_path_param(name);
        } catch (std::out_of_range&) {
            throw httpd::bad_param_exception(fmt::format("Mandatory parameter '{}' was not provided", name));
        }
--- a/api/api_init.hh
+++ b/api/api_init.hh
@@ -23,6 +23,7 @@ class load_meter;
 class storage_proxy;
 class storage_service;
 class raft_group0_client;
+class raft_group_registry;

 } // namespace service

@@ -117,5 +118,7 @@ future<> set_server_compaction_manager(http_context& ctx);
 future<> set_server_done(http_context& ctx);
 future<> set_server_task_manager(http_context& ctx, lw_shared_ptr<db::config> cfg);
 future<> set_server_task_manager_test(http_context& ctx);
+future<> set_server_raft(http_context&, sharded<service::raft_group_registry>&);
+future<> unset_server_raft(http_context&);

 }
--- a/api/collectd.cc
+++ b/api/collectd.cc
@@ -54,7 +54,7 @@ static const char* str_to_regex(const sstring& v) {
 void set_collectd(http_context& ctx, routes& r) {
    cd::get_collectd.set(r, [](std::unique_ptr<request> req) {

-        auto id = ::make_shared<scollectd::type_instance_id>(req->param["pluginid"],
+        auto id = ::make_shared<scollectd::type_instance_id>(req->get_path_param("pluginid"),
                req->get_query_param("instance"), req->get_query_param("type"),
                req->get_query_param("type_instance"));

@@ -91,7 +91,7 @@ void set_collectd(http_context& ctx, routes& r) {
    });

    cd::enable_collectd.set(r, [](std::unique_ptr<request> req) -> future<json::json_return_type> {
-        std::regex plugin(req->param["pluginid"].c_str());
+        std::regex plugin(req->get_path_param("pluginid").c_str());
        std::regex instance(str_to_regex(req->get_query_param("instance")));
        std::regex type(str_to_regex(req->get_query_param("type")));
        std::regex type_instance(str_to_regex(req->get_query_param("type_instance")));
--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -333,7 +333,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_memtable_columns_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], uint64_t{0}, [](replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t{0}, [](replica::column_family& cf) {
            return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed(std::mem_fn(&replica::memtable::partition_count)), uint64_t(0));
        }, std::plus<>());
    });
@@ -353,7 +353,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_memtable_off_heap_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
            return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
                return active_memtable->region().occupancy().total_space();
            }), uint64_t(0));
@@ -369,7 +369,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_memtable_live_data_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
            return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
                return active_memtable->region().occupancy().used_space();
            }), uint64_t(0));
@@ -394,7 +394,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace

    cf::get_cf_all_memtables_off_heap_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
        warn(unimplemented::cause::INDEXES);
-        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
            return cf.occupancy().total_space();
        }, std::plus<int64_t>());
    });
@@ -410,7 +410,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace

    cf::get_cf_all_memtables_live_data_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
        warn(unimplemented::cause::INDEXES);
-        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
            return cf.occupancy().used_space();
        }, std::plus<int64_t>());
    });
@@ -425,7 +425,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_memtable_switch_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_stats(ctx,req->param["name"] ,&replica::column_family_stats::memtable_switch_count);
+        return get_cf_stats(ctx,req->get_path_param("name") ,&replica::column_family_stats::memtable_switch_count);
    });

    cf::get_all_memtable_switch_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
@@ -434,7 +434,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace

    // FIXME: this refers to partitions, not rows.
    cf::get_estimated_row_size_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), utils::estimated_histogram(0), [](replica::column_family& cf) {
            utils::estimated_histogram res(0);
            for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
                res.merge(i->get_stats_metadata().estimated_partition_size);
@@ -446,7 +446,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace

    // FIXME: this refers to partitions, not rows.
    cf::get_estimated_row_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
            uint64_t res = 0;
            for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
                res += i->get_stats_metadata().estimated_partition_size.count();
@@ -457,7 +457,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_estimated_column_count_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), utils::estimated_histogram(0), [](replica::column_family& cf) {
            utils::estimated_histogram res(0);
            for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
                res.merge(i->get_stats_metadata().estimated_cells_count);
@@ -474,7 +474,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_pending_flushes.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_stats(ctx,req->param["name"] ,&replica::column_family_stats::pending_flushes);
+        return get_cf_stats(ctx,req->get_path_param("name") ,&replica::column_family_stats::pending_flushes);
    });

    cf::get_all_pending_flushes.set(r, [&ctx] (std::unique_ptr<http::request> req) {
@@ -482,7 +482,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_read.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_stats_count(ctx,req->param["name"] ,&replica::column_family_stats::reads);
+        return get_cf_stats_count(ctx,req->get_path_param("name") ,&replica::column_family_stats::reads);
    });

    cf::get_all_read.set(r, [&ctx] (std::unique_ptr<http::request> req) {
@@ -490,7 +490,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_write.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_stats_count(ctx, req->param["name"] ,&replica::column_family_stats::writes);
+        return get_cf_stats_count(ctx, req->get_path_param("name") ,&replica::column_family_stats::writes);
    });

    cf::get_all_write.set(r, [&ctx] (std::unique_ptr<http::request> req) {
@@ -498,19 +498,19 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::reads);
+        return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::reads);
    });

    cf::get_read_latency_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_rate_and_histogram(ctx, req->param["name"], &replica::column_family_stats::reads);
+        return get_cf_rate_and_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::reads);
    });

    cf::get_read_latency.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_stats_sum(ctx,req->param["name"] ,&replica::column_family_stats::reads);
+        return get_cf_stats_sum(ctx,req->get_path_param("name") ,&replica::column_family_stats::reads);
    });

    cf::get_write_latency.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_stats_sum(ctx, req->param["name"] ,&replica::column_family_stats::writes);
+        return get_cf_stats_sum(ctx, req->get_path_param("name") ,&replica::column_family_stats::writes);
    });

    cf::get_all_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
@@ -522,11 +522,11 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::writes);
+        return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::writes);
    });

    cf::get_write_latency_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_rate_and_histogram(ctx, req->param["name"], &replica::column_family_stats::writes);
+        return get_cf_rate_and_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::writes);
    });

    cf::get_all_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
@@ -538,7 +538,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_pending_compactions.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
            return cf.estimate_pending_compactions();
        }, std::plus<int64_t>());
    });
@@ -550,7 +550,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_live_ss_table_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_stats(ctx, req->param["name"], &replica::column_family_stats::live_sstable_count);
+        return get_cf_stats(ctx, req->get_path_param("name"), &replica::column_family_stats::live_sstable_count);
    });

    cf::get_all_live_ss_table_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
@@ -558,11 +558,11 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_unleveled_sstables.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_unleveled_sstables(ctx, req->param["name"]);
+        return get_cf_unleveled_sstables(ctx, req->get_path_param("name"));
    });

    cf::get_live_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return sum_sstable(ctx, req->param["name"], false);
+        return sum_sstable(ctx, req->get_path_param("name"), false);
    });

    cf::get_all_live_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
@@ -570,7 +570,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_total_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return sum_sstable(ctx, req->param["name"], true);
+        return sum_sstable(ctx, req->get_path_param("name"), true);
    });

    cf::get_all_total_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
@@ -579,7 +579,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace

    // FIXME: this refers to partitions, not rows.
    cf::get_min_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], INT64_MAX, min_partition_size, min_int64);
+        return map_reduce_cf(ctx, req->get_path_param("name"), INT64_MAX, min_partition_size, min_int64);
    });

    // FIXME: this refers to partitions, not rows.
@@ -589,7 +589,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace

    // FIXME: this refers to partitions, not rows.
    cf::get_max_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], int64_t(0), max_partition_size, max_int64);
+        return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), max_partition_size, max_int64);
    });

    // FIXME: this refers to partitions, not rows.
@@ -600,7 +600,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    // FIXME: this refers to partitions, not rows.
    cf::get_mean_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
        // Cassandra 3.x mean values are truncated as integrals.
-        return map_reduce_cf(ctx, req->param["name"], integral_ratio_holder(), mean_partition_size, std::plus<integral_ratio_holder>());
+        return map_reduce_cf(ctx, req->get_path_param("name"), integral_ratio_holder(), mean_partition_size, std::plus<integral_ratio_holder>());
    });

    // FIXME: this refers to partitions, not rows.
@@ -610,7 +610,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
            auto sstables = cf.get_sstables();
            return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
                return s + sst->filter_get_false_positive();
@@ -628,7 +628,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_recent_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
            auto sstables = cf.get_sstables();
            return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
                return s + sst->filter_get_recent_false_positive();
@@ -646,7 +646,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), ratio_holder(), [] (replica::column_family& cf) {
            return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_false_positive_as_ratio_holder), ratio_holder());
        }, std::plus<>());
    });
@@ -658,7 +658,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_recent_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), ratio_holder(), [] (replica::column_family& cf) {
            return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_recent_false_positive_as_ratio_holder), ratio_holder());
        }, std::plus<>());
    });
@@ -670,7 +670,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
            auto sstables = cf.get_sstables();
            return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
                return s + sst->filter_size();
@@ -688,7 +688,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
            auto sstables = cf.get_sstables();
            return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
                return s + sst->filter_memory_size();
@@ -706,7 +706,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
            auto sstables = cf.get_sstables();
            return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
                return s + sst->get_summary().memory_footprint();
@@ -729,7 +729,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
        // We are missing the off heap memory calculation
        // Return 0 is the wrong value. It's a work around
        // until the memory calculation will be available
-        //auto id = get_uuid(req->param["name"], ctx.db.local());
+        //auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
        return make_ready_future<json::json_return_type>(0);
    });

@@ -742,7 +742,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    cf::get_speculative_retries.set(r, [] (std::unique_ptr<http::request> req) {
        //TBD
        unimplemented();
-        //auto id = get_uuid(req->param["name"], ctx.db.local());
+        //auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
        return make_ready_future<json::json_return_type>(0);
    });

@@ -755,7 +755,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    cf::get_key_cache_hit_rate.set(r, [] (std::unique_ptr<http::request> req) {
        //TBD
        unimplemented();
-        //auto id = get_uuid(req->param["name"], ctx.db.local());
+        //auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
        return make_ready_future<json::json_return_type>(0);
    });

@@ -780,7 +780,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    cf::get_row_cache_hit_out_of_range.set(r, [] (std::unique_ptr<http::request> req) {
        //TBD
        unimplemented();
-        //auto id = get_uuid(req->param["name"], ctx.db.local());
+        //auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
        return make_ready_future<json::json_return_type>(0);
    });

@@ -791,7 +791,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_row_cache_hit.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf_raw(ctx, req->param["name"], utils::rate_moving_average(), [](const replica::column_family& cf) {
+        return map_reduce_cf_raw(ctx, req->get_path_param("name"), utils::rate_moving_average(), [](const replica::column_family& cf) {
            return cf.get_row_cache().stats().hits.rate();
        }, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
            return make_ready_future<json::json_return_type>(meter_to_json(m));
@@ -807,7 +807,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_row_cache_miss.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf_raw(ctx, req->param["name"], utils::rate_moving_average(), [](const replica::column_family& cf) {
+        return map_reduce_cf_raw(ctx, req->get_path_param("name"), utils::rate_moving_average(), [](const replica::column_family& cf) {
            return cf.get_row_cache().stats().misses.rate();
        }, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
            return make_ready_future<json::json_return_type>(meter_to_json(m));
@@ -824,57 +824,57 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_cas_prepare.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
+        return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
            return cf.get_stats().cas_prepare.histogram();
        });
    });

    cf::get_cas_propose.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
+        return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
            return cf.get_stats().cas_accept.histogram();
        });
    });

    cf::get_cas_commit.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
+        return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
            return cf.get_stats().cas_learn.histogram();
        });
    });

    cf::get_sstables_per_read_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](replica::column_family& cf) {
+        return map_reduce_cf(ctx, req->get_path_param("name"), utils::estimated_histogram(0), [](replica::column_family& cf) {
            return cf.get_stats().estimated_sstable_per_read;
        },
        utils::estimated_histogram_merge, utils_json::estimated_histogram());
    });

    cf::get_tombstone_scanned_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::tombstone_scanned);
+        return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::tombstone_scanned);
    });

    cf::get_live_scanned_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::live_scanned);
+        return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::live_scanned);
    });

    cf::get_col_update_time_delta_histogram.set(r, [] (std::unique_ptr<http::request> req) {
        //TBD
        unimplemented();
-        //auto id = get_uuid(req->param["name"], ctx.db.local());
+        //auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
        std::vector<double> res;
        return make_ready_future<json::json_return_type>(res);
    });

    cf::get_auto_compaction.set(r, [&ctx] (const_req req) {
-        auto uuid = get_uuid(req.param["name"], ctx.db.local());
+        auto uuid = get_uuid(req.get_path_param("name"), ctx.db.local());
        replica::column_family& cf = ctx.db.local().find_column_family(uuid);
        return !cf.is_auto_compaction_disabled_by_user();
    });

    cf::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        apilog.info("column_family/enable_auto_compaction: name={}", req->param["name"]);
+        apilog.info("column_family/enable_auto_compaction: name={}", req->get_path_param("name"));
        return ctx.db.invoke_on(0, [&ctx, req = std::move(req)] (replica::database& db) {
            auto g = replica::database::autocompaction_toggle_guard(db);
-            return foreach_column_family(ctx, req->param["name"], [](replica::column_family &cf) {
+            return foreach_column_family(ctx, req->get_path_param("name"), [](replica::column_family &cf) {
                cf.enable_auto_compaction();
            }).then([g = std::move(g)] {
                return make_ready_future<json::json_return_type>(json_void());
@@ -883,10 +883,10 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        apilog.info("column_family/disable_auto_compaction: name={}", req->param["name"]);
+        apilog.info("column_family/disable_auto_compaction: name={}", req->get_path_param("name"));
        return ctx.db.invoke_on(0, [&ctx, req = std::move(req)] (replica::database& db) {
            auto g = replica::database::autocompaction_toggle_guard(db);
-            return foreach_column_family(ctx, req->param["name"], [](replica::column_family &cf) {
+            return foreach_column_family(ctx, req->get_path_param("name"), [](replica::column_family &cf) {
                return cf.disable_auto_compaction();
            }).then([g = std::move(g)] {
                return make_ready_future<json::json_return_type>(json_void());
@@ -895,14 +895,14 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_tombstone_gc.set(r, [&ctx] (const_req req) {
-        auto uuid = get_uuid(req.param["name"], ctx.db.local());
+        auto uuid = get_uuid(req.get_path_param("name"), ctx.db.local());
        replica::table& t = ctx.db.local().find_column_family(uuid);
        return t.tombstone_gc_enabled();
    });

    cf::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        apilog.info("column_family/enable_tombstone_gc: name={}", req->param["name"]);
-        return foreach_column_family(ctx, req->param["name"], [](replica::table& t) {
+        apilog.info("column_family/enable_tombstone_gc: name={}", req->get_path_param("name"));
+        return foreach_column_family(ctx, req->get_path_param("name"), [](replica::table& t) {
            t.set_tombstone_gc_enabled(true);
        }).then([] {
            return make_ready_future<json::json_return_type>(json_void());
@@ -910,8 +910,8 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        apilog.info("column_family/disable_tombstone_gc: name={}", req->param["name"]);
-        return foreach_column_family(ctx, req->param["name"], [](replica::table& t) {
+        apilog.info("column_family/disable_tombstone_gc: name={}", req->get_path_param("name"));
+        return foreach_column_family(ctx, req->get_path_param("name"), [](replica::table& t) {
            t.set_tombstone_gc_enabled(false);
        }).then([] {
            return make_ready_future<json::json_return_type>(json_void());
@@ -919,7 +919,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_built_indexes.set(r, [&ctx, &sys_ks](std::unique_ptr<http::request> req) {
-        auto ks_cf = parse_fully_qualified_cf_name(req->param["name"]);
+        auto ks_cf = parse_fully_qualified_cf_name(req->get_path_param("name"));
        auto&& ks = std::get<0>(ks_cf);
        auto&& cf_name = std::get<1>(ks_cf);
        return sys_ks.local().load_view_build_progress().then([ks, cf_name, &ctx](const std::vector<db::system_keyspace_view_build_progress>& vb) mutable {
@@ -957,7 +957,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_compression_ratio.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        auto uuid = get_uuid(req->param["name"], ctx.db.local());
+        auto uuid = get_uuid(req->get_path_param("name"), ctx.db.local());

        return ctx.db.map_reduce(sum_ratio<double>(), [uuid](replica::database& db) {
            replica::column_family& cf = db.find_column_family(uuid);
@@ -968,21 +968,21 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_read_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
+        return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
            return cf.get_stats().reads.histogram();
        });
    });

    cf::get_write_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
+        return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
            return cf.get_stats().writes.histogram();
        });
    });

    cf::set_compaction_strategy_class.set(r, [&ctx](std::unique_ptr<http::request> req) {
        sstring strategy = req->get_query_param("class_name");
-        apilog.info("column_family/set_compaction_strategy_class: name={} strategy={}", req->param["name"], strategy);
-        return foreach_column_family(ctx, req->param["name"], [strategy](replica::column_family& cf) {
+        apilog.info("column_family/set_compaction_strategy_class: name={} strategy={}", req->get_path_param("name"), strategy);
+        return foreach_column_family(ctx, req->get_path_param("name"), [strategy](replica::column_family& cf) {
            cf.set_compaction_strategy(sstables::compaction_strategy::type(strategy));
        }).then([] {
                return make_ready_future<json::json_return_type>(json_void());
@@ -990,7 +990,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_compaction_strategy_class.set(r, [&ctx](const_req req) {
-        return ctx.db.local().find_column_family(get_uuid(req.param["name"], ctx.db.local())).get_compaction_strategy().name();
+        return ctx.db.local().find_column_family(get_uuid(req.get_path_param("name"), ctx.db.local())).get_compaction_strategy().name();
    });

    cf::set_compression_parameters.set(r, [](std::unique_ptr<http::request> req) {
@@ -1006,7 +1006,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
    });

    cf::get_sstable_count_per_level.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        return map_reduce_cf_raw(ctx, req->param["name"], std::vector<uint64_t>(), [](const replica::column_family& cf) {
+        return map_reduce_cf_raw(ctx, req->get_path_param("name"), std::vector<uint64_t>(), [](const replica::column_family& cf) {
            return cf.sstable_count_per_level();
        }, concat_sstable_count_per_level).then([](const std::vector<uint64_t>& res) {
            return make_ready_future<json::json_return_type>(res);
@@ -1015,7 +1015,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace

    cf::get_sstables_for_key.set(r, [&ctx](std::unique_ptr<http::request> req) {
        auto key = req->get_query_param("key");
-        auto uuid = get_uuid(req->param["name"], ctx.db.local());
+        auto uuid = get_uuid(req->get_path_param("name"), ctx.db.local());

        return ctx.db.map_reduce0([key, uuid] (replica::database& db) -> future<std::unordered_set<sstring>> {
            auto sstables = co_await db.find_column_family(uuid).get_sstables_by_partition_key(key);
@@ -1031,7 +1031,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace


    cf::toppartitions.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        auto name = req->param["name"];
+        auto name = req->get_path_param("name");
        auto [ks, cf] = parse_fully_qualified_cf_name(name);

        api::req_param<std::chrono::milliseconds, unsigned> duration{*req, "duration", 1000ms};
@@ -1058,7 +1058,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
        }
        auto [ks, cf] = parse_fully_qualified_cf_name(*params.get("name"));
        auto flush = params.get_as<bool>("flush_memtables").value_or(true);
-        apilog.info("column_family/force_major_compaction: name={} flush={}", req->param["name"], flush);
+        apilog.info("column_family/force_major_compaction: name={} flush={}", req->get_path_param("name"), flush);

        auto keyspace = validate_keyspace(ctx, ks);
        std::vector<table_info> table_infos = {table_info{
--- a/api/compaction_manager.cc
+++ b/api/compaction_manager.cc
@@ -7,6 +7,7 @@
 */

 #include <seastar/core/coroutine.hh>
+#include <seastar/coroutine/exception.hh>

 #include "compaction_manager.hh"
 #include "compaction/compaction_manager.hh"
@@ -109,7 +110,7 @@ void set_compaction_manager(http_context& ctx, routes& r) {
    });

    cm::stop_keyspace_compaction.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        auto ks_name = validate_keyspace(ctx, req->param);
+        auto ks_name = validate_keyspace(ctx, req);
        auto table_names = parse_tables(ks_name, ctx, req->query_parameters, "tables");
        if (table_names.empty()) {
            table_names = map_keys(ctx.db.local().find_keyspace(ks_name).metadata().get()->cf_meta_data());
@@ -152,10 +153,13 @@ void set_compaction_manager(http_context& ctx, routes& r) {
    });

    cm::get_compaction_history.set(r, [&ctx] (std::unique_ptr<http::request> req) {
-        std::function<future<>(output_stream<char>&&)> f = [&ctx](output_stream<char>&& s) {
-            return do_with(output_stream<char>(std::move(s)), true, [&ctx] (output_stream<char>& s, bool& first){
-                return s.write("[").then([&ctx, &s, &first] {
-                    return ctx.db.local().get_compaction_manager().get_compaction_history([&s, &first](const db::compaction_history_entry& entry) mutable {
+        std::function<future<>(output_stream<char>&&)> f = [&ctx] (output_stream<char>&& out) -> future<> {
+            auto s = std::move(out);
+            bool first = true;
+            std::exception_ptr ex;
+            try {
+                co_await s.write("[");
+                co_await ctx.db.local().get_compaction_manager().get_compaction_history([&s, &first](const db::compaction_history_entry& entry) mutable -> future<> {
                        cm::history h;
                        h.id = entry.id.to_sstring();
                        h.ks = std::move(entry.ks);
@@ -169,18 +173,21 @@ void set_compaction_manager(http_context& ctx, routes& r) {
                            e.value = it.second;
                            h.rows_merged.push(std::move(e));
                        }
-                        auto fut = first ? make_ready_future<>() : s.write(", ");
+                        if (!first) {
+                            co_await s.write(", ");
+                        }
                        first = false;
-                        return fut.then([&s, h = std::move(h)] {
-                            return formatter::write(s, h);
-                        });
-                    }).then([&s] {
-                        return s.write("]").then([&s] {
-                            return s.close();
-                        });
+                        co_await formatter::write(s, h);
                    });
-                });
-            });
+                co_await s.write("]");
+                co_await s.flush();
+            } catch (...) {
+                ex = std::current_exception();
+            }
+            co_await s.close();
+            if (ex) {
+                co_await coroutine::return_exception_ptr(std::move(ex));
+            }
        };
        return make_ready_future<json::json_return_type>(std::move(f));
    });
--- a/api/config.cc
+++ b/api/config.cc
@@ -91,7 +91,7 @@ void set_config(std::shared_ptr < api_registry_builder20 > rb, http_context& ctx
    });

    cs::find_config_id.set(r, [&cfg] (const_req r) {
-        auto id = r.param["id"];
+        auto id = r.get_path_param("id");
        for (auto&& cfg_ref : cfg.values()) {
            auto&& cfg = cfg_ref.get();
            if (id == cfg.name()) {
--- a/api/error_injection.cc
+++ b/api/error_injection.cc
@@ -24,7 +24,7 @@ namespace hf = httpd::error_injection_json;
 void set_error_injection(http_context& ctx, routes& r) {

    hf::enable_injection.set(r, [](std::unique_ptr<request> req) {
-        sstring injection = req->param["injection"];
+        sstring injection = req->get_path_param("injection");
        bool one_shot = req->get_query_param("one_shot") == "True";
        auto params = req->content;

@@ -56,7 +56,7 @@ void set_error_injection(http_context& ctx, routes& r) {
    });

    hf::disable_injection.set(r, [](std::unique_ptr<request> req) {
-        sstring injection = req->param["injection"];
+        sstring injection = req->get_path_param("injection");

        auto& errinj = utils::get_local_injector();
        return errinj.disable_on_all(injection).then([] {
@@ -72,7 +72,7 @@ void set_error_injection(http_context& ctx, routes& r) {
    });

    hf::message_injection.set(r, [](std::unique_ptr<request> req) {
-        sstring injection = req->param["injection"];
+        sstring injection = req->get_path_param("injection");
        auto& errinj = utils::get_local_injector();
        return errinj.receive_message_on_all(injection).then([] {
            return make_ready_future<json::json_return_type>(json::json_void());
--- a/api/failure_detector.cc
+++ b/api/failure_detector.cc
@@ -80,9 +80,9 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {

    fd::get_endpoint_state.set(r, [&g] (std::unique_ptr<request> req) {
        return g.container().invoke_on(0, [req = std::move(req)] (gms::gossiper& g) {
-            auto state = g.get_endpoint_state_ptr(gms::inet_address(req->param["addr"]));
+            auto state = g.get_endpoint_state_ptr(gms::inet_address(req->get_path_param("addr")));
            if (!state) {
-                return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->param["addr"]));
+                return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->get_path_param("addr")));
            }
            std::stringstream ss;
            g.append_endpoint_state(ss, *state);
--- a/api/gossiper.cc
+++ b/api/gossiper.cc
@@ -31,21 +31,21 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
    });

    httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
-        gms::inet_address ep(req->param["addr"]);
+        gms::inet_address ep(req->get_path_param("addr"));
        // synchronize unreachable_members on all shards
        co_await g.get_unreachable_members_synchronized();
        co_return g.get_endpoint_downtime(ep);
    });

    httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<http::request> req) {
-        gms::inet_address ep(req->param["addr"]);
+        gms::inet_address ep(req->get_path_param("addr"));
        return g.get_current_generation_number(ep).then([] (gms::generation_type res) {
            return make_ready_future<json::json_return_type>(res.value());
        });
    });

    httpd::gossiper_json::get_current_heart_beat_version.set(r, [&g] (std::unique_ptr<http::request> req) {
-        gms::inet_address ep(req->param["addr"]);
+        gms::inet_address ep(req->get_path_param("addr"));
        return g.get_current_heart_beat_version(ep).then([] (gms::version_type res) {
            return make_ready_future<json::json_return_type>(res.value());
        });
@@ -53,17 +53,17 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {

    httpd::gossiper_json::assassinate_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
        if (req->get_query_param("unsafe") != "True") {
-            return g.assassinate_endpoint(req->param["addr"]).then([] {
+            return g.assassinate_endpoint(req->get_path_param("addr")).then([] {
                return make_ready_future<json::json_return_type>(json_void());
            });
        }
-        return g.unsafe_assassinate_endpoint(req->param["addr"]).then([] {
+        return g.unsafe_assassinate_endpoint(req->get_path_param("addr")).then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
    });

    httpd::gossiper_json::force_remove_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
-        gms::inet_address ep(req->param["addr"]);
+        gms::inet_address ep(req->get_path_param("addr"));
        return g.force_remove_endpoint(ep, gms::null_permit_id).then([] {
            return make_ready_future<json::json_return_type>(json_void());
        });
--- a/api/raft.cc
+++ b/api/raft.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2024-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+#include <seastar/core/coroutine.hh>
+
+#include "api/api.hh"
+#include "api/api-doc/raft.json.hh"
+
+#include "service/raft/raft_group_registry.hh"
+
+using namespace seastar::httpd;
+
+extern logging::logger apilog;
+
+namespace api {
+
+namespace r = httpd::raft_json;
+using namespace json;
+
+void set_raft(http_context&, httpd::routes& r, sharded<service::raft_group_registry>& raft_gr) {  // Registers the Raft REST handlers on `r`.
+    r::trigger_snapshot.set(r, [&raft_gr] (std::unique_ptr<http::request> req) -> future<json_return_type> {
+        raft::group_id gid{utils::UUID{req->get_path_param("group_id")}};  // group to snapshot, taken from the path
+        auto timeout_dur = std::invoke([timeout_str = req->get_query_param("timeout")] {  // "timeout" query parameter, in seconds
+            if (timeout_str.empty()) {
+                return std::chrono::seconds{60};  // default used when the parameter is empty
+            }
+            auto dur = std::stoll(timeout_str);  // NOTE: std::stoll itself throws std::invalid_argument/std::out_of_range on malformed text
+            if (dur <= 0) {
+                throw bad_param_exception{"Timeout must be a positive number."};  // client error, not an internal server failure
+            }
+            return std::chrono::seconds{dur};
+        });
+        // Every shard is asked; only a shard whose registry finds a server for `gid` acts and sets the flag.
+        std::atomic<bool> found_srv{false};
+        co_await raft_gr.invoke_on_all([gid, timeout_dur, &found_srv] (service::raft_group_registry& raft_gr) -> future<> {
+            auto* srv = raft_gr.find_server(gid);
+            if (!srv) {
+                co_return;  // this shard has no server for the group
+            }
+
+            found_srv = true;
+            abort_on_expiry aoe(lowres_clock::now() + timeout_dur);  // deadline = now + timeout; its abort source is passed to trigger_snapshot
+            apilog.info("Triggering Raft group {} snapshot", gid);
+            auto result = co_await srv->trigger_snapshot(&aoe.abort_source());
+            if (result) {
+                apilog.info("New snapshot for Raft group {} created", gid);
+            } else {
+                apilog.info("Could not create new snapshot for Raft group {}, no new entries applied", gid);
+            }
+        });
+        // No shard had a server for the requested group: the group ID supplied by the client is wrong.
+        if (!found_srv) {
+            throw bad_param_exception{fmt::format("Server for group ID {} not found", gid)};
+        }
+
+        co_return json_void{};
+    });
+}
+
+void unset_raft(http_context&, httpd::routes& r) {  // Counterpart of set_raft: unsets the trigger_snapshot route on `r`.
+    r::trigger_snapshot.unset(r);
+}
+
+}
+
--- a/api/raft.hh
+++ b/api/raft.hh
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2023-present ScyllaDB
+ */
+
+/*
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+#pragma once
+
+#include "api_init.hh"
+
+namespace api {
+
+void set_raft(http_context& ctx, httpd::routes& r, sharded<service::raft_group_registry>& raft_gr);
+void unset_raft(http_context& ctx, httpd::routes& r);
+
+}
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -58,15 +58,19 @@ namespace ss = httpd::storage_service_json;
 namespace sp = httpd::storage_proxy_json;
 using namespace json;

-sstring validate_keyspace(http_context& ctx, sstring ks_name) {
+sstring validate_keyspace(const http_context& ctx, sstring ks_name) {  // Returns ks_name if the local database has that keyspace; otherwise throws bad_param_exception.
    if (ctx.db.local().has_keyspace(ks_name)) {
        return ks_name;
    }
    throw bad_param_exception(replica::no_such_keyspace(ks_name).what());
 }

-sstring validate_keyspace(http_context& ctx, const parameters& param) {
-    return validate_keyspace(ctx, param["keyspace"]);
+sstring validate_keyspace(const http_context& ctx, const std::unique_ptr<http::request>& req) {  // Overload: validates the request's "keyspace" path parameter.
+    return validate_keyspace(ctx, req->get_path_param("keyspace"));
+}
+
+sstring validate_keyspace(const http_context& ctx, const http::request& req) {  // Same as above, for callers that hold the request by reference.
+    return validate_keyspace(ctx, req.get_path_param("keyspace"));
 }

 locator::host_id validate_host_id(const sstring& param) {
@@ -171,7 +175,7 @@ using ks_cf_func = std::function<future<json::json_return_type>(http_context&, s

 static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
    return [&ctx, f = std::move(f)](std::unique_ptr<http::request> req) {
-        auto keyspace = validate_keyspace(ctx, req->param);
+        auto keyspace = validate_keyspace(ctx, req);
        auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
        return f(ctx, std::move(req), std::move(keyspace), std::move(table_infos));
    };
@@ -338,7 +342,7 @@ void set_repair(http_context& ctx, routes& r, sharded<repair_service>& repair) {
        // returns immediately, not waiting for the repair to finish. The user
        // then has other mechanisms to track the ongoing repair's progress,
        // or stop it.
-        return repair_start(repair, validate_keyspace(ctx, req->param),
+        return repair_start(repair, validate_keyspace(ctx, req),
                options_map).then([] (int i) {
                    return make_ready_future<json::json_return_type>(i);
                });
@@ -421,7 +425,7 @@ void unset_repair(http_context& ctx, routes& r) {

 void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>& sst_loader) {
    ss::load_new_ss_tables.set(r, [&ctx, &sst_loader](std::unique_ptr<http::request> req) {
-        auto ks = validate_keyspace(ctx, req->param);
+        auto ks = validate_keyspace(ctx, req);
        auto cf = req->get_query_param("cf");
        auto stream = req->get_query_param("load_and_stream");
        auto primary_replica = req->get_query_param("primary_replica_only");
@@ -452,8 +456,8 @@ void unset_sstables_loader(http_context& ctx, routes& r) {

 void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_builder>& vb) {
    ss::view_build_statuses.set(r, [&ctx, &vb] (std::unique_ptr<http::request> req) {
-        auto keyspace = validate_keyspace(ctx, req->param);
-        auto view = req->param["view"];
+        auto keyspace = validate_keyspace(ctx, req);
+        auto view = req->get_path_param("view");
        return vb.local().view_build_statuses(std::move(keyspace), std::move(view)).then([] (std::unordered_map<sstring, sstring> status) {
            std::vector<storage_service_json::mapper> res;
            return make_ready_future<json::json_return_type>(map_to_key_value(std::move(status), res));
@@ -590,7 +594,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    });

    ss::get_range_to_endpoint_map.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        auto keyspace = validate_keyspace(ctx, req->param);
+        auto keyspace = validate_keyspace(ctx, req);
        std::vector<ss::maplist_mapper> res;
        co_return stream_range_as_array(co_await ss.local().get_range_to_address_map(keyspace),
                [](const std::pair<dht::token_range, inet_address_vector_replica_set>& entry){
@@ -615,7 +619,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    ss::get_pending_range_to_endpoint_map.set(r, [&ctx](std::unique_ptr<http::request> req) {
        //TBD
        unimplemented();
-        auto keyspace = validate_keyspace(ctx, req->param);
+        auto keyspace = validate_keyspace(ctx, req);
        std::vector<ss::maplist_mapper> res;
        return make_ready_future<json::json_return_type>(res);
    });
@@ -631,7 +635,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    });

    ss::describe_ring.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) {
-        return describe_ring_as_json(ss, validate_keyspace(ctx, req->param));
+        return describe_ring_as_json(ss, validate_keyspace(ctx, req));
    });

    ss::get_host_id_map.set(r, [&ss](const_req req) {
@@ -664,7 +668,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    });

    ss::get_natural_endpoints.set(r, [&ctx, &ss](const_req req) {
-        auto keyspace = validate_keyspace(ctx, req.param);
+        auto keyspace = validate_keyspace(ctx, req);
        return container_to_vec(ss.local().get_natural_endpoints(keyspace, req.get_query_param("cf"),
                req.get_query_param("key")));
    });
@@ -733,7 +737,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_

    ss::force_keyspace_cleanup.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        auto& db = ctx.db;
-        auto keyspace = validate_keyspace(ctx, req->param);
+        auto keyspace = validate_keyspace(ctx, req);
        auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
        apilog.info("force_keyspace_cleanup: keyspace={} tables={}", keyspace, table_infos);
        if (!co_await ss.local().is_cleanup_allowed(keyspace)) {
@@ -796,7 +800,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    });

    ss::force_keyspace_flush.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        auto keyspace = validate_keyspace(ctx, req->param);
+        auto keyspace = validate_keyspace(ctx, req);
        auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
        apilog.info("perform_keyspace_flush: keyspace={} tables={}", keyspace, column_families);
        auto& db = ctx.db;
@@ -905,7 +909,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    ss::truncate.set(r, [&ctx](std::unique_ptr<http::request> req) {
        //TBD
        unimplemented();
-        auto keyspace = validate_keyspace(ctx, req->param);
+        auto keyspace = validate_keyspace(ctx, req);
        auto column_family = req->get_query_param("cf");
        return make_ready_future<json::json_return_type>(json_void());
    });
@@ -1039,14 +1043,14 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    ss::bulk_load.set(r, [](std::unique_ptr<http::request> req) {
        //TBD
        unimplemented();
-        auto path = req->param["path"];
+        auto path = req->get_path_param("path");
        return make_ready_future<json::json_return_type>(json_void());
    });

    ss::bulk_load_async.set(r, [](std::unique_ptr<http::request> req) {
        //TBD
        unimplemented();
-        auto path = req->param["path"];
+        auto path = req->get_path_param("path");
        return make_ready_future<json::json_return_type>(json_void());
    });

@@ -1134,7 +1138,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    });

    ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        auto keyspace = validate_keyspace(ctx, req->param);
+        auto keyspace = validate_keyspace(ctx, req);
        auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");

        apilog.info("enable_auto_compaction: keyspace={} tables={}", keyspace, tables);
@@ -1142,7 +1146,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    });

    ss::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        auto keyspace = validate_keyspace(ctx, req->param);
+        auto keyspace = validate_keyspace(ctx, req);
        auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");

        apilog.info("disable_auto_compaction: keyspace={} tables={}", keyspace, tables);
@@ -1150,7 +1154,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    });

    ss::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        auto keyspace = validate_keyspace(ctx, req->param);
+        auto keyspace = validate_keyspace(ctx, req);
        auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");

        apilog.info("enable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
@@ -1158,7 +1162,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    });

    ss::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
-        auto keyspace = validate_keyspace(ctx, req->param);
+        auto keyspace = validate_keyspace(ctx, req);
        auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");

        apilog.info("disable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
@@ -1254,7 +1258,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
    });

    ss::get_effective_ownership.set(r, [&ctx, &ss] (std::unique_ptr<http::request> req) {
-        auto keyspace_name = req->param["keyspace"] == "null" ? "" : validate_keyspace(ctx, req->param);
+        auto keyspace_name = req->get_path_param("keyspace") == "null" ? "" : validate_keyspace(ctx, req);
        return ss.local().effective_ownership(keyspace_name).then([] (auto&& ownership) {
            std::vector<storage_service_json::mapper> res;
            return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
@@ -1542,8 +1546,10 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
                        });
                    }).then([&s] {
                        return s.write("]").then([&s] {
-                            return s.close();
+                            return s.flush();
                        });
+                    }).finally([&s] {
+                        return s.close();
                    });
                });
            };
--- a/api/storage_service.hh
+++ b/api/storage_service.hh
@@ -37,11 +37,11 @@ namespace api {

 // verify that the keyspace is found, otherwise a bad_param_exception exception is thrown
 // containing the description of the respective keyspace error.
-sstring validate_keyspace(http_context& ctx, sstring ks_name);
+sstring validate_keyspace(const http_context& ctx, sstring ks_name);

 // verify that the keyspace parameter is found, otherwise a bad_param_exception exception is thrown
 // containing the description of the respective keyspace error.
-sstring validate_keyspace(http_context& ctx, const httpd::parameters& param);
+sstring validate_keyspace(const http_context& ctx, const std::unique_ptr<http::request>& req);

 // splits a request parameter assumed to hold a comma-separated list of table names
 // verify that the tables are found, otherwise a bad_param_exception exception is thrown
--- a/api/stream_manager.cc
+++ b/api/stream_manager.cc
@@ -106,7 +106,7 @@ void set_stream_manager(http_context& ctx, routes& r, sharded<streaming::stream_
    });

    hs::get_total_incoming_bytes.set(r, [&sm](std::unique_ptr<request> req) {
-        gms::inet_address peer(req->param["peer"]);
+        gms::inet_address peer(req->get_path_param("peer"));
        return sm.map_reduce0([peer](streaming::stream_manager& sm) {
            return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
                return sbytes.bytes_received;
@@ -127,7 +127,7 @@ void set_stream_manager(http_context& ctx, routes& r, sharded<streaming::stream_
    });

    hs::get_total_outgoing_bytes.set(r, [&sm](std::unique_ptr<request> req) {
-        gms::inet_address peer(req->param["peer"]);
+        gms::inet_address peer(req->get_path_param("peer"));
        return sm.map_reduce0([peer] (streaming::stream_manager& sm) {
            return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
                return sbytes.bytes_sent;
--- a/api/system.cc
+++ b/api/system.cc
@@ -119,9 +119,9 @@ void set_system(http_context& ctx, routes& r) {

    hs::get_logger_level.set(r, [](const_req req) {
        try {
-            return logging::level_name(logging::logger_registry().get_logger_level(req.param["name"]));
+            return logging::level_name(logging::logger_registry().get_logger_level(req.get_path_param("name")));
        } catch (std::out_of_range& e) {
-            throw bad_param_exception("Unknown logger name " + req.param["name"]);
+            throw bad_param_exception("Unknown logger name " + req.get_path_param("name"));
        }
        // just to keep the compiler happy
        return sstring();
@@ -130,9 +130,9 @@ void set_system(http_context& ctx, routes& r) {
    hs::set_logger_level.set(r, [](const_req req) {
        try {
            logging::log_level level = boost::lexical_cast<logging::log_level>(std::string(req.get_query_param("level")));
-            logging::logger_registry().set_logger_level(req.param["name"], level);
+            logging::logger_registry().set_logger_level(req.get_path_param("name"), level);
        } catch (std::out_of_range& e) {
-            throw bad_param_exception("Unknown logger name " + req.param["name"]);
+            throw bad_param_exception("Unknown logger name " + req.get_path_param("name"));
        } catch (boost::bad_lexical_cast& e) {
            throw bad_param_exception("Unknown logging level " + req.get_query_param("level"));
        }
--- a/api/task_manager.cc
+++ b/api/task_manager.cc
@@ -7,6 +7,7 @@
 */

 #include <seastar/core/coroutine.hh>
+#include <seastar/coroutine/exception.hh>

 #include "task_manager.hh"
 #include "api/api-doc/task_manager.json.hh"
@@ -124,7 +125,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
            chunked_stats local_res;
            tasks::task_manager::module_ptr module;
            try {
-                module = tm.find_module(req->param["module"]);
+                module = tm.find_module(req->get_path_param("module"));
            } catch (...) {
                throw bad_param_exception(fmt::format("{}", std::current_exception()));
            }
@@ -139,25 +140,34 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {

        std::function<future<>(output_stream<char>&&)> f = [r = std::move(res)] (output_stream<char>&& os) -> future<> {
            auto s = std::move(os);
-            auto res = std::move(r);
-            co_await s.write("[");
-            std::string delim = "";
-            for (auto& v: res) {
-                for (auto& stats: v) {
-                    co_await s.write(std::exchange(delim, ", "));
-                    tm::task_stats ts;
-                    ts = stats;
-                    co_await formatter::write(s, ts);
+            std::exception_ptr ex; // holds a failure from the try block so that s.close() below still runs
+            try { // writes "[", every stats entry separated by ", ", then "]"
+                auto res = std::move(r);
+                co_await s.write("[");
+                std::string delim = ""; // empty before the first entry, ", " afterwards (see std::exchange below)
+                for (auto& v: res) {
+                    for (auto& stats: v) {
+                        co_await s.write(std::exchange(delim, ", "));
+                        tm::task_stats ts;
+                        ts = stats;
+                        co_await formatter::write(s, ts);
+                    }
                }
+                co_await s.write("]");
+                co_await s.flush(); // flushed inside the try, so a flush failure is captured as well
+            } catch (...) {
+                ex = std::current_exception(); // remember the failure; it is propagated after the stream is closed
            }
-            co_await s.write("]");
            co_await s.close();
+            if (ex) { // propagate the captured failure now that the stream has been closed
+                co_await coroutine::return_exception_ptr(std::move(ex));
+            }
        };
        co_return std::move(f);
    });

    tm::get_task_status.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
+        auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
        tasks::task_manager::foreign_task_ptr task;
        try {
            task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
@@ -174,7 +184,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
    });

    tm::abort_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
+        auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
        try {
            co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
                if (!task->is_abortable()) {
@@ -189,7 +199,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
    });

    tm::wait_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
+        auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
        tasks::task_manager::foreign_task_ptr task;
        try {
            task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) {
@@ -210,7 +220,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {

    tm::get_task_status_recursively.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
        auto& _ctx = ctx;
-        auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
+        auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
        std::queue<tasks::task_manager::foreign_task_ptr> q;
        utils::chunked_vector<full_task_status> res;

--- a/api/task_manager_test.cc
+++ b/api/task_manager_test.cc
@@ -83,7 +83,7 @@ void set_task_manager_test(http_context& ctx, routes& r) {
    });

    tmt::finish_test_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
-        auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
+        auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
        auto it = req->query_parameters.find("error");
        bool fail = it != req->query_parameters.end();
        std::string error = fail ? it->second : "";
--- a/bytes.hh
+++ b/bytes.hh
@@ -89,7 +89,7 @@ public:
        // get the delimeter if any
        auto it = ctx.begin();
        auto end = ctx.end();
-        if (it != end) {
+        if (it != end && *it != '}') {
            int group_size = *it++ - '0';
            if (group_size < 0 ||
                static_cast<size_t>(group_size) > sizeof(uint64_t)) {
--- a/cache_flat_mutation_reader.hh
+++ b/cache_flat_mutation_reader.hh
@@ -453,7 +453,10 @@ future<> cache_flat_mutation_reader::read_from_underlying() {
                                auto e = alloc_strategy_unique_ptr<rows_entry>(
                                    current_allocator().construct<rows_entry>(_ck_ranges_curr->start()->value()));
                                // Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
-                                auto insert_result = rows.insert_before_hint(_next_row.get_iterator_in_latest_version(), std::move(e), cmp);
+                                auto insert_result = rows.insert_before_hint(
+                                        _next_row.at_a_row() ? _next_row.get_iterator_in_latest_version() : rows.begin(),
+                                        std::move(e),
+                                        cmp);
                                if (insert_result.second) {
                                    auto it = insert_result.first;
                                    _snp->tracker()->insert(*it);
@@ -470,7 +473,10 @@ future<> cache_flat_mutation_reader::read_from_underlying() {
                                auto e = alloc_strategy_unique_ptr<rows_entry>(
                                    current_allocator().construct<rows_entry>(table_s, to_table_domain(_upper_bound), is_dummy::yes, is_continuous::no));
                                // Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
-                                auto insert_result = rows.insert_before_hint(_next_row.get_iterator_in_latest_version(), std::move(e), cmp);
+                                auto insert_result = rows.insert_before_hint(
+                                        _next_row.at_a_row() ? _next_row.get_iterator_in_latest_version() : rows.begin(),
+                                        std::move(e),
+                                        cmp);
                                if (insert_result.second) {
                                    clogger.trace("csm {}: L{}: inserted dummy at {}", fmt::ptr(this), __LINE__, _upper_bound);
                                    _snp->tracker()->insert(*insert_result.first);
@@ -631,7 +637,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
            current_allocator().construct<rows_entry>(table_schema(), cr.key(), cr.as_deletable_row()));
        new_entry->set_continuous(false);
        new_entry->set_range_tombstone(_current_tombstone);
-        auto it = _next_row.iterators_valid() ? _next_row.get_iterator_in_latest_version()
+        auto it = _next_row.iterators_valid() && _next_row.at_a_row() ? _next_row.get_iterator_in_latest_version()
                                              : mp.clustered_rows().lower_bound(cr.key(), cmp);
        auto insert_result = mp.mutable_clustered_rows().insert_before_hint(it, std::move(new_entry), cmp);
        it = insert_result.first;
@@ -696,7 +702,7 @@ bool cache_flat_mutation_reader::maybe_add_to_cache(const range_tombstone_change

        auto new_entry = alloc_strategy_unique_ptr<rows_entry>(
                current_allocator().construct<rows_entry>(table_schema(), to_table_domain(rtc.position()), is_dummy::yes, is_continuous::no));
-        auto it = _next_row.iterators_valid() ? _next_row.get_iterator_in_latest_version()
+        auto it = _next_row.iterators_valid() && _next_row.at_a_row() ? _next_row.get_iterator_in_latest_version()
                                              : mp.clustered_rows().lower_bound(to_table_domain(rtc.position()), cmp);
        auto insert_result = mp.mutable_clustered_rows().insert_before_hint(it, std::move(new_entry), cmp);
        it = insert_result.first;
@@ -899,7 +905,10 @@ void cache_flat_mutation_reader::move_to_range(query::clustering_row_ranges::con
                    auto& rows = _snp->version()->partition().mutable_clustered_rows();
                    auto new_entry = alloc_strategy_unique_ptr<rows_entry>(current_allocator().construct<rows_entry>(table_schema(),
                            to_table_domain(_lower_bound), is_dummy::yes, is_continuous::no));
-                    return rows.insert_before_hint(_next_row.get_iterator_in_latest_version(), std::move(new_entry), cmp);
+                    return rows.insert_before_hint(
+                            _next_row.at_a_row() ? _next_row.get_iterator_in_latest_version() : rows.begin(),
+                            std::move(new_entry),
+                            cmp);
                });
                auto it = insert_result.first;
                if (insert_result.second) {
--- a/cdc/generation.cc
+++ b/cdc/generation.cc
@@ -51,8 +51,16 @@ namespace db {

 namespace cdc {

-extern const api::timestamp_clock::duration generation_leeway =
-    std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::seconds(5));
+api::timestamp_clock::duration get_generation_leeway() { // returns the CDC generation leeway: how far "into the future" or "into the past" a write timestamp may be
+    static thread_local auto generation_leeway = // per-thread value, initialized to 5 seconds
+            std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::seconds(5));
+
+    utils::get_local_injector().inject("increase_cdc_generation_leeway", [&] { // error-injection hook; presumably only active when this injection is enabled -- confirm
+        generation_leeway = std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::minutes(5)); // overwrites the static, so the 5-minute value persists for later calls on this thread
+    });
+
+    return generation_leeway;
+}

 static void copy_int_to_bytes(int64_t i, size_t offset, bytes& b) {
    i = net::hton(i);
@@ -372,7 +380,7 @@ db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milli

    auto ts = db_clock::now();
    if (add_delay && ring_delay != 0ms) {
-        ts += 2 * ring_delay + duration_cast<milliseconds>(generation_leeway);
+        ts += 2 * ring_delay + duration_cast<milliseconds>(get_generation_leeway());
    }
    return ts;
 }
--- a/cdc/generation.hh
+++ b/cdc/generation.hh
@@ -46,6 +46,8 @@ namespace gms {

 namespace cdc {

+api::timestamp_clock::duration get_generation_leeway();
+
 class stream_id final {
    bytes _value;
 public:
--- a/cdc/metadata.cc
+++ b/cdc/metadata.cc
@@ -15,10 +15,6 @@

 extern logging::logger cdc_log;

-namespace cdc {
-    extern const api::timestamp_clock::duration generation_leeway;
-} // namespace cdc
-
 static api::timestamp_type to_ts(db_clock::time_point tp) {
    // This assumes that timestamp_clock and db_clock have the same epochs.
    return std::chrono::duration_cast<api::timestamp_clock::duration>(tp.time_since_epoch()).count();
@@ -73,7 +69,7 @@ bool cdc::metadata::streams_available() const {

 cdc::stream_id cdc::metadata::get_stream(api::timestamp_type ts, dht::token tok) {
    auto now = api::new_timestamp();
-    if (ts > now + generation_leeway.count()) {
+    if (ts > now + get_generation_leeway().count()) {
        throw exceptions::invalid_request_exception(format(
                "cdc: attempted to get a stream \"from the future\" ({}; current server time: {})."
                " With CDC you cannot send writes with timestamps arbitrarily into the future, because we don't"
@@ -86,27 +82,43 @@ cdc::stream_id cdc::metadata::get_stream(api::timestamp_type ts, dht::token tok)
        // Nothing protects us from that until we start using transactions for generation switching.
    }

-    auto it = gen_used_at(now);
-    if (it == _gens.end()) {
+    auto it = gen_used_at(now - get_generation_leeway().count());
+
+    if (it != _gens.end()) {
+        // Garbage-collect generations that will no longer be used.
+        it = _gens.erase(_gens.begin(), it);
+    }
+
+    if (ts <= now - get_generation_leeway().count()) {
+        // We reject the write if `ts <= now - generation_leeway` and the write is not to the current generation, which
+        // happens iff one of the following is true:
+        // - the write is to no generation,
+        // - the write is to a generation older than the generation under `it`,
+        // - the write is to the generation under `it` and that generation is not the current generation.
+        // Note that we cannot distinguish the first and second cases because we garbage-collect obsolete generations,
+        // but we can check if one of them takes place (`it == _gens.end() || ts < it->first`). These three conditions
+        // are sufficient. The write with `ts <= now - generation_leeway` cannot be to one of the generations following
+        // the generation under `it` because that generation was operating at `now - generation_leeway`.
+        bool is_previous_gen = it != _gens.end() && std::next(it) != _gens.end() && std::next(it)->first <= now;
+        if (it == _gens.end() || ts < it->first || is_previous_gen) {
+            throw exceptions::invalid_request_exception(format(
+                    "cdc: attempted to get a stream \"from the past\" ({}; current server time: {})."
+                    " With CDC you cannot send writes with timestamps too far into the past, because that would break"
+                    " consistency properties.\n"
+                    "We *do* allow sending writes into the near past, but our ability to do that is limited."
+                    " Are you using client-side timestamps? Make sure your clocks are well-synchronized"
+                    " with the database's clocks.", format_timestamp(ts), format_timestamp(now)));
+        }
+    }
+
+    it = _gens.begin();
+    if (it == _gens.end() || ts < it->first) {
        throw std::runtime_error(format(
-                "cdc::metadata::get_stream: could not find any CDC stream (current time: {})."
-                " Are we in the middle of a cluster upgrade?", format_timestamp(now)));
+                "cdc::metadata::get_stream: could not find any CDC stream for timestamp {}."
+                " Are we in the middle of a cluster upgrade?", format_timestamp(ts)));
    }

-    // Garbage-collect generations that will no longer be used.
-    it = _gens.erase(_gens.begin(), it);
-
-    if (it->first > ts) {
-        throw exceptions::invalid_request_exception(format(
-                "cdc: attempted to get a stream from an earlier generation than the currently used one."
-                " With CDC you cannot send writes with timestamps too far into the past, because that would break"
-                " consistency properties (write timestamp: {}, current generation started at: {})",
-                format_timestamp(ts), format_timestamp(it->first)));
-    }
-
-    // With `generation_leeway` we allow sending writes to the near future. It might happen
-    // that `ts` doesn't belong to the current generation ("current" according to our clock),
-    // but to the next generation. Adjust for this case:
+    // Find the generation operating at `ts`.
    {
        auto next_it = std::next(it);
        while (next_it != _gens.end() && next_it->first <= ts) {
@@ -147,8 +159,8 @@ bool cdc::metadata::known_or_obsolete(db_clock::time_point tp) const {
        ++it;
    }

-    // Check if some new generation has already superseded this one.
-    return it != _gens.end() && it->first <= api::new_timestamp();
+    // Check if the generation is obsolete.
+    return it != _gens.end() && it->first <= api::new_timestamp() - get_generation_leeway().count();
 }

 bool cdc::metadata::insert(db_clock::time_point tp, topology_description&& gen) {
@@ -157,7 +169,7 @@ bool cdc::metadata::insert(db_clock::time_point tp, topology_description&& gen)
    }

    auto now = api::new_timestamp();
-    auto it = gen_used_at(now);
+    auto it = gen_used_at(now - get_generation_leeway().count());

    if (it != _gens.end()) {
        // Garbage-collect generations that will no longer be used.
--- a/cdc/metadata.hh
+++ b/cdc/metadata.hh
@@ -42,7 +42,9 @@ class metadata final {

    container_t::const_iterator gen_used_at(api::timestamp_type ts) const;
 public:
-    /* Is a generation with the given timestamp already known or superseded by a newer generation? */
+    /* Is a generation with the given timestamp already known or obsolete? It is obsolete if and only if
+     * it is older than the generation operating at `now - get_generation_leeway()`.
+     */
    bool known_or_obsolete(db_clock::time_point) const;

    /* Are there streams available. I.e. valid for time == now. If this is false, any writes to 
@@ -54,8 +56,9 @@ public:
     *
     * If the provided timestamp is too far away "into the future" (where "now" is defined according to our local clock),
     * we reject the get_stream query. This is because the resulting stream might belong to a generation which we don't
-     * yet know about. The amount of leeway (how much "into the future" we allow `ts` to be) is defined
-     * by the `cdc::generation_leeway` constant.
+     * yet know about. Similarly, we reject queries to the previous generations if the timestamp is too far away "into
+     * the past". The amount of leeway (how much "into the future" or "into the past" we allow `ts` to be) is defined by
+     * `get_generation_leeway()`.
     */
    stream_id get_stream(api::timestamp_type ts, dht::token tok);

--- a/compaction/compaction.cc
+++ b/compaction/compaction.cc
@@ -144,12 +144,21 @@ std::ostream& operator<<(std::ostream& os, compaction_type_options::scrub::quara
 }

 static api::timestamp_type get_max_purgeable_timestamp(const table_state& table_s, sstable_set::incremental_selector& selector,
-        const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks) {
+        const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks,
+        const api::timestamp_type compacting_max_timestamp) {
    if (!table_s.tombstone_gc_enabled()) [[unlikely]] {
        return api::min_timestamp;
    }

-    auto timestamp = table_s.min_memtable_timestamp();
+    auto timestamp = api::max_timestamp;
+    auto memtable_min_timestamp = table_s.min_memtable_timestamp();
+    // Use memtable timestamp if it contains data older than the sstables being compacted,
+    // and if the memtable also contains the key we're calculating max purgeable timestamp for.
+    // First condition helps to not penalize the common scenario where memtable only contains
+    // newer data.
+    if (memtable_min_timestamp <= compacting_max_timestamp && table_s.memtable_has_key(dk)) {
+        timestamp = memtable_min_timestamp;
+    }
    std::optional<utils::hashed_key> hk;
    for (auto&& sst : boost::range::join(selector.select(dk).sstables, table_s.compacted_undeleted_sstables())) {
        if (compacting_set.contains(sst)) {
@@ -441,7 +450,9 @@ protected:
    uint64_t _end_size = 0;
    // fully expired files, which are skipped, aren't taken into account.
    uint64_t _compacting_data_file_size = 0;
+    api::timestamp_type _compacting_max_timestamp = api::min_timestamp;
    uint64_t _estimated_partitions = 0;
+    double _estimated_droppable_tombstone_ratio = 0;
    uint64_t _bloom_filter_checks = 0;
    db::replay_position _rp;
    encoding_stats_collector _stats_collector;
@@ -470,6 +481,26 @@ private:
        cdata.compaction_fan_in = descriptor.fan_in();
        return cdata;
    }
+
+    // Called in a seastar thread: it may yield and waits on a future with .get().
+    dht::partition_range_vector
+    get_ranges_for_invalidation(const std::vector<shared_sstable>& sstables) { // result is handed to compaction_completion_desc by get_compaction_completion_desc()
+        // If owned ranges is disengaged, it means no cleanup work was done and
+        // so nothing needs to be invalidated.
+        if (!_owned_ranges) {
+            return dht::partition_range_vector{};
+        }
+        auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);
+
+        auto non_owned_ranges = boost::copy_range<dht::partition_range_vector>(sstables // at this point these are the full [first key, last key] spans of the given sstables
+                | boost::adaptors::transformed([] (const shared_sstable& sst) {
+            seastar::thread::maybe_yield();
+            return dht::partition_range::make({sst->get_first_decorated_key(), true}, // assumes `true` marks the bound as inclusive -- TODO confirm
+                                              {sst->get_last_decorated_key(), true});
+        }));
+
+        return dht::subtract_ranges(*_schema, non_owned_ranges, std::move(owned_ranges)).get(); // presumably the sstable spans minus the owned ranges -- confirm subtract_ranges argument order
+    }
 protected:
    compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
        : _cdata(init_compaction_data(cdata, descriptor))
@@ -549,9 +580,10 @@ protected:
        return _stats_collector.get();
    }

-    virtual compaction_completion_desc
+    compaction_completion_desc
    get_compaction_completion_desc(std::vector<shared_sstable> input_sstables, std::vector<shared_sstable> output_sstables) {
-        return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables)};
+        auto ranges_for_for_invalidation = get_ranges_for_invalidation(input_sstables);
+        return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_for_invalidation)};
    }

    // Tombstone expiration is enabled based on the presence of sstable set.
@@ -567,7 +599,8 @@ protected:
        sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
        cfg.run_identifier = gc_run;
        cfg.monitor = monitor.get();
-        auto writer = sst->get_writer(*schema(), partitions_per_sstable(), cfg, get_encoding_stats());
+        uint64_t estimated_partitions = std::max(1UL, uint64_t(ceil(partitions_per_sstable() * _estimated_droppable_tombstone_ratio)));
+        auto writer = sst->get_writer(*schema(), estimated_partitions, cfg, get_encoding_stats());
        return compaction_writer(std::move(monitor), std::move(writer), std::move(sst));
    }

@@ -686,6 +719,7 @@ private:
        auto fully_expired = _table_s.fully_expired_sstables(_sstables, gc_clock::now());
        min_max_tracker<api::timestamp_type> timestamp_tracker;

+        double sum_of_estimated_droppable_tombstone_ratio = 0;
        _input_sstable_generations.reserve(_sstables.size());
        for (auto& sst : _sstables) {
            co_await coroutine::maybe_yield();
@@ -712,7 +746,10 @@ private:
            // for a better estimate for the number of partitions in the merged
            // sstable than just adding up the lengths of individual sstables.
            _estimated_partitions += sst->get_estimated_key_count();
+            auto gc_before = sst->get_gc_before_for_drop_estimation(gc_clock::now(), _table_s.get_tombstone_gc_state(), _schema);
+            sum_of_estimated_droppable_tombstone_ratio += sst->estimate_droppable_tombstone_ratio(gc_before);
            _compacting_data_file_size += sst->ondisk_data_size();
+
            // TODO:
            // Note that this is not fully correct. Since we might be merging sstables that originated on
            // another shard (#cpu changed), we might be comparing RP:s with differing shard ids,
@@ -721,12 +758,16 @@ private:
            // this is kind of ok, esp. since we will hopefully not be trying to recover based on
            // compacted sstables anyway (CL should be clean by then).
            _rp = std::max(_rp, sst_stats.position);
+
+            _compacting_max_timestamp = std::max(_compacting_max_timestamp, sst->get_stats_metadata().max_timestamp);
        }
        log_info("{} {}", report_start_desc(), formatted_msg);
        if (ssts->size() < _sstables.size()) {
            log_debug("{} out of {} input sstables are fully expired sstables that will not be actually compacted",
                      _sstables.size() - ssts->size(), _sstables.size());
        }
+        // _estimated_droppable_tombstone_ratio could exceed 1.0 in certain cases, so limit it to 1.0.
+        _estimated_droppable_tombstone_ratio = std::min(1.0, sum_of_estimated_droppable_tombstone_ratio / ssts->size());

        _compacting = std::move(ssts);

@@ -841,7 +882,7 @@ private:
            };
        }
        return [this] (const dht::decorated_key& dk) {
-            return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks);
+            return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks, _compacting_max_timestamp);
        };
    }

@@ -1248,28 +1289,6 @@ public:
 };

 class cleanup_compaction final : public regular_compaction {
-private:
-    // Called in a seastar thread
-    dht::partition_range_vector
-    get_ranges_for_invalidation(const std::vector<shared_sstable>& sstables) {
-        auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);
-
-        auto non_owned_ranges = boost::copy_range<dht::partition_range_vector>(sstables
-                | boost::adaptors::transformed([] (const shared_sstable& sst) {
-            seastar::thread::maybe_yield();
-            return dht::partition_range::make({sst->get_first_decorated_key(), true},
-                                              {sst->get_last_decorated_key(), true});
-        }));
-
-        return dht::subtract_ranges(*_schema, non_owned_ranges, std::move(owned_ranges)).get();
-    }
-protected:
-    virtual compaction_completion_desc
-    get_compaction_completion_desc(std::vector<shared_sstable> input_sstables, std::vector<shared_sstable> output_sstables) override {
-        auto ranges_for_for_invalidation = get_ranges_for_invalidation(input_sstables);
-        return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_for_invalidation)};
-    }
-
 public:
    cleanup_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
        : regular_compaction(table_s, std::move(descriptor), cdata)
--- a/compaction/compaction_manager.cc
+++ b/compaction/compaction_manager.cc
@@ -22,6 +22,7 @@
 #include "sstables/exceptions.hh"
 #include "sstables/sstable_directory.hh"
 #include "locator/abstract_replication_strategy.hh"
+#include "utils/error_injection.hh"
 #include "utils/fb_utilities.hh"
 #include "utils/UUID_gen.hh"
 #include "db/system_keyspace.hh"
@@ -1147,6 +1148,11 @@ protected:
    }

    virtual future<compaction_manager::compaction_stats_opt> do_run() override {
+        if (!is_system_keyspace(_status.keyspace)) {
+            co_await utils::get_local_injector().inject_with_handler("compaction_regular_compaction_task_executor_do_run",
+                [] (auto& handler) { return handler.wait_for_message(db::timeout_clock::now() + 10s); });
+        }
+
        co_await coroutine::switch_to(_cm.compaction_sg());

        for (;;) {
@@ -1321,13 +1327,20 @@ private:
                }));
        };

-        auto get_next_job = [&] () -> std::optional<sstables::compaction_descriptor> {
-            auto desc = t.get_compaction_strategy().get_reshaping_job(get_reshape_candidates(), t.schema(), sstables::reshape_mode::strict);
-            return desc.sstables.size() ? std::make_optional(std::move(desc)) : std::nullopt;
+        auto get_next_job = [&] () -> future<std::optional<sstables::compaction_descriptor>> {
+            auto candidates = get_reshape_candidates();
+            if (candidates.empty()) {
+                co_return std::nullopt;
+            }
+            // all sstables added to maintenance set share the same underlying storage.
+            auto& storage = candidates.front()->get_storage();
+            sstables::reshape_config cfg = co_await sstables::make_reshape_config(storage, sstables::reshape_mode::strict);
+            auto desc = t.get_compaction_strategy().get_reshaping_job(get_reshape_candidates(), t.schema(), cfg);
+            co_return desc.sstables.size() ? std::make_optional(std::move(desc)) : std::nullopt;
        };

        std::exception_ptr err;
-        while (auto desc = get_next_job()) {
+        while (auto desc = co_await get_next_job()) {
            auto compacting = compacting_sstable_registration(_cm, _cm.get_compaction_state(&t), desc->sstables);
            auto on_replace = compacting.update_on_sstable_replacement();

@@ -1845,6 +1858,9 @@ future<> compaction_manager::try_perform_cleanup(owned_ranges_ptr sorted_owned_r
    if (found_maintenance_sstables) {
        co_await perform_offstrategy(t, info);
    }
+    if (utils::get_local_injector().enter("major_compaction_before_cleanup")) {
+        co_await perform_major_compaction(t, info);
+    }

    // Called with compaction_disabled
    auto get_sstables = [this, &t] () -> future<std::vector<sstables::shared_sstable>> {
--- a/compaction/compaction_strategy.cc
+++ b/compaction/compaction_strategy.cc
@@ -75,7 +75,7 @@ reader_consumer_v2 compaction_strategy_impl::make_interposer_consumer(const muta
 }

 compaction_descriptor
-compaction_strategy_impl::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
+compaction_strategy_impl::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
    return compaction_descriptor();
 }

@@ -700,8 +700,8 @@ compaction_backlog_tracker compaction_strategy::make_backlog_tracker() const {
 }

 sstables::compaction_descriptor
-compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
-    return _compaction_strategy_impl->get_reshaping_job(std::move(input), schema, mode);
+compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
+    return _compaction_strategy_impl->get_reshaping_job(std::move(input), schema, cfg);
 }

 uint64_t compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr schema) const {
@@ -739,6 +739,13 @@ compaction_strategy make_compaction_strategy(compaction_strategy_type strategy,
    return compaction_strategy(std::move(impl));
 }

+future<reshape_config> make_reshape_config(const sstables::storage& storage, reshape_mode mode) {
+    co_return sstables::reshape_config{
+        .mode = mode,
+        .free_storage_space = co_await storage.free_space() / smp::count,
+    };
+}
+
 }

 namespace compaction {
--- a/compaction/compaction_strategy.hh
+++ b/compaction/compaction_strategy.hh
@@ -31,6 +31,7 @@ class sstable;
 class sstable_set;
 struct compaction_descriptor;
 struct resharding_descriptor;
+class storage;

 class compaction_strategy {
    ::shared_ptr<compaction_strategy_impl> _compaction_strategy_impl;
@@ -122,11 +123,13 @@ public:
    //
    // The caller should also pass a maximum number of SSTables which is the maximum amount of
    // SSTables that can be added into a single job.
-    compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const;
+    compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const;

 };

 // Creates a compaction_strategy object from one of the strategies available.
 compaction_strategy make_compaction_strategy(compaction_strategy_type strategy, const std::map<sstring, sstring>& options);

+future<reshape_config> make_reshape_config(const sstables::storage& storage, reshape_mode mode);
+
 }
--- a/compaction/compaction_strategy_impl.hh
+++ b/compaction/compaction_strategy_impl.hh
@@ -76,6 +76,6 @@ public:
        return false;
    }

-    virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const;
+    virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const;
 };
 }
--- a/compaction/compaction_strategy_type.hh
+++ b/compaction/compaction_strategy_type.hh
@@ -8,6 +8,8 @@

 #pragma once

+#include <cstdint>
+
 namespace sstables {

 enum class compaction_strategy_type {
@@ -18,4 +20,10 @@ enum class compaction_strategy_type {
 };

 enum class reshape_mode { strict, relaxed };
+
+struct reshape_config {
+    reshape_mode mode;
+    const uint64_t free_storage_space;
+};
+
 }
--- a/compaction/leveled_compaction_strategy.cc
+++ b/compaction/leveled_compaction_strategy.cc
@@ -146,7 +146,8 @@ int64_t leveled_compaction_strategy::estimated_pending_compactions(table_state&
 }

 compaction_descriptor
-leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
+leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
+    auto mode = cfg.mode;
    std::array<std::vector<shared_sstable>, leveled_manifest::MAX_LEVELS> level_info;

    auto is_disjoint = [schema] (const std::vector<shared_sstable>& sstables, unsigned tolerance) -> std::tuple<bool, unsigned> {
@@ -203,7 +204,7 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input

    if (level_info[0].size() > offstrategy_threshold) {
        size_tiered_compaction_strategy stcs(_stcs_options);
-        return stcs.get_reshaping_job(std::move(level_info[0]), schema, mode);
+        return stcs.get_reshaping_job(std::move(level_info[0]), schema, cfg);
    }

    for (unsigned level = leveled_manifest::MAX_LEVELS - 1; level > 0; --level) {
--- a/compaction/leveled_compaction_strategy.hh
+++ b/compaction/leveled_compaction_strategy.hh
@@ -74,7 +74,7 @@ public:

    virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;

-    virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
+    virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const override;
 };

 }
--- a/compaction/size_tiered_compaction_strategy.cc
+++ b/compaction/size_tiered_compaction_strategy.cc
@@ -297,8 +297,9 @@ size_tiered_compaction_strategy::most_interesting_bucket(const std::vector<sstab
 }

 compaction_descriptor
-size_tiered_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const
+size_tiered_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const
 {
+    auto mode = cfg.mode;
    size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
    size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));

--- a/compaction/size_tiered_compaction_strategy.hh
+++ b/compaction/size_tiered_compaction_strategy.hh
@@ -96,7 +96,7 @@ public:

    virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;

-    virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
+    virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const override;

    friend class ::size_tiered_backlog_tracker;
 };
--- a/compaction/table_state.hh
+++ b/compaction/table_state.hh
@@ -48,6 +48,7 @@ public:
    virtual sstables::shared_sstable make_sstable() const = 0;
    virtual sstables::sstable_writer_config configure_writer(sstring origin) const = 0;
    virtual api::timestamp_type min_memtable_timestamp() const = 0;
+    virtual bool memtable_has_key(const dht::decorated_key& key) const = 0;
    virtual future<> on_compaction_completion(sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy) = 0;
    virtual bool is_auto_compaction_disabled_by_user() const noexcept = 0;
    virtual bool tombstone_gc_enabled() const noexcept = 0;
--- a/compaction/task_manager_module.cc
+++ b/compaction/task_manager_module.cc
@@ -555,7 +555,13 @@ future<> shard_reshaping_compaction_task_impl::run() {
                | boost::adaptors::filtered([&filter = _filter] (const auto& sst) {
            return filter(sst);
        }));
-        auto desc = table.get_compaction_strategy().get_reshaping_job(std::move(reshape_candidates), table.schema(), _mode);
+        if (reshape_candidates.empty()) {
+            break;
+        }
+        // all sstables were found in the same sstable_directory instance, so they share the same underlying storage.
+        auto& storage = reshape_candidates.front()->get_storage();
+        auto cfg = co_await sstables::make_reshape_config(storage, _mode);
+        auto desc = table.get_compaction_strategy().get_reshaping_job(std::move(reshape_candidates), table.schema(), cfg);
        if (desc.sstables.empty()) {
            break;
        }
--- a/compaction/task_manager_module.hh
+++ b/compaction/task_manager_module.hh
@@ -704,6 +704,10 @@ public:
    virtual std::string type() const override {
        return "regular compaction";
    }
+
+    virtual tasks::is_internal is_internal() const noexcept override {
+        return tasks::is_internal::yes;
+    }
 protected:
    virtual future<> run() override = 0;
 };
--- a/compaction/time_window_compaction_strategy.cc
+++ b/compaction/time_window_compaction_strategy.cc
@@ -223,12 +223,14 @@ reader_consumer_v2 time_window_compaction_strategy::make_interposer_consumer(con
 }

 compaction_descriptor
-time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
+time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
+    auto mode = cfg.mode;
    std::vector<shared_sstable> single_window;
    std::vector<shared_sstable> multi_window;

    size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
    size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
+    const uint64_t target_job_size = cfg.free_storage_space * reshape_target_space_overhead;

    if (mode == reshape_mode::relaxed) {
        offstrategy_threshold = max_sstables;
@@ -260,22 +262,40 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
            multi_window.size(), !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0,
            single_window.size(), !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0);

-    auto need_trimming = [max_sstables, schema, &is_disjoint] (const std::vector<shared_sstable>& ssts) {
-        // All sstables can be compacted at once if they're disjoint, given that partitioned set
-        // will incrementally open sstables which translates into bounded memory usage.
-        return ssts.size() > max_sstables && !is_disjoint(ssts);
+    auto get_job_size = [] (const std::vector<shared_sstable>& ssts) {
+        return boost::accumulate(ssts | boost::adaptors::transformed(std::mem_fn(&sstable::bytes_on_disk)), uint64_t(0));
+    };
+
+    // Targets a space overhead of 10%. All disjoint sstables can be compacted together as long as they won't
+    // cause an overhead above target. Otherwise, the job targets a maximum of #max_threshold sstables.
+    auto need_trimming = [&] (const std::vector<shared_sstable>& ssts, const uint64_t job_size, bool is_disjoint) {
+        const size_t min_sstables = 2;
+        auto is_above_target_size = job_size > target_job_size;
+
+        return (ssts.size() > max_sstables && !is_disjoint) ||
+               (ssts.size() > min_sstables && is_above_target_size);
+    };
+
+    auto maybe_trim_job = [&need_trimming] (std::vector<shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
+        while (need_trimming(ssts, job_size, is_disjoint)) {
+            auto sst = ssts.back();
+            ssts.pop_back();
+            job_size -= sst->bytes_on_disk();
+        }
    };

    if (!multi_window.empty()) {
+        auto disjoint = is_disjoint(multi_window);
+        auto job_size = get_job_size(multi_window);
        // Everything that spans multiple windows will need reshaping
-        if (need_trimming(multi_window)) {
+        if (need_trimming(multi_window, job_size, disjoint)) {
            // When trimming, let's keep sstables with overlapping time window, so as to reduce write amplification.
            // For example, if there are N sstables spanning window W, where N <= 32, then we can produce all data for W
            // in a single compaction round, removing the need to later compact W to reduce its number of files.
            boost::partial_sort(multi_window, multi_window.begin() + max_sstables, [](const shared_sstable &a, const shared_sstable &b) {
                return a->get_stats_metadata().max_timestamp < b->get_stats_metadata().max_timestamp;
            });
-            multi_window.resize(max_sstables);
+            maybe_trim_job(multi_window, job_size, disjoint);
        }
        compaction_descriptor desc(std::move(multi_window));
        desc.options = compaction_type_options::make_reshape();
@@ -294,15 +314,17 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
                std::copy(ssts.begin(), ssts.end(), std::back_inserter(single_window));
                continue;
            }
+
            // reuse STCS reshape logic which will only compact similar-sized files, to increase overall efficiency
            // when reshaping time buckets containing a huge amount of files
-            auto desc = size_tiered_compaction_strategy(_stcs_options).get_reshaping_job(std::move(ssts), schema, mode);
+            auto desc = size_tiered_compaction_strategy(_stcs_options).get_reshaping_job(std::move(ssts), schema, cfg);
            if (!desc.sstables.empty()) {
                return desc;
            }
        }
    }
    if (!single_window.empty()) {
+        maybe_trim_job(single_window, get_job_size(single_window), all_disjoint);
        compaction_descriptor desc(std::move(single_window));
        desc.options = compaction_type_options::make_reshape();
        return desc;
--- a/compaction/time_window_compaction_strategy.hh
+++ b/compaction/time_window_compaction_strategy.hh
@@ -78,6 +78,7 @@ public:
    // To prevent an explosion in the number of sstables we cap it.
    // Better co-locate some windows into the same sstables than OOM.
    static constexpr uint64_t max_data_segregation_window_count = 100;
+    static constexpr float reshape_target_space_overhead = 0.1f;

    using bucket_t = std::vector<shared_sstable>;
    enum class bucket_compaction_mode { none, size_tiered, major };
@@ -170,7 +171,7 @@ public:
        return true;
    }

-    virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
+    virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const override;
 };

 }
--- a/conf/scylla.yaml
+++ b/conf/scylla.yaml
@@ -572,7 +572,7 @@ murmur3_partitioner_ignore_msb_bits: 12
 force_schema_commit_log: true

 # Time for which task manager task is kept in memory after it completes.
-task_ttl_in_seconds: 10
+# task_ttl_in_seconds: 0

 # Use Raft to consistently manage schema information in the cluster.
 # Refer to https://docs.scylladb.com/master/architecture/raft.html for more details.
--- a/configure.py
+++ b/configure.py
@@ -852,6 +852,7 @@ scylla_core = (['message/messaging_service.cc',
                'utils/rjson.cc',
                'utils/human_readable.cc',
                'utils/histogram_metrics_helper.cc',
+                'utils/on_internal_error.cc',
                'utils/pretty_printers.cc',
                'converting_mutation_partition_applier.cc',
                'readers/combined.cc',
@@ -1126,6 +1127,7 @@ scylla_core = (['message/messaging_service.cc',
                'utils/lister.cc',
                'repair/repair.cc',
                'repair/row_level.cc',
+                'repair/table_check.cc',
                'exceptions/exceptions.cc',
                'auth/allow_all_authenticator.cc',
                'auth/allow_all_authorizer.cc',
@@ -1240,6 +1242,8 @@ api = ['api/api.cc',
       Json2Code('api/api-doc/error_injection.json'),
       'api/authorization_cache.cc',
       Json2Code('api/api-doc/authorization_cache.json'),
+       'api/raft.cc',
+       Json2Code('api/api-doc/raft.json'),
       ]

 alternator = [
@@ -1451,7 +1455,7 @@ deps['test/boost/bytes_ostream_test'] = [
    "test/lib/log.cc",
 ]
 deps['test/boost/input_stream_test'] = ['test/boost/input_stream_test.cc']
-deps['test/boost/UUID_test'] = ['utils/UUID_gen.cc', 'test/boost/UUID_test.cc', 'utils/uuid.cc', 'utils/dynamic_bitset.cc', 'utils/hashers.cc']
+deps['test/boost/UUID_test'] = ['utils/UUID_gen.cc', 'test/boost/UUID_test.cc', 'utils/uuid.cc', 'utils/dynamic_bitset.cc', 'utils/hashers.cc', 'utils/on_internal_error.cc']
 deps['test/boost/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'test/boost/murmur_hash_test.cc']
 deps['test/boost/allocation_strategy_test'] = ['test/boost/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
 deps['test/boost/log_heap_test'] = ['test/boost/log_heap_test.cc']
--- a/cql3/functions/functions.cc
+++ b/cql3/functions/functions.cc
@@ -338,6 +338,9 @@ functions::get(data_dictionary::database db,
        if (!receiver_cf.has_value()) {
            throw exceptions::invalid_request_exception("functions::get for token doesn't have a known column family");
        }
+        if (schema == nullptr) {
+            throw exceptions::invalid_request_exception(format("functions::get for token cannot find {} table", *receiver_cf));
+        }
        auto fun = ::make_shared<token_fct>(schema);
        validate_types(db, keyspace, schema.get(), fun, provided_args, receiver_ks, receiver_cf);
        return fun;
--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -815,7 +815,7 @@ bool query_processor::has_more_results(cql3::internal_query_state& state) const

 future<> query_processor::for_each_cql_result(
        cql3::internal_query_state& state,
-         noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)>&& f) {
+        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)> f) {
    do {
        auto msg = co_await execute_paged_internal(state);
        for (auto& row : *msg) {
@@ -1065,6 +1065,9 @@ void query_processor::migration_subscriber::on_update_aggregate(const sstring& k
 void query_processor::migration_subscriber::on_update_view(
        const sstring& ks_name,
        const sstring& view_name, bool columns_changed) {
+    // scylladb/scylladb#16392 - Materialized views are also tables, so we need to at least handle
+    // them as such when changed.
+    on_update_column_family(ks_name, view_name, columns_changed);
 }

 void query_processor::migration_subscriber::on_update_tablet_metadata() {
@@ -1113,14 +1116,14 @@ future<> query_processor::query_internal(
        db::consistency_level cl,
        const std::initializer_list<data_value>& values,
        int32_t page_size,
-        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f) {
+        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
    auto query_state = create_paged_state(query_string, cl, values, page_size);
    co_return co_await for_each_cql_result(query_state, std::move(f));
 }

 future<> query_processor::query_internal(
        const sstring& query_string,
-        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f) {
+        noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
    return query_internal(query_string, db::consistency_level::ONE, {}, 1000, std::move(f));
 }

--- a/cql3/query_processor.hh
+++ b/cql3/query_processor.hh
@@ -307,7 +307,7 @@ public:
            db::consistency_level cl,
            const std::initializer_list<data_value>& values,
            int32_t page_size,
-            noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);
+            noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);

    /*
     * \brief iterate over all cql results using paging
@@ -322,7 +322,7 @@ public:
     */
    future<> query_internal(
            const sstring& query_string,
-            noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);
+            noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);

    class cache_internal_tag;
    using cache_internal = bool_class<cache_internal_tag>;
@@ -479,7 +479,7 @@ private:
     */
    future<> for_each_cql_result(
            cql3::internal_query_state& state,
-             noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);
+            noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);

    /*!
     * \brief check, based on the state if there are additional results
--- a/cql3/restrictions/statement_restrictions.cc
+++ b/cql3/restrictions/statement_restrictions.cc
@@ -541,22 +541,32 @@ std::pair<std::optional<secondary_index::index>, expr::expression> statement_res
    int chosen_index_score = 0;
    expr::expression chosen_index_restrictions = expr::conjunction({});

-    for (const auto& index : sim.list_indexes()) {
-        auto cdef = _schema->get_column_definition(to_bytes(index.target_column()));
-        for (const expr::expression& restriction : index_restrictions()) {
-            if (has_partition_token(restriction, *_schema) || contains_multi_column_restriction(restriction)) {
-                continue;
-            }
-
-            expr::single_column_restrictions_map rmap = expr::get_single_column_restrictions_map(restriction);
-            const auto found = rmap.find(cdef);
-            if (found != rmap.end() && is_supported_by(found->second, index)
-                && score(index) > chosen_index_score) {
-                chosen_index = index;
-                chosen_index_score = score(index);
-                chosen_index_restrictions = restriction;
-            }
+    // Several indexes may be usable for this query. When their score is tied,
+    // let's pick one by order of the columns mentioned in the restriction
+    // expression. This specific order isn't important (and maybe in the
+    // future we could plan a better order based on the specificity of each
+    // index), but it is critical that two coordinators - or the same
+    // coordinator over time - choose the same index for the same query.
+    // Otherwise, paging can break (see issue #7969).
+    for (const expr::expression& restriction : index_restrictions()) {
+        if (has_partition_token(restriction, *_schema) || contains_multi_column_restriction(restriction)) {
+            continue;
        }
+        expr::for_each_expression<expr::column_value>(restriction, [&](const expr::column_value& cval) {
+            auto& cdef = cval.col;
+            expr::expression col_restrictions = expr::conjunction {
+                .children = expr::extract_single_column_restrictions_for_column(restriction, *cdef)
+            };
+            for (const auto& index : sim.list_indexes()) {
+                if (cdef->name_as_text() == index.target_column() &&
+                        expr::is_supported_by(col_restrictions, index) &&
+                        score(index) > chosen_index_score) {
+                    chosen_index = index;
+                    chosen_index_score = score(index);
+                    chosen_index_restrictions = restriction;
+                }
+            }
+        });
    }
    return {chosen_index, chosen_index_restrictions};
 }
@@ -1132,13 +1142,14 @@ bool starts_before_start(
    const auto len1 = r1.start()->value().representation().size();
    const auto len2 = r2.start()->value().representation().size();
    if (len1 == len2) { // The values truly are equal.
+        // (a)>=(1) starts before (a)>(1)
        return r1.start()->is_inclusive() && !r2.start()->is_inclusive();
    } else if (len1 < len2) { // r1 start is a prefix of r2 start.
        // (a)>=(1) starts before (a,b)>=(1,1), but (a)>(1) doesn't.
        return r1.start()->is_inclusive();
    } else { // r2 start is a prefix of r1 start.
        // (a,b)>=(1,1) starts before (a)>(1) but after (a)>=(1).
-        return r2.start()->is_inclusive();
+        return !r2.start()->is_inclusive();
    }
 }

@@ -1163,6 +1174,7 @@ bool starts_before_or_at_end(
    const auto len1 = r1.start()->value().representation().size();
    const auto len2 = r2.end()->value().representation().size();
    if (len1 == len2) { // The values truly are equal.
+        // (a)>=(1) starts at end of (a)<=(1)
        return r1.start()->is_inclusive() && r2.end()->is_inclusive();
    } else if (len1 < len2) { // r1 start is a prefix of r2 end.
        // a>=(1) starts before (a,b)<=(1,1) ends, but (a)>(1) doesn't.
@@ -1194,6 +1206,7 @@ bool ends_before_end(
    const auto len1 = r1.end()->value().representation().size();
    const auto len2 = r2.end()->value().representation().size();
    if (len1 == len2) { // The values truly are equal.
+        // (a)<(1) ends before (a)<=(1) ends
        return !r1.end()->is_inclusive() && r2.end()->is_inclusive();
    } else if (len1 < len2) { // r1 end is a prefix of r2 end.
        // (a)<(1) ends before (a,b)<=(1,1), but (a)<=(1) doesn't.
@@ -1209,7 +1222,10 @@ std::optional<query::clustering_range> intersection(
        const query::clustering_range& r1,
        const query::clustering_range& r2,
        const clustering_key_prefix::prefix_equal_tri_compare& cmp) {
-    // Assume r1's start is to the left of r2's start.
+    // If needed, swap r1 and r2 so that r1's start is to the left of r2's
+    // start. Note that to avoid infinite recursion (#18688) the function
+    // starts_before_start() must never return true for both (r1,r2) and
+    // (r2,r1) - in other words, it must be a *strict* partial order.
    if (starts_before_start(r2, r1, cmp)) {
        return intersection(r2, r1, cmp);
    }
--- a/cql3/statements/select_statement.cc
+++ b/cql3/statements/select_statement.cc
@@ -2004,7 +2004,10 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
            )
            && !restrictions->need_filtering()  // No filtering
            && group_by_cell_indices->empty()   // No GROUP BY
-            && db.get_config().enable_parallelized_aggregation();
+            && db.get_config().enable_parallelized_aggregation()
+            && !( // Do not parallelize the request if it's single partition read
+                restrictions->partition_key_restrictions_is_all_eq() 
+                && restrictions->partition_key_restrictions_size() == schema->partition_key_size());
    };

    if (_parameters->is_prune_materialized_view()) {
--- a/cql3/type_json.cc
+++ b/cql3/type_json.cc
@@ -151,13 +151,15 @@ static bytes from_json_object_aux(const map_type_impl& t, const rjson::value& va
    std::map<bytes, bytes, serialized_compare> raw_map(t.get_keys_type()->as_less_comparator());
    for (auto it = value.MemberBegin(); it != value.MemberEnd(); ++it) {
        bytes value = from_json_object(*t.get_values_type(), it->value);
-        if (t.get_keys_type()->underlying_type() == ascii_type ||
-            t.get_keys_type()->underlying_type() == utf8_type) {
+        // All native (non-collection, non-tuple) key types are represented
+        // as a string in JSON. Keys of more elaborate types are also JSON
+        // strings, but they hold the string representation of another JSON
+        // value, which needs to be reparsed as JSON. For example,
+        // map<frozen<list<int>>, int> will be represented as:
+        // { "[1, 3, 6]": 3, "[]": 0, "[1, 2]": 2 }
+        if (t.get_keys_type()->underlying_type()->is_native()) {
            raw_map.emplace(from_json_object(*t.get_keys_type(), it->name), std::move(value));
        } else {
-            // Keys in maps can only be strings in JSON, but they can also be a string representation
-            // of another JSON type, which needs to be reparsed. Example - map<frozen<list<int>>, int>
-            // will be represented like this: { "[1, 3, 6]": 3, "[]": 0, "[1, 2]": 2 }
            try {
                rjson::value map_key = rjson::parse(rjson::to_string_view(it->name));
                raw_map.emplace(from_json_object(*t.get_keys_type(), map_key), std::move(value));
--- a/db/batchlog_manager.cc
+++ b/db/batchlog_manager.cc
@@ -135,7 +135,7 @@ future<> db::batchlog_manager::stop() {
 }

 future<size_t> db::batchlog_manager::count_all_batches() const {
-    sstring query = format("SELECT count(*) FROM {}.{}", system_keyspace::NAME, system_keyspace::BATCHLOG);
+    sstring query = format("SELECT count(*) FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG);
    return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> rs) {
       return size_t(rs->one().get_as<int64_t>("count"));
    });
@@ -154,26 +154,26 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
    auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
    auto limiter = make_lw_shared<utils::rate_limiter>(throttle);

-    auto batch = [this, limiter](const cql3::untyped_result_set::row& row) {
+    auto batch = [this, limiter](const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
        auto written_at = row.get_as<db_clock::time_point>("written_at");
        auto id = row.get_as<utils::UUID>("id");
        // enough time for the actual write + batchlog entry mutation delivery (two separate requests).
        auto timeout = get_batch_log_timeout();
        if (db_clock::now() < written_at + timeout) {
            blogger.debug("Skipping replay of {}, too fresh", id);
-            return make_ready_future<>();
+            return make_ready_future<stop_iteration>(stop_iteration::no);
        }

        // check version of serialization format
        if (!row.has("version")) {
            blogger.warn("Skipping logged batch because of unknown version");
-            return make_ready_future<>();
+            return make_ready_future<stop_iteration>(stop_iteration::no);
        }

        auto version = row.get_as<int32_t>("version");
        if (version != netw::messaging_service::current_version) {
            blogger.warn("Skipping logged batch because of incorrect version");
-            return make_ready_future<>();
+            return make_ready_future<stop_iteration>(stop_iteration::no);
        }

        auto data = row.get_blob("data");
@@ -255,49 +255,20 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
            auto now = service::client_state(service::client_state::internal_tag()).get_timestamp();
            m.partition().apply_delete(*schema, clustering_key_prefix::make_empty(), tombstone(now, gc_clock::now()));
            return _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
-        });
+        }).then([] { return make_ready_future<stop_iteration>(stop_iteration::no); });
    };

-    return seastar::with_gate(_gate, [this, batch = std::move(batch)] {
+    return seastar::with_gate(_gate, [this, batch = std::move(batch)] () mutable {
        blogger.debug("Started replayAllFailedBatches (cpu {})", this_shard_id());
-
-        typedef ::shared_ptr<cql3::untyped_result_set> page_ptr;
-        sstring query = format("SELECT id, data, written_at, version FROM {}.{} LIMIT {:d}", system_keyspace::NAME, system_keyspace::BATCHLOG, page_size);
-        return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([this, batch = std::move(batch)](page_ptr page) {
-            return do_with(std::move(page), [this, batch = std::move(batch)](page_ptr & page) mutable {
-                return repeat([this, &page, batch = std::move(batch)]() mutable {
-                    if (page->empty()) {
-                        return make_ready_future<stop_iteration>(stop_iteration::yes);
-                    }
-                    auto id = page->back().get_as<utils::UUID>("id");
-                    return parallel_for_each(*page, batch).then([this, &page, id]() {
-                        if (page->size() < page_size) {
-                            return make_ready_future<stop_iteration>(stop_iteration::yes); // we've exhausted the batchlog, next query would be empty.
-                        }
-                        sstring query = format("SELECT id, data, written_at, version FROM {}.{} WHERE token(id) > token(?) LIMIT {:d}",
-                                system_keyspace::NAME,
-                                system_keyspace::BATCHLOG,
-                                page_size);
-                        return _qp.execute_internal(query, {id}, cql3::query_processor::cache_internal::yes).then([&page](auto res) {
-                                    page = std::move(res);
-                                    return make_ready_future<stop_iteration>(stop_iteration::no);
-                                });
-                    });
-                });
-            });
-        }).then([] {
-        // TODO FIXME : cleanup()
-#if 0
-            ColumnFamilyStore cfs = Keyspace.open(SystemKeyspace.NAME).getColumnFamilyStore(SystemKeyspace.BATCHLOG);
-            cfs.forceBlockingFlush();
-            Collection<Descriptor> descriptors = new ArrayList<>();
-            for (SSTableReader sstr : cfs.getSSTables())
-            descriptors.add(sstr.descriptor);
-            if (!descriptors.isEmpty()) // don't pollute the logs if there is nothing to compact.
-            CompactionManager.instance.submitUserDefined(cfs, descriptors, Integer.MAX_VALUE).get();
-
-#endif
-
+        return _qp.query_internal(
+                format("SELECT id, data, written_at, version FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
+                db::consistency_level::ONE,
+                {},
+                page_size,
+                std::move(batch)).then([this] {
+            // Replaying batches could have generated tombstones; flush them to
+            // disk, where they can be compacted away.
+            return replica::database::flush_table_on_all_shards(_qp.proxy().get_db(), system_keyspace::NAME, system_keyspace::BATCHLOG);
        }).then([] {
            blogger.debug("Finished replayAllFailedBatches");
        });
--- a/db/config.cc
+++ b/db/config.cc
@@ -489,6 +489,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
        "Adjusts the sensitivity of the failure detector on an exponential scale. Generally this setting never needs adjusting.\n"
        "Related information: Failure detection and recovery")
    , failure_detector_timeout_in_ms(this, "failure_detector_timeout_in_ms", liveness::LiveUpdate, value_status::Used, 20 * 1000, "Maximum time between two successful echo message before gossip mark a node down in milliseconds.\n")
+    , direct_failure_detector_ping_timeout_in_ms(this, "direct_failure_detector_ping_timeout_in_ms", value_status::Used, 600, "Duration after which the direct failure detector aborts a ping message, so the next ping can start.\n"
+        "Note: this failure detector is used by Raft, and is different from gossiper's failure detector (configured by `failure_detector_timeout_in_ms`).\n")
    /**
    * @Group Performance tuning properties
    * @GroupDescription Tuning performance and system resource utilization, including commit log, compaction, memory, disk I/O, CPU, reads, and writes.
@@ -678,6 +680,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
        "The maximum number of tombstones a query can scan before aborting.")
    , query_tombstone_page_limit(this, "query_tombstone_page_limit", liveness::LiveUpdate, value_status::Used, 10000,
        "The number of tombstones after which a query cuts a page, even if not full or even empty.")
+    , query_page_size_in_bytes(this, "query_page_size_in_bytes", liveness::LiveUpdate, value_status::Used, 1 << 20,
+        "The size of pages in bytes, after a page accumulates this much data, the page is cut and sent to the client."
+        " Setting a too large value increases the risk of OOM.")
    /**
    * @Group Network timeout settings
    */
@@ -926,6 +931,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based")
    , allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace,removenode,rebuild,bootstrap,decommission", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild")
    , enable_compacting_data_for_streaming_and_repair(this, "enable_compacting_data_for_streaming_and_repair", liveness::LiveUpdate, value_status::Used, true, "Enable the compacting reader, which compacts the data for streaming and repair (load'n'stream included) before sending it to, or synchronizing it with peers. Can reduce the amount of data to be processed by removing dead data, but adds CPU overhead.")
+    , repair_partition_count_estimation_ratio(this, "repair_partition_count_estimation_ratio", liveness::LiveUpdate, value_status::Used, 0.1,
+        "Specify the fraction of partitions written by repair out of the total partitions. The value is currently only used for bloom filter estimation. Value is between 0 and 1.")
    , ring_delay_ms(this, "ring_delay_ms", value_status::Used, 30 * 1000, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.")
    , shadow_round_ms(this, "shadow_round_ms", value_status::Used, 300 * 1000, "The maximum gossip shadow round time. Can be used to reduce the gossip feature check time during node boot up.")
    , fd_max_interval_ms(this, "fd_max_interval_ms", value_status::Used, 2 * 1000, "The maximum failure_detector interval time in milliseconds. Interval larger than the maximum will be ignored. Larger cluster may need to increase the default.")
@@ -944,6 +951,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
    , unspooled_dirty_soft_limit(this, "unspooled_dirty_soft_limit", value_status::Used, 0.6, "Soft limit of unspooled dirty memory expressed as a portion of the hard limit")
    , sstable_summary_ratio(this, "sstable_summary_ratio", value_status::Used, 0.0005, "Enforces that 1 byte of summary is written for every N (2000 by default) "
        "bytes written to data file. Value must be between 0 and 1.")
+    , components_memory_reclaim_threshold(this, "components_memory_reclaim_threshold", liveness::LiveUpdate, value_status::Used, .2, "Ratio of available memory for all in-memory components of SSTables in a shard beyond which the memory will be reclaimed from components until it falls back under the threshold. Currently, this limit is only enforced for bloom filters.")
    , large_memory_allocation_warning_threshold(this, "large_memory_allocation_warning_threshold", value_status::Used, size_t(1) << 20, "Warn about memory allocations above this size; set to zero to disable")
    , enable_deprecated_partitioners(this, "enable_deprecated_partitioners", value_status::Used, false, "Enable the byteordered and random partitioners. These partitioners are deprecated and will be removed in a future version.")
    , enable_keyspace_column_family_metrics(this, "enable_keyspace_column_family_metrics", value_status::Used, false, "Enable per keyspace and per column family metrics reporting")
@@ -983,6 +991,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
            "Start serializing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
    , reader_concurrency_semaphore_kill_limit_multiplier(this, "reader_concurrency_semaphore_kill_limit_multiplier", liveness::LiveUpdate, value_status::Used, 4,
            "Start killing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
+    , reader_concurrency_semaphore_cpu_concurrency(this, "reader_concurrency_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 1,
+            "Admit new reads while there are less than this number of requests that need CPU.")
    , twcs_max_window_count(this, "twcs_max_window_count", liveness::LiveUpdate, value_status::Used, 50,
            "The maximum number of compaction windows allowed when making use of TimeWindowCompactionStrategy. A setting of 0 effectively disables the restriction.")
    , initial_sstable_loading_concurrency(this, "initial_sstable_loading_concurrency", value_status::Used, 4u,
--- a/db/config.hh
+++ b/db/config.hh
@@ -196,6 +196,7 @@ public:
    named_value<bool> snapshot_before_compaction;
    named_value<uint32_t> phi_convict_threshold;
    named_value<uint32_t> failure_detector_timeout_in_ms;
+    named_value<uint32_t> direct_failure_detector_ping_timeout_in_ms;
    named_value<sstring> commitlog_sync;
    named_value<uint32_t> commitlog_segment_size_in_mb;
    named_value<uint32_t> schema_commitlog_segment_size_in_mb;
@@ -254,6 +255,7 @@ public:
    named_value<uint32_t> tombstone_warn_threshold;
    named_value<uint32_t> tombstone_failure_threshold;
    named_value<uint64_t> query_tombstone_page_limit;
+    named_value<uint64_t> query_page_size_in_bytes;
    named_value<uint32_t> range_request_timeout_in_ms;
    named_value<uint32_t> read_request_timeout_in_ms;
    named_value<uint32_t> counter_write_request_timeout_in_ms;
@@ -329,6 +331,7 @@ public:
    named_value<bool> enable_repair_based_node_ops;
    named_value<sstring> allowed_repair_based_node_ops;
    named_value<bool> enable_compacting_data_for_streaming_and_repair;
+    named_value<double> repair_partition_count_estimation_ratio;
    named_value<uint32_t> ring_delay_ms;
    named_value<uint32_t> shadow_round_ms;
    named_value<uint32_t> fd_max_interval_ms;
@@ -346,6 +349,7 @@ public:
    named_value<unsigned> murmur3_partitioner_ignore_msb_bits;
    named_value<double> unspooled_dirty_soft_limit;
    named_value<double> sstable_summary_ratio;
+    named_value<double> components_memory_reclaim_threshold;
    named_value<size_t> large_memory_allocation_warning_threshold;
    named_value<bool> enable_deprecated_partitioners;
    named_value<bool> enable_keyspace_column_family_metrics;
@@ -369,6 +373,7 @@ public:
    named_value<uint64_t> max_memory_for_unlimited_query_hard_limit;
    named_value<uint32_t> reader_concurrency_semaphore_serialize_limit_multiplier;
    named_value<uint32_t> reader_concurrency_semaphore_kill_limit_multiplier;
+    named_value<uint32_t> reader_concurrency_semaphore_cpu_concurrency;
    named_value<uint32_t> twcs_max_window_count;
    named_value<unsigned> initial_sstable_loading_concurrency;
    named_value<bool> enable_3_1_0_compatibility_mode;
--- a/db/large_data_handler.cc
+++ b/db/large_data_handler.cc
@@ -155,7 +155,7 @@ future<> cql_table_large_data_handler::try_record(std::string_view large_table,
    const auto sstable_name = large_data_handler::sst_filename(sst);
    std::string pk_str = key_to_str(partition_key.to_partition_key(s), s);
    auto timestamp = db_clock::now();
-    large_data_logger.warn("Writing large {} {}/{}: {}{} ({} bytes) to {}", desc, ks_name, cf_name, pk_str, extra_path, size, sstable_name);
+    large_data_logger.warn("Writing large {} {}/{}: {} ({} bytes) to {}", desc, ks_name, cf_name, extra_path, size, sstable_name);
    return _sys_ks->execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
            .discard_result()
            .handle_exception([ks_name, cf_name, large_table, sstable_name] (std::exception_ptr ep) {
@@ -182,10 +182,10 @@ future<> cql_table_large_data_handler::internal_record_large_cells(const sstable
    if (clustering_key) {
        const schema &s = *sst.get_schema();
        auto ck_str = key_to_str(*clustering_key, s);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("/{}/{}", ck_str, column_name), extra_fields, ck_str, column_name);
+        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, ck_str, column_name);
    } else {
        auto desc = format("static {}", cell_type);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), desc, format("//{}", column_name), extra_fields, data_value::make_null(utf8_type), column_name);
+        return try_record("cell", sst, partition_key, int64_t(cell_size), desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
    }
 }

@@ -197,10 +197,10 @@ future<> cql_table_large_data_handler::internal_record_large_cells_and_collectio
    if (clustering_key) {
        const schema &s = *sst.get_schema();
        auto ck_str = key_to_str(*clustering_key, s);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("/{}/{}", ck_str, column_name), extra_fields, ck_str, column_name, data_value((int64_t)collection_elements));
+        return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, ck_str, column_name, data_value((int64_t)collection_elements));
    } else {
        auto desc = format("static {}", cell_type);
-        return try_record("cell", sst, partition_key, int64_t(cell_size), desc, format("//{}", column_name), extra_fields, data_value::make_null(utf8_type), column_name, data_value((int64_t)collection_elements));
+        return try_record("cell", sst, partition_key, int64_t(cell_size), desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name, data_value((int64_t)collection_elements));
    }
 }

@@ -210,7 +210,7 @@ future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable
    if (clustering_key) {
        const schema &s = *sst.get_schema();
        std::string ck_str = key_to_str(*clustering_key, s);
-        return try_record("row", sst, partition_key, int64_t(row_size), "row", format("/{}", ck_str), extra_fields,  ck_str);
+        return try_record("row", sst, partition_key, int64_t(row_size), "row", "", extra_fields, ck_str);
    } else {
        return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
    }
--- a/db/paxos_grace_seconds_extension.hh
+++ b/db/paxos_grace_seconds_extension.hh
@@ -55,6 +55,10 @@ public:
        return ser::serialize_to_buffer<bytes>(_paxos_gc_sec);
    }

+    std::string options_to_string() const override {
+        return std::to_string(_paxos_gc_sec);
+    }
+
    static int32_t deserialize(const bytes_view& buffer) {
        return ser::deserialize_from_buffer(buffer, boost::type<int32_t>());
    }
--- a/db/schema_tables.cc
+++ b/db/schema_tables.cc
@@ -973,7 +973,7 @@ future<> merge_schema(sharded<db::system_keyspace>& sys_ks, distributed<service:
    if (this_shard_id() != 0) {
        // mutations must be applied on the owning shard (0).
        co_await smp::submit_to(0, [&, fmuts = freeze(mutations)] () mutable -> future<> {
-            return merge_schema(sys_ks, proxy, feat, unfreeze(fmuts));
+            return merge_schema(sys_ks, proxy, feat, unfreeze(fmuts), reload);
        });
        co_return;
    }
--- a/db/view/view.cc
+++ b/db/view/view.cc
@@ -493,37 +493,56 @@ mutation_partition& view_updates::partition_for(partition_key&& key) {
 }

 size_t view_updates::op_count() const {
-    return _op_count++;;
+    return _op_count;
 }

 row_marker view_updates::compute_row_marker(const clustering_or_static_row& base_row) const {
    /*
-     * We need to compute both the timestamp and expiration.
+     * We need to compute both the timestamp and expiration for view rows.
     *
-     * There are 3 cases:
-     *   1) There is a column that is not in the base PK but is in the view PK. In that case, as long as that column
-     *      lives, the view entry does too, but as soon as it expires (or is deleted for that matter) the entry also
-     *      should expire. So the expiration for the view is the one of that column, regardless of any other expiration.
-     *      To take an example of that case, if you have:
-     *        CREATE TABLE t (a int, b int, c int, PRIMARY KEY (a, b))
-     *        CREATE MATERIALIZED VIEW mv AS SELECT * FROM t WHERE c IS NOT NULL AND a IS NOT NULL AND b IS NOT NULL PRIMARY KEY (c, a, b)
-     *        INSERT INTO t(a, b) VALUES (0, 0) USING TTL 3;
-     *        UPDATE t SET c = 0 WHERE a = 0 AND b = 0;
-     *      then even after 3 seconds elapsed, the row will still exist (it just won't have a "row marker" anymore) and so
-     *      the MV should still have a corresponding entry.
-     *      This cell determines the liveness of the view row.
-     *   2) The columns for the base and view PKs are exactly the same, and all base columns are selected by the view.
-     *      In that case, all components (marker, deletion and cells) are the same and trivially mapped.
-     *   3) The columns for the base and view PKs are exactly the same, but some base columns are not selected in the view.
-     *      Use the max timestamp out of the base row marker and all the unselected columns - this ensures we can keep the
-     *      view row alive. Do the same thing for the expiration, if the marker is dead or will expire, and so
-     *      will all unselected columns.
+     * Below there are several distinct cases depending on how many new key
+     * columns the view has - i.e., how many of the view's key columns were
+     * regular columns in the base. base_regular_columns_in_view_pk.size():
+     *
+     * Zero new key columns:
+     *     The view row's key is composed only of base key columns, and those
+     *     cannot be changed in an update, so the view row remains alive as
+     *     long as the base row is alive. We need to return the same row
+     *     marker as the base for the view - to keep an empty view row alive
+     *     for as long as an empty base row exists.
+     *     Note that in this case, if there are *unselected* base columns, we
+     *     may need to keep an empty view row alive even without a row marker
+     *     because the base row (which has additional columns) is still alive.
+     *     For that we have the "virtual columns" feature: In the zero new
+     *     key columns case, we put unselected columns in the view as empty
+     *     columns, to keep the view row alive.
+     *
+     * One new key column:
+     *     In this case, there is a regular base column that is part of the
+     *     view key. This regular column can be added or deleted in an update,
+     *     or its expiration be set, and those can cause the view row -
+     *     including its row marker - to need to appear or disappear as well.
+     *     So the liveness of the cell of this one column determines the liveness
+     *     of the view row and the row marker that we return.
+     *
+     * Two or more new key columns:
+     *     This case is explicitly NOT supported in CQL - one cannot create a
+     *     view with more than one base-regular column in its key. In general
+     *     picking one liveness (timestamp and expiration) is not possible
+     *     if there are multiple regular base columns in the view key, as
+     *     those can have different liveness.
+     *     However, we do allow this case for Alternator - we need to allow
+     *     the case of two (but not more) because the DynamoDB API allows
+     *     creating a GSI whose two key columns (hash and range key) were
+     *     regular columns.
+     *     We can support this case in Alternator because it doesn't use
+     *     expiration (the "TTL" it does support is different), and doesn't
+     *     support user-defined timestamps. But, the two columns can still
+     *     have different timestamps - this happens if an update modifies
+     *     just one of them. In this case the timestamp of the view update
+     *     (and that of the row marker we return) is the later of these two
+     *     updated columns.
     */
-
-    // WARNING: The code assumes that if multiple regular base columns are present in the view key,
-    // they share liveness information. It's true especially in the only case currently allowed by CQL,
-    // which assumes there's up to one non-pk column in the view key. It's also true in alternator,
-    // which does not carry TTL information.
    const auto& col_ids = base_row.is_clustering_row()
            ? _base_info->base_regular_columns_in_view_pk()
            : _base_info->base_static_columns_in_view_pk();
@@ -531,7 +550,20 @@ row_marker view_updates::compute_row_marker(const clustering_or_static_row& base
        auto& def = _base->column_at(base_row.column_kind(), col_ids[0]);
        // Note: multi-cell columns can't be part of the primary key.
        auto cell = base_row.cells().cell_at(col_ids[0]).as_atomic_cell(def);
-        return cell.is_live_and_has_ttl() ? row_marker(cell.timestamp(), cell.ttl(), cell.expiry()) : row_marker(cell.timestamp());
+        auto ts = cell.timestamp();
+        if (col_ids.size() > 1){
+            // As explained above, this case only happens in Alternator,
+            // and we may need to pick a higher ts:
+            auto& second_def = _base->column_at(base_row.column_kind(), col_ids[1]);
+            auto second_cell = base_row.cells().cell_at(col_ids[1]).as_atomic_cell(second_def);
+            auto second_ts = second_cell.timestamp();
+            ts = std::max(ts, second_ts);
+            // Alternator isn't supposed to have TTL or more than two col_ids!
+            if (col_ids.size() != 2 || cell.is_live_and_has_ttl() || second_cell.is_live_and_has_ttl()) [[unlikely]] {
+                utils::on_internal_error(format("Unexpected col_ids length {} or has TTL", col_ids.size()));
+            }
+        }
+        return cell.is_live_and_has_ttl() ? row_marker(ts, cell.ttl(), cell.expiry()) : row_marker(ts);
    }

    return base_row.marker();
@@ -930,8 +962,22 @@ void view_updates::do_delete_old_entry(const partition_key& base_key, const clus
            // Note: multi-cell columns can't be part of the primary key.
            auto& def = _base->column_at(kind, col_ids[0]);
            auto cell = existing.cells().cell_at(col_ids[0]).as_atomic_cell(def);
+            auto ts = cell.timestamp();
+            if (col_ids.size() > 1) {
+                // This is the Alternator-only support for two regular base
+                // columns that become view key columns. See explanation in
+                // view_updates::compute_row_marker().
+                auto& second_def = _base->column_at(kind, col_ids[1]);
+                auto second_cell = existing.cells().cell_at(col_ids[1]).as_atomic_cell(second_def);
+                auto second_ts = second_cell.timestamp();
+                ts = std::max(ts, second_ts);
+                // Alternator isn't supposed to have more than two col_ids!
+                if (col_ids.size() != 2) [[unlikely]] {
+                    utils::on_internal_error(format("Unexpected col_ids length {}", col_ids.size()));
+                }
+            }
            if (cell.is_live()) {
-                r->apply(shadowable_tombstone(cell.timestamp(), now));
+                r->apply(shadowable_tombstone(ts, now));
            }
        } else {
            // "update" caused the base row to have been deleted, and !col_id
@@ -1316,11 +1362,12 @@ void view_update_builder::generate_update(static_row&& update, const tombstone&

 future<stop_iteration> view_update_builder::on_results() {
    constexpr size_t max_rows_for_view_updates = 100;
-    size_t rows_for_view_updates = std::accumulate(_view_updates.begin(), _view_updates.end(), 0, [] (size_t acc, const view_updates& vu) {
-        return acc + vu.op_count();
-    });
-    const bool stop_updates = rows_for_view_updates >= max_rows_for_view_updates;
-
+    auto should_stop_updates = [this] () -> bool {
+        size_t rows_for_view_updates = std::accumulate(_view_updates.begin(), _view_updates.end(), 0, [] (size_t acc, const view_updates& vu) {
+            return acc + vu.op_count();
+        });
+        return rows_for_view_updates >= max_rows_for_view_updates;
+    };
    if (_update && !_update->is_end_of_partition() && _existing && !_existing->is_end_of_partition()) {
        auto cmp = position_in_partition::tri_compare(*_schema)(_update->position(), _existing->position());
        if (cmp < 0) {
@@ -1343,7 +1390,7 @@ future<stop_iteration> view_update_builder::on_results() {
                              : std::nullopt;
                generate_update(std::move(update), _update_partition_tombstone, std::move(existing), _existing_partition_tombstone);
            }
-            return stop_updates ? stop() : advance_updates();
+            return should_stop_updates() ? stop() : advance_updates();
        }
        if (cmp > 0) {
            // We have something existing but no update (which will happen either because it's a range tombstone marker in
@@ -1379,7 +1426,7 @@ future<stop_iteration> view_update_builder::on_results() {
                    generate_update(std::move(update), _update_partition_tombstone, { std::move(existing) }, _existing_partition_tombstone);
                }
            }
-            return stop_updates ? stop () : advance_existings();
+            return should_stop_updates() ? stop () : advance_existings();
        }
        // We're updating a row that had pre-existing data
        if (_update->is_range_tombstone_change()) {
@@ -1401,8 +1448,9 @@ future<stop_iteration> view_update_builder::on_results() {
                                                  mutation_fragment_v2::printer(*_schema, *_update), mutation_fragment_v2::printer(*_schema, *_existing)));
            }
            generate_update(std::move(*_update).as_static_row(), _update_partition_tombstone, { std::move(*_existing).as_static_row() }, _existing_partition_tombstone);
+
        }
-        return stop_updates ? stop() : advance_all();
+        return should_stop_updates() ? stop() : advance_all();
    }

    auto tombstone = std::max(_update_partition_tombstone, _update_current_tombstone);
@@ -1417,7 +1465,7 @@ future<stop_iteration> view_update_builder::on_results() {
            auto update = static_row();
            generate_update(std::move(update), _update_partition_tombstone, { std::move(existing) }, _existing_partition_tombstone);
        }
-        return stop_updates ? stop() : advance_existings();
+        return should_stop_updates() ? stop() : advance_existings();
    }

    // If we have updates and it's a range tombstone, it removes nothing pre-exisiting, so we can ignore it
@@ -1438,7 +1486,7 @@ future<stop_iteration> view_update_builder::on_results() {
                          : std::nullopt;
            generate_update(std::move(*_update).as_static_row(), _update_partition_tombstone, std::move(existing), _existing_partition_tombstone);
        }
-        return stop_updates ? stop() : advance_updates();
+        return should_stop_updates() ? stop() : advance_updates();
    }

    return stop();
@@ -1619,6 +1667,13 @@ static bool should_update_synchronously(const schema& s) {
    return *tag_opt == "true";
 }

+size_t memory_usage_of(const frozen_mutation_and_schema& mut) {
+    // Overhead of sending a view mutation, in terms of data structures used by the storage_proxy, as well as possible background tasks
+    // allocated for a remote view update.
+    constexpr size_t base_overhead_bytes = 2288;
+    return base_overhead_bytes + mut.fm.representation().size();
+}
+
 // Take the view mutations generated by generate_view_updates(), which pertain
 // to a modification of a single base partition, and apply them to the
 // appropriate paired replicas. This is done asynchronously - we do not wait
@@ -1643,7 +1698,7 @@ future<> view_update_generator::mutate_MV(
        bool network_topology = dynamic_cast<const locator::network_topology_strategy*>(&ks.get_replication_strategy());
        auto target_endpoint = get_view_natural_endpoint(ermp, network_topology, base_token, view_token);
        auto remote_endpoints = ermp->get_pending_endpoints(view_token);
-        auto sem_units = pending_view_updates.split(mut.fm.representation().size());
+        auto sem_units = seastar::make_lw_shared<db::timeout_semaphore_units>(pending_view_updates.split(memory_usage_of(mut)));

        const bool update_synchronously = should_update_synchronously(*mut.s);
        if (update_synchronously) {
@@ -1691,7 +1746,7 @@ future<> view_update_generator::mutate_MV(
                    mut.s->ks_name(), mut.s->cf_name(), base_token, view_token);
            local_view_update = _proxy.local().mutate_mv_locally(mut.s, *mut_ptr, tr_state, db::commitlog::force_sync::no).then_wrapped(
                    [s = mut.s, &stats, &cf_stats, tr_state, base_token, view_token, my_address, mut_ptr = std::move(mut_ptr),
-                            units = sem_units.split(sem_units.count())] (future<>&& f) {
+                            sem_units] (future<>&& f) {
                --stats.writes;
                if (f.failed()) {
                    ++stats.view_updates_failed_local;
@@ -1728,7 +1783,7 @@ future<> view_update_generator::mutate_MV(
            schema_ptr s = mut.s;
            future<> view_update = apply_to_remote_endpoints(_proxy.local(), std::move(ermp), *target_endpoint, std::move(remote_endpoints), std::move(mut), base_token, view_token, allow_hints, tr_state).then_wrapped(
                    [s = std::move(s), &stats, &cf_stats, tr_state, base_token, view_token, target_endpoint, updates_pushed_remote,
-                            units = sem_units.split(sem_units.count()), apply_update_synchronously] (future<>&& f) mutable {
+                            sem_units, apply_update_synchronously] (future<>&& f) mutable {
                if (f.failed()) {
                    stats.view_updates_failed_remote += updates_pushed_remote;
                    cf_stats.total_view_updates_failed_remote += updates_pushed_remote;
@@ -2255,7 +2310,7 @@ future<> view_builder::do_build_step() {
            }
        }
    }).handle_exception([] (std::exception_ptr ex) {
-        vlogger.warn("Unexcepted error executing build step: {}. Ignored.", std::current_exception());
+        vlogger.warn("Unexcepted error executing build step: {}. Ignored.", ex);
    });
 }

--- a/db/view/view.hh
+++ b/db/view/view.hh
@@ -209,7 +209,7 @@ class view_updates final {
    schema_ptr _base;
    base_info_ptr _base_info;
    std::unordered_map<partition_key, mutation_partition, partition_key::hashing, partition_key::equality> _updates;
-    mutable size_t _op_count = 0;
+    size_t _op_count = 0;
    const bool _backing_secondary_index;
 public:
    explicit view_updates(view_and_base vab, bool backing_secondary_index)
@@ -318,6 +318,8 @@ future<query::clustering_row_ranges> calculate_affected_clustering_ranges(

 bool needs_static_row(const mutation_partition& mp, const std::vector<view_and_base>& views);

+size_t memory_usage_of(const frozen_mutation_and_schema& mut);
+
 /**
 * create_virtual_column() adds a "virtual column" to a schema builder.
 * The definition of a "virtual column" is based on the given definition
--- a/db/view/view_update_generator.cc
+++ b/db/view/view_update_generator.cc
@@ -234,12 +234,12 @@ void view_update_generator::do_abort() noexcept {
    }

    vug_logger.info("Terminating background fiber");
-    _db.unplug_view_update_generator();
    _as.request_abort();
    _pending_sstables.signal();
 }

 future<> view_update_generator::stop() {
+    _db.unplug_view_update_generator();
    do_abort();
    return std::move(_started).then([this] {
        _registration_sem.broken();
--- a/direct_failure_detector/failure_detector.cc
+++ b/direct_failure_detector/failure_detector.cc
@@ -96,6 +96,7 @@ struct failure_detector::impl {
    clock& _clock;

    clock::interval_t _ping_period;
+    clock::interval_t _ping_timeout;

    // Number of workers on each shard.
    // We use this to decide where to create new workers (we pick a shard with the smallest number of workers).
@@ -138,7 +139,7 @@ struct failure_detector::impl {
    // The unregistering process requires cross-shard operations which we perform on this fiber.
    future<> _destroy_subscriptions = make_ready_future<>();

-    impl(failure_detector& parent, pinger&, clock&, clock::interval_t ping_period);
+    impl(failure_detector& parent, pinger&, clock&, clock::interval_t ping_period, clock::interval_t ping_timeout);
    ~impl();

    // Inform update_endpoint_fiber() about an added/removed endpoint.
@@ -174,12 +175,14 @@ struct failure_detector::impl {
    future<> mark(listener* l, pinger::endpoint_id ep, bool alive);
 };

-failure_detector::failure_detector(pinger& pinger, clock& clock, clock::interval_t ping_period)
-        : _impl(std::make_unique<impl>(*this, pinger, clock, ping_period))
+failure_detector::failure_detector(
+    pinger& pinger, clock& clock, clock::interval_t ping_period, clock::interval_t ping_timeout)
+        : _impl(std::make_unique<impl>(*this, pinger, clock, ping_period, ping_timeout))  // every argument is forwarded to impl
 {}

-failure_detector::impl::impl(failure_detector& parent, pinger& pinger, clock& clock, clock::interval_t ping_period)
-        : _parent(parent), _pinger(pinger), _clock(clock), _ping_period(ping_period) {
+failure_detector::impl::impl(
+    failure_detector& parent, pinger& pinger, clock& clock, clock::interval_t ping_period, clock::interval_t ping_timeout)
+        : _parent(parent), _pinger(pinger), _clock(clock), _ping_period(ping_period), _ping_timeout(ping_timeout) {
    if (this_shard_id() != 0) {
        return;
    }
@@ -536,11 +539,9 @@ future<> endpoint_worker::ping_fiber() noexcept {
        auto start = clock.now();
        auto next_ping_start = start + _fd._ping_period;

-        // A ping should take significantly less time than _ping_period, but we give it a multiple of ping_period before it times out
-        // just in case of transient network partitions.
-        // However, if there's a listener that's going to timeout soon (before the ping returns), we abort the ping in order to handle
+        auto timeout = start + _fd._ping_timeout;
+        // If there's a listener that's going to timeout soon (before the ping returns), we abort the ping in order to handle
        // the listener (mark it as dead).
-        auto timeout = start + 3 * _fd._ping_period;
        for (auto& [threshold, l]: _fd._listeners_liveness) {
            if (l.endpoint_liveness[_id].alive && last_response + threshold < timeout) {
                timeout = last_response + threshold;
--- a/direct_failure_detector/failure_detector.hh
+++ b/direct_failure_detector/failure_detector.hh
@@ -120,14 +120,14 @@ public:

        // Every endpoint in the detected set will be periodically pinged every `ping_period`,
        // assuming that the pings return in a timely manner. A ping may take longer than `ping_period`
-        // before it's aborted (up to a certain multiple of `ping_period`), in which case the next ping
-        // will start immediately.
-        //
-        // `ping_period` should be chosen so that during normal operation, a ping takes significantly
-        // less time than `ping_period` (preferably at least an order of magnitude less).
+        // before it's aborted (up to `ping_timeout`), in which case the next ping will start immediately.
        //
        // The passed-in value must be the same on every shard.
-        clock::interval_t ping_period
+        clock::interval_t ping_period,
+
+        // Duration after which a ping is aborted, so that the next ping can be started
+        // (pings are sent sequentially).
+        clock::interval_t ping_timeout
    );

    ~failure_detector();
@@ -147,7 +147,7 @@ public:
    // The listener stops being called when the returned subscription is destroyed.
    // The subscription must be destroyed before service is stopped.
    //
-    // `threshold` should be significantly larger than `ping_period`, preferably at least an order of magnitude larger.
+    // `threshold` should be significantly larger than `ping_timeout`, preferably at least an order of magnitude larger.
    //
    // Different listeners may use different thresholds, depending on the use case:
    // some listeners may want to mark endpoints as dead more aggressively if fast reaction times are important
--- a/dist/common/scripts/scylla_io_setup
+++ b/dist/common/scripts/scylla_io_setup
@@ -10,6 +10,7 @@
 import os
 import re
 from scylla_util import *
+import resource
 import subprocess
 import argparse
 import yaml
@@ -102,6 +103,34 @@ class scylla_cpuinfo:
        else:
            return len(self._cpu_data["system"])

+def configure_iotune_open_fd_limit(shards_count):  # adjusts RLIMIT_NOFILE of this process; called before iotune is started
+    try:
+        fd_limits = resource.getrlimit(resource.RLIMIT_NOFILE)
+    except (OSError, ValueError) as e:  # NOTE(review): `e` is bound but never logged here - consider adding it to the warning
+        logging.warning("Could not get the limit of count of open file descriptors!")
+        logging.warning("iotune will proceed with the default limit. This may cause problems.")
+        return  # best effort: the limits are left untouched
+
+    precalculated_fds_count = (10 * shards_count) + 500  # 10 FDs per shard plus a constant 500
+    soft_limit, hard_limit = fd_limits
+
+    if hard_limit == resource.RLIM_INFINITY:
+        # No hard limit: only make sure the soft limit allows at least precalculated_fds_count FDs.
+        soft_limit = max(soft_limit, precalculated_fds_count)
+    else:
+        # Make both the soft and the hard limit at least max(hard_limit, precalculated_fds_count).
+        required_fds_count = max(hard_limit, precalculated_fds_count)
+        soft_limit = max(soft_limit, required_fds_count)
+        hard_limit = max(hard_limit, required_fds_count)
+
+    try:
+        resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
+    except (OSError, ValueError) as e:
+        logging.error(e)
+        logging.error("Could not set the limit of open file descriptors for iotune!")
+        logging.error(f"Required FDs count: {precalculated_fds_count}, default limit: {fd_limits}!")
+        sys.exit(1)  # unlike the getrlimit failure above, a setrlimit failure terminates the script
+
 def run_iotune():
            if "SCYLLA_CONF" in os.environ:
                conf_dir = os.environ["SCYLLA_CONF"]
@@ -142,6 +171,8 @@ def run_iotune():
            elif cpudata.smp():
                iotune_args += [ "--smp", str(cpudata.smp()) ]

+            configure_iotune_open_fd_limit(cpudata.nr_shards())
+
            try:
                subprocess.check_call([bindir() + "/iotune",
                                       "--format", "envfile",
--- a/dist/docker/debian/build_docker.sh
+++ b/dist/docker/debian/build_docker.sh
@@ -77,7 +77,7 @@ run apt-get -y upgrade
 run apt-get -y install dialog apt-utils
 run bash -ec "echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections"
 run bash -ec "rm -rf /etc/rsyslog.conf"
-run apt-get -y install hostname supervisor openjdk-11-jre-headless python2 python3 python3-yaml curl rsyslog sudo
+run apt-get -y install hostname supervisor openjdk-11-jre-headless python2 python3 python3-yaml curl rsyslog sudo systemd
 run bash -ec "echo LANG=C.UTF-8 > /etc/default/locale"
 run bash -ec "dpkg -i packages/*.deb"
 run apt-get -y clean all
--- a/docs/_ext/scylladb_include_flag.py
+++ b/docs/_ext/scylladb_include_flag.py
@@ -0,0 +1,25 @@
+from sphinx.directives.other import Include
+from docutils.parsers.rst import directives
+
+class IncludeFlagDirective(Include):  # Include variant that prefixes the included path depending on the 'enterprise' tag
+    option_spec = Include.option_spec.copy()  # copy, so the option table of the stock Include directive is not mutated
+    option_spec['base_path'] = directives.unchanged  # extra option: directory prefix, taken verbatim
+
+    def run(self):
+        env = self.state.document.settings.env
+        base_path = self.options.get('base_path', '_common')  # prefix defaults to '_common'
+
+        if env.app.tags.has('enterprise'):  # presumably set with `sphinx-build -t enterprise` - confirm against the docs build
+            self.arguments[0] = base_path + "_enterprise/" + self.arguments[0]  # e.g. _common_enterprise/<file>
+        else:
+            self.arguments[0] = base_path + "/" + self.arguments[0]  # e.g. _common/<file>
+        return super().run()  # the rewritten path is then handled by the regular Include logic
+
+def setup(app):  # Sphinx extension entry point: registers the directive and returns the extension metadata
+    app.add_directive('scylladb_include_flag', IncludeFlagDirective, override=True)  # override=True: install even if the name is taken
+
+    return {
+        "version": "0.1",
+        "parallel_read_safe": True,
+        "parallel_write_safe": True,
+    }
--- a/docs/_utils/redirects.yaml
+++ b/docs/_utils/redirects.yaml
@@ -1,6 +1,14 @@
 ### a dictionary of redirections
 #old path: new path

+# Moving pages from the install-scylla folder
+
+/stable/getting-started/install-scylla/scylla-web-installer.html: /stable/getting-started/installation-common/scylla-web-installer.html
+/stable/getting-started/install-scylla/unified-installer.html: /stable/getting-started/installation-common/unified-installer.html
+/stable/getting-started/install-scylla/air-gapped-install.html: /stable/getting-started/installation-common/air-gapped-install.html
+/stable/getting-started/install-scylla/disable-housekeeping.html: /stable/getting-started/installation-common/disable-housekeeping.html
+/stable/getting-started/install-scylla/dev-mod.html: /stable/getting-started/installation-common/dev-mod.html
+/stable/getting-started/install-scylla/config-commands.html: /stable/getting-started/config-commands.html

 # Removed the outdated upgrade guides

--- a/docs/conf.py
+++ b/docs/conf.py
@@ -39,7 +39,8 @@ extensions = [
    "recommonmark",  # optional
    "sphinxcontrib.datatemplates",
    "scylladb_cc_properties",
-    "scylladb_aws_images"
+    "scylladb_aws_images",
+    "scylladb_include_flag"
 ]

 # The suffix(es) of source filenames.
--- a/docs/cql/dml.rst
+++ b/docs/cql/dml.rst
@@ -20,7 +20,7 @@ sections common to data updating statements.
 Update parameters
 ~~~~~~~~~~~~~~~~~

-The ``UPDATE``, ``INSERT`` (and ``DELETE`` and ``BATCH`` for the ``TIMESTAMP``) statements support the following
+The ``UPDATE``, ``INSERT`` (and ``DELETE`` and ``BATCH`` for the ``TIMESTAMP`` and ``TIMEOUT``) statements support the following
 parameters:

 - ``TIMESTAMP``: sets the timestamp for the operation. If not specified, the coordinator will use the current time, in
--- a/docs/dev/cdc.md
+++ b/docs/dev/cdc.md
@@ -198,11 +198,27 @@ We're not able to prevent a node learning about a new generation too late due to

 After committing the generation ID, the topology coordinator publishes the generation data to user-facing description tables (`system_distributed.cdc_streams_descriptions_v2` and `system_distributed.cdc_generation_timestamps`).

-#### Generation switching: other notes
+#### Generation switching: accepting writes

-Due to the need of maintaining colocation we don't allow the client to send writes with arbitrary timestamps.
-Suppose that a write is requested and the write coordinator's local clock has time `C` and the generation operating at time `C` has timestamp `T` (`T <= C`). Then we only allow the write if its timestamp is in the interval [`T`, `C + generation_leeway`), where `generation_leeway` is a small time-inteval constant (e.g. 5 seconds).
-Reason: we cannot allow writes before `T`, because they belong to the old generation whose token ranges might no longer refine the current vnodes, so the corresponding log write would not necessarily be colocated with the base write. We also cannot allow writes too far "into the future" because we don't know what generation will be operating at that time (the node which will introduce this generation might not have joined yet). But, as mentioned before, we assume that we'll learn about the next generation in time. Again --- the need for this assumption will be gone in a future patch.
+Due to the need of maintaining colocation we don't allow the client to send writes with arbitrary timestamps. We allow:
+- writes to the current and next generations unless they are too far into the future,
+- writes to the previous generations unless they are too far into the past.
+
+##### Writes to the current and next generations
+
+Suppose that a write with timestamp `W` is requested, the write coordinator's local clock shows time `C`, and the generation operating at time `C` has timestamp `T` (`T <= C`), where `T <= W`. Then we only allow the write if `W < C + generation_leeway`, where `generation_leeway` is a small time-interval constant (e.g. 5 seconds).
+
+We cannot allow writes too far "into the future" because we don't know what generation will be operating at that time (the node which will introduce this generation might not have joined yet). But, as mentioned before, we assume that we'll learn about the next generation in time. Again --- the need for this assumption will be gone in a future patch.
+
+##### Writes to the previous generations
+
+This time suppose that `T > W`. Then we only allow the write if `W > C - generation_leeway` and there was a generation operating at `W`.
+
+We allow writes to previous generations to improve user experience. If a client generates timestamps by itself and clocks are not perfectly synchronized, there may be short periods of time around the moment of switching generations when the client's writes are rejected because they fall into one of the previous generations. Usually, the client can easily overcome this problem by repeating the write a few times with a higher timestamp. Unfortunately, if a table additionally uses LWT, the client cannot increase the timestamp because LWT makes timestamps permanent. Once Paxos commits an entry with a given timestamp, Scylla will keep trying to apply that entry until it succeeds, with the same timestamp. Applying the entry involves doing a CDC log table write. If it fails, we are stuck. Allowing writes to the previous generations is also a probabilistic fix for this LWT problem.
+
+Note that writing only to the previous generation might not be enough. With the Raft-based topology and tablets, we can add multiple nodes almost instantly. Then, we can have multiple generations with almost identical timestamps.
+
+We allow writes only to the recent past to reduce the number of generations that must be stored in memory.

 ### Streams description tables

--- a/docs/getting-started/_common/os-support-info.rst
+++ b/docs/getting-started/_common/os-support-info.rst
@@ -0,0 +1,21 @@
+You can `build ScyllaDB from source <https://github.com/scylladb/scylladb#build-prerequisites>`_ on other x86_64 or aarch64 platforms, without any guarantees.
+
+----------------------------+-------------+---------------+---------+---------------+
+| Linux Distributions        |Ubuntu       | Debian        | CentOS /| Rocky /       |
+|                            |             |               | RHEL    | RHEL          |
+----------------------------+------+------+-------+-------+---------+-------+-------+
+| ScyllaDB Version / Version |20.04 |22.04 |  10   |  11   |   7     |   8   |   9   |
+============================+======+======+=======+=======+=========+=======+=======+
+|   5.4                      | |v|  | |v|  | |v|   | |v|   | |x|     | |v|   | |v|   |
+----------------------------+------+------+-------+-------+---------+-------+-------+
+|   5.2                      | |v|  | |v|  | |v|   | |v|   | |v|     | |v|   | |x|   |
+----------------------------+------+------+-------+-------+---------+-------+-------+
+
+* The recommended OS for ScyllaDB Open Source is Ubuntu 22.04.
+* All releases are available as a Docker container and EC2 AMI, GCP, and Azure images. 
+
+Supported Architecture
+-----------------------------
+
+ScyllaDB Open Source supports x86_64 for all versions and AArch64 starting from ScyllaDB 4.6 and nightly build. 
+In particular, aarch64 support includes AWS EC2 Graviton.
--- a/docs/getting-started/os-support.rst
+++ b/docs/getting-started/os-support.rst
@@ -9,65 +9,5 @@ Where *supported* in this scope means:
 - The download and install procedures are tested as part of ScyllaDB release process for each version.
 - An automated install is included from :doc:`ScyllaDB Web Installer for Linux tool </getting-started/installation-common/scylla-web-installer>` (for latest versions)

-You can `build ScyllaDB from source <https://github.com/scylladb/scylladb#build-prerequisites>`_ on other x86_64 or aarch64 platforms, without any guarantees.
+.. scylladb_include_flag:: os-support-info.rst

-.. note::
-   
-   **Supported Architecture**
-
-   ScyllaDB Open Source supports x86_64 for all versions and AArch64 starting from ScyllaDB 4.6 and nightly build. In particular, aarch64 support includes AWS EC2 Graviton.
-
-
-ScyllaDB Open Source
----------------------
-
-.. note:: 
-
-    The recommended OS for ScyllaDB Open Source is Ubuntu 22.04.
-
-+----------------------------+-------------+---------------+---------+---------------+
-| Linux Distributions        |Ubuntu       | Debian        | CentOS /| Rocky /       |
-|                            |             |               | RHEL    | RHEL          |
-+----------------------------+------+------+-------+-------+---------+-------+-------+
-| ScyllaDB Version / Version |20.04 |22.04 |  10   |  11   |   7     |   8   |   9   |
-+============================+======+======+=======+=======+=========+=======+=======+
-|   5.4                      | |v|  | |v|  | |v|   | |v|   | |x|     | |v|   | |v|   |
-+----------------------------+------+------+-------+-------+---------+-------+-------+
-|   5.2                      | |v|  | |v|  | |v|   | |v|   | |v|     | |v|   | |x|   |
-+----------------------------+------+------+-------+-------+---------+-------+-------+
-
-All releases are available as a Docker container and EC2 AMI, GCP, and Azure images.
-
-
-ScyllaDB Enterprise
--------------------
-
-.. note:: 
-
-    The recommended OS for ScyllaDB Enterprise is Ubuntu 22.04.
-
-
-+----------------------------+-----------------------------------+---------------------------+--------+-------+
-| Linux Distributions        |  Ubuntu                           | Debian                    | CentOS/| Rocky/|
-|                            |                                   |                           | RHEL   | RHEL  |
-+----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
-| ScyllaDB Version / Version | 14.04| 16.04| 18.04| 20.04| 22.04 | 8    | 9    | 10   | 11   |  7     | 8     |
-+============================+======+======+======+======+=======+======+======+======+======+========+=======+
-|   2023.1                   | |x|  | |x|  | |x|  | |v|  | |v|   | |x|  | |x|  | |v|  | |v|  | |v|    | |v|   |
-+----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
-|   2022.2                   | |x|  | |x|  | |v|  | |v|  | |v|   | |x|  | |x|  | |v|  | |v|  | |v|    | |v|   |
-+----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
-|   2022.1                   | |x|  | |x|  | |v|  | |v|  | |v|   | |x|  | |x|  | |v|  | |v|  | |v|    | |v|   |
-+----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
-|   2021.1                   | |x|  | |v|  | |v|  | |v|  | |v|   | |x|  | |v|  | |v|  | |x|  | |v|    | |v|   |
-+----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
-|   2020.1                   | |x|  | |v|  | |v|  |  |x| | |x|   | |x|  | |v|  | |v|  | |x|  | |v|    | |v|   |
-+----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
-|   2019.1                   | |x|  | |v|  | |v|  |  |x| | |x|   | |x|  | |v|  | |x|  | |x|  | |v|    | |x|   |
-+----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
-|   2018.1                   | |v|  | |v|  | |x|  |  |x| | |v|   | |x|  | |x|  | |x|  | |x|  | |v|    | |x|   |
-+----------------------------+------+------+------+------+-------+------+------+------+------+--------+-------+
-
-
-All releases are available as a Docker container, EC2 AMI, and a GCP image (GCP image from version 2021.1). Since 
-version 2023.1, the ScyllaDB AMI/Image OS for ScyllaDB Enterprise is based on Ubuntu 22.04.
--- a/docs/getting-started/system-requirements.rst
+++ b/docs/getting-started/system-requirements.rst
@@ -23,7 +23,7 @@ It’s recommended to have a balanced setup. If there are only 4-8 :term:`Logica
 This works in the opposite direction as well.
 ScyllaDB can be used in many types of installation environments.

-To see which system would best suit your workload requirements, use the `ScyllaDB Sizing Calculator <https://price-calc.gh.scylladb.com/>`_ to customize ScyllaDB for your usage.
+To see which system would best suit your workload requirements, use the `ScyllaDB Sizing Calculator <https://www.scylladb.com/product/scylla-cloud/get-pricing/>`_ to customize ScyllaDB for your usage.



--- a/docs/operating-scylla/admin-tools/scylla-sstable.rst
+++ b/docs/operating-scylla/admin-tools/scylla-sstable.rst
@@ -129,7 +129,7 @@ SStable Content
 .. _SStable: /architecture/sstable

 All operations target either one specific sstable component or all of them as a whole.
-For more information about the sstable components and the format itself, visit SStable_.
+For more information about the sstable components and the format itself, visit :doc:`SSTable Format </architecture/sstable/index>`.

 On a conceptual level, the data in SStables is represented by objects called mutation fragments. There are the following kinds of fragments:

@@ -634,6 +634,22 @@ Note that levels are cumulative - each contains all the checks of the previous l
 By default, the strictest level is used.
 This can be relaxed, for example, if you want to produce intentionally corrupt SStables for tests.

+shard-of
+^^^^^^^^
+
+Print out the shards that own the specified SSTables.
+
+The content is dumped in JSON, using the following schema:
+
+.. code-block:: none
+    :class: hide-copy-button
+
+    $ROOT := { "$sstable_path": $SHARD_IDS, ... }
+
+    $SHARD_IDS := [$SHARD_ID, ...]
+
+    $SHARD_ID := Uint
+
 script
 ^^^^^^

--- a/docs/operating-scylla/procedures/cluster-management/_common/prereq.rst
+++ b/docs/operating-scylla/procedures/cluster-management/_common/prereq.rst
@@ -7,10 +7,11 @@

 .. Note:: 

-   If ``authenticator`` is set to ``PasswordAuthenticator`` - increase the replication factor of the ``system_auth`` keyspace.
-
-   For example:
-
+   If ``authenticator`` is set to ``PasswordAuthenticator``, increase the replication factor of the ``system_auth`` keyspace.
+   For example: 
+   
   ``ALTER KEYSPACE system_auth WITH REPLICATION = {'class' : 'NetworkTopologyStrategy', 'dc1' : <new_replication_factor>};``
+   
+   Ensure you run repair after you alter the keyspace. See :doc:`How to Safely Increase the Replication Factor </kb/rf-increase>`.

   It is recommended to set ``system_auth`` replication factor to the number of nodes in each DC.
--- a/docs/reference/versions-matrix-enterprise-oss.rst
+++ b/docs/reference/versions-matrix-enterprise-oss.rst
@@ -14,6 +14,8 @@ The following table shows ScyllaDB Enterprise versions and their corresponding S

   * - ScyllaDB Enterprise
     - ScyllaDB Open Source
+   * - 2024.1
+     - 5.4
   * - 2023.1
     - 5.2
   * - 2022.2
--- a/docs/troubleshooting/debugging-large-partition.rst
+++ b/docs/troubleshooting/debugging-large-partition.rst
@@ -21,7 +21,7 @@ Any of the following:

  .. code-block:: none

-     WARN  2022-09-22 17:33:11,075 [shard 1]large_data - Writing large partition Some_KS/Some_table: PK[/CK[/COL]] (SIZE bytes) to SSTABLE_NAME
+     WARN  2022-09-22 17:33:11,075 [shard 1]large_data - Writing large partition Some_KS/Some_table: [COL] (SIZE bytes) to SSTABLE_NAME

  In this case, refer to :ref:`Troubleshooting Large Partition Tables <large-partition-table-configure>` for more information.

--- a/docs/troubleshooting/missing-dotmount-files.rst
+++ b/docs/troubleshooting/missing-dotmount-files.rst
@@ -12,7 +12,7 @@ the ``/etc/systemd/system/var-lib-scylla.mount`` and ``/etc/systemd/system/var-l
 deleted by RPM.

 To avoid losing the files, the upgrade procedure includes a step to backup the .mount files. The following 
-example shows the command to backup the files before the :doc:`upgrade from version 5.0 </upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/upgrade-guide-from-5.0-to-2022.1-rpm/>`:
+example shows the command to backup the files before the upgrade from version 5.0:

 .. code-block:: console

--- a/docs/troubleshooting/sstable-corruption.rst
+++ b/docs/troubleshooting/sstable-corruption.rst
@@ -21,7 +21,7 @@ For example:
 
 In this scenario, a missing ``TOC`` file will prevent the Scylla node from starting.

-The SSTable corporation problem can be different, for example, other missing or unreadable files. The following solution apply for all of the scenarios.
+The SSTable corruption problem can be different, for example, other missing or unreadable files. The following solution applies to all scenarios.

 Solution
 ^^^^^^^^
--- a/docs/upgrade/_common/upgrade-guide-v4-rpm.rst
+++ b/docs/upgrade/_common/upgrade-guide-v4-rpm.rst
@@ -31,7 +31,7 @@ Apply the following procedure **serially** on each node. Do not move to the next
 * Not to run administration functions, like repairs, refresh, rebuild or add or remove nodes. See `sctool <https://manager.docs.scylladb.com/stable/sctool/index.html>`_ for suspending ScyllaDB Manager (only available for ScyllaDB Enterprise) scheduled or running repairs.
 * Not to apply schema changes

-.. note:: Before upgrading, make sure to use the latest `ScyllaDB Montioring <https://monitoring.docs.scylladb.com/>`_ stack.
+.. note:: Before upgrading, make sure to use the latest `ScyllaDB Monitoring <https://monitoring.docs.scylladb.com/>`_ stack.

 Upgrade Steps
 =============
@@ -182,4 +182,4 @@ Start the node

 Validate
 --------
-Check the upgrade instructions above for validation. Once you are sure the node rollback is successful, move to the next node in the cluster.
+Check the upgrade instructions above for validation. Once you are sure the node rollback is successful, move to the next node in the cluster.
--- a/docs/upgrade/_common/upgrade-guide-v4-ubuntu-and-debian.rst
+++ b/docs/upgrade/_common/upgrade-guide-v4-ubuntu-and-debian.rst
@@ -34,7 +34,7 @@ Apply the following procedure **serially** on each node. Do not move to the next
 * Not to run administration functions, like repairs, refresh, rebuild or add or remove nodes. See `sctool <https://manager.docs.scylladb.com/stable/sctool/index.html>`_ for suspending Scylla Manager (only available Scylla Enterprise) scheduled or running repairs.
 * Not to apply schema changes

-.. note:: Before upgrading, make sure to use the latest `Scylla Montioring <https://monitoring.docs.scylladb.com/>`_ stack.
+.. note:: Before upgrading, make sure to use the latest `Scylla Monitoring <https://monitoring.docs.scylladb.com/>`_ stack.

 Upgrade steps
 =============
--- a/docs/upgrade/_common/upgrade-guide-v5-ubuntu-and-debian-p1.rst
+++ b/docs/upgrade/_common/upgrade-guide-v5-ubuntu-and-debian-p1.rst
@@ -32,7 +32,7 @@ Apply the following procedure **serially** on each node. Do not move to the next
 * Not to run administration functions, like repairs, refresh, rebuild or add or remove nodes. See `sctool <https://manager.docs.scylladb.com/stable/sctool/>`_ for suspending ScyllaDB Manager (only available for ScyllaDB Enterprise) scheduled or running repairs.
 * Not to apply schema changes

-.. note:: Before upgrading, make sure to use the latest `ScyllaDB Montioring <https://monitoring.docs.scylladb.com/>`_ stack.
+.. note:: Before upgrading, make sure to use the latest `ScyllaDB Monitoring <https://monitoring.docs.scylladb.com/>`_ stack.

 Upgrade Steps
 =============
--- a/docs/upgrade/_common/warning_rollback.rst
+++ b/docs/upgrade/_common/warning_rollback.rst
@@ -1 +1 @@
-.. note:: Execute the following commands one node at the time, moving to the next node only **after** the rollback procedure completed successfully.
+.. note:: Execute the following commands one node at a time, moving to the next node only **after** the rollback procedure is completed successfully.
--- a/docs/upgrade/ami-upgrade.rst
+++ b/docs/upgrade/ami-upgrade.rst
@@ -2,13 +2,14 @@
 Upgrade ScyllaDB Image: EC2 AMI, GCP, and Azure Images
 ======================================================

-To upgrade ScyllaDB images, you need to update:
+ScyllaDB images are based on **Ubuntu 22.04**.

-#. ScyllaDB packages. Since ScyllaDB Open Source **5.2** and ScyllaDB 
-   Enterprise **2023.1**, the images are based on **Ubuntu 22.04**. 
-   See the :doc:`upgrade guide <./index>` for your ScyllaDB version 
-   for instructions for updating ScyllaDB packages on Ubuntu.
-#. Underlying OS packages. ScyllaDB includes a list of 3rd party and OS packages 
-   tested with the ScyllaDB release. 
+If you’re using the ScyllaDB official image (recommended), follow the upgrade 
+instructions on the **Debian/Ubuntu** tab in the :doc:`upgrade guide </upgrade/index>`
+for your ScyllaDB version.
+
+If you’re using your own image and have installed ScyllaDB packages for Ubuntu or Debian, 
+follow the extended upgrade procedure on the **EC2/GCP/Azure Ubuntu image** tab 
+in the :doc:`upgrade guide </upgrade/index>` for your ScyllaDB version.

 To check your Scylla version, run the ``scylla --version`` command.
--- a/docs/upgrade/upgrade-to-enterprise/index.rst
+++ b/docs/upgrade/upgrade-to-enterprise/index.rst
@@ -1,20 +1,13 @@
-====================================================
-Upgrade from Scylla Open Source to Scylla Enterprise
-====================================================
+=========================================================
+Upgrade from ScyllaDB Open Source to ScyllaDB Enterprise
+=========================================================

 .. toctree::
   :titlesonly:
   :hidden:

+   ScyllaDB 5.4 to ScyllaDB Enterprise 2024.1 <upgrade-guide-from-5.4-to-2024.1/index>
   ScyllaDB 5.2 to ScyllaDB Enterprise 2023.1 <upgrade-guide-from-5.2-to-2023.1/index>
-   ScyllaDB 5.1 to ScyllaDB Enterprise 2022.2 <upgrade-guide-from-5.1-to-2022.2/index>
-   ScyllaDB 5.0 to ScyllaDB Enterprise 2022.1 <upgrade-guide-from-5.0-to-2022.1/index>
-   Scylla 4.3 to Scylla Enterprise 2021.1 <upgrade-guide-from-4.3-to-2021.1/index>
-   Scylla 4.0 to Scylla Enterprise 2020.1 <upgrade-guide-from-4.0-to-2020.1/index>
-   Scylla 3.0 to Scylla Enterprise 2019.1 <upgrade-guide-from-3.0-to-2019.1/index>
-   Scylla 2.1 to Scylla Enterprise 2018.1 <upgrade-guide-from-2.1-to-2018.1/index>
-   Scylla 1.6 to Scylla Enterprise 2017.1 <upgrade-guide-from-1.6-to-2017.1/index>
-
  

 .. raw:: html
@@ -23,21 +16,14 @@ Upgrade from Scylla Open Source to Scylla Enterprise
   <div class="panel callout radius animated">
            <div class="row">
              <div class="medium-3 columns">
-                <h5 id="getting-started">Upgrade to Scylla Enterprise</h5>
+                <h5 id="getting-started">Upgrade to ScyllaDB Enterprise</h5>
              </div>
              <div class="medium-9 columns">

-Procedures for upgrading from Scylla Open Source to Scylla Enterprise.
-
-* :doc:`Upgrade - ScyllaDB 5.2 to Scylla Enterprise 2023.1 </upgrade/upgrade-to-enterprise/upgrade-guide-from-5.2-to-2023.1/index>`
-* :doc:`Upgrade - ScyllaDB 5.1 to Scylla Enterprise 2022.2 </upgrade/upgrade-to-enterprise/upgrade-guide-from-5.1-to-2022.2/index>`
-* :doc:`Upgrade - ScyllaDB 5.0 to Scylla Enterprise 2022.1 </upgrade/upgrade-to-enterprise/upgrade-guide-from-5.0-to-2022.1/index>`
-* :doc:`Upgrade - Scylla 4.3 to Scylla Enterprise 2021.1 </upgrade/upgrade-to-enterprise/upgrade-guide-from-4.3-to-2021.1/index>`
-* :doc:`Upgrade - Scylla 4.0 to Scylla Enterprise 2020.1 </upgrade/upgrade-to-enterprise/upgrade-guide-from-4.0-to-2020.1/index>`
-* :doc:`Upgrade - Scylla 3.0 to Scylla Enterprise 2019.1 </upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/index>`
-* :doc:`Upgrade - Scylla 2.1 to Scylla Enterprise 2018.1 </upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/index>`
-* :doc:`Upgrade - Scylla 1.6 to Scylla Enterprise 2017.1 </upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/index>`
+Procedures for upgrading from ScyllaDB Open Source to ScyllaDB Enterprise:

+* :doc:`ScyllaDB 5.4 to ScyllaDB Enterprise 2024.1 </upgrade/upgrade-to-enterprise/upgrade-guide-from-5.4-to-2024.1/index>`
+* :doc:`ScyllaDB 5.2 to ScyllaDB Enterprise 2023.1 </upgrade/upgrade-to-enterprise/upgrade-guide-from-5.2-to-2023.1/index>`

 .. raw:: html

--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/index.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/index.rst
@@ -1,36 +0,0 @@
-================================================
-Upgrade - Scylla 1.6 to Scylla Enterprise 2017.1
-================================================
-
-.. toctree::
-   :titlesonly:
-   :hidden:
-
-   Red Hat Enterprise Linux and CentOS <upgrade-guide-from-1.6-to-2017.1-rpm>
-   Ubuntu <upgrade-guide-from-1.6-to-2017.1-ubuntu>
-   Debian <upgrade-guide-from-1.6-to-2017.1-debian>
-
-
-.. raw:: html
-
-
-   <div class="panel callout radius animated">
-            <div class="row">
-              <div class="medium-3 columns">
-                <h5 id="getting-started">Upgrade Scylla Scylla 1.6 to Scylla Enterprise 2017.1</h5>
-              </div>
-              <div class="medium-9 columns">
-
-Upgrade guides are available for:
-
-* :doc:`Upgrade Scylla from 1.6.x to Scylla Enterprise 2017.1.y on Red Hat Enterprise Linux and CentOS <upgrade-guide-from-1.6-to-2017.1-rpm>`
-* :doc:`Upgrade Scylla from 1.6.x to Scylla Enterprise 2017.1.y on Ubuntu <upgrade-guide-from-1.6-to-2017.1-ubuntu>`
-* :doc:`Upgrade Scylla from 1.6.x to Scylla Enterprise 2017.1.y on Debian <upgrade-guide-from-1.6-to-2017.1-debian>`
-
-
-
-.. raw:: html
-
-   </div>
-   </div>
-   </div>
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/upgrade-guide-from-1.6-to-2017.1-debian.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/upgrade-guide-from-1.6-to-2017.1-debian.rst
@@ -1,6 +0,0 @@
-.. |OS| replace:: Debian 8
-.. |ROLLBACK| replace:: rollback
-.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/upgrade-guide-from-1.6-to-2017.1-debian/#rollback-procedure
-.. |APT| replace:: Scylla Enterprise Deb repo
-.. _APT: http://www.scylladb.com/enterprise-download/debian8/
-.. include:: /upgrade/_common/upgrade-guide-from-1.6-to-2017.1-ubuntu-and-debian.rst
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/upgrade-guide-from-1.6-to-2017.1-rpm.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/upgrade-guide-from-1.6-to-2017.1-rpm.rst
@@ -1,156 +0,0 @@
-===========================================================================================
-Upgrade Guide - Scylla 1.6 to Scylla Enterprise 2017.1 for Red Hat Enterprise 7 or CentOS 7
-===========================================================================================
-
-This document is a step by step procedure for upgrading from Scylla 1.6 to Scylla Enterprise 2017.1, and rollback to 1.6 if required.
-
-
-Applicable versions
-===================
-This guide covers upgrading Scylla from the following versions: 1.6.x to Scylla Enterprise version 2017.1.y, on the following platforms:
-
-* Red Hat Enterprise Linux, version 7 and later
-* CentOS, version 7 and later
-* No longer provide packages for Fedora
-
-Upgrade Procedure
-=================
-
-.. include:: /upgrade/_common/warning.rst
-
-A Scylla upgrade is a rolling procedure which does not require full cluster shutdown. For each of the nodes in the cluster, serially (i.e. one at a time), you will:
-
-* drain node and backup the data
-* check your current release
-* backup configuration file
-* stop Scylla
-* download and install new Scylla packages
-* start Scylla
-* validate that the upgrade was successful
-
-Apply the following procedure **serially** on each node. Do not move to the next node before validating the node is up and running with the new version.
-
-**During** the rolling upgrade it is highly recommended:
-
-* Not to use new Scylla Enterprise 2017.1 features
-* Not to run administration functions, like repairs, refresh, rebuild or add or remove nodes
-* Not to apply schema changes
-
-Upgrade steps
-=============
-Drain node and backup the data
------------------------------
-Before any major procedure, like an upgrade, it is recommended to backup all the data to an external device. In Scylla, backup is done using the ``nodetool snapshot`` command. For **each** node in the cluster, run the following command:
-
-.. code:: sh
-
-   nodetool drain
-   nodetool snapshot
-
-Take note of the directory name that nodetool gives you, and copy all the directories having this name under ``/var/lib/scylla`` to a backup device.
-
-When the upgrade is complete (all nodes), the snapshot should be removed by ``nodetool clearsnapshot -t <snapshot>``, or you risk running out of space.
-
-Backup configuration file
-------------------------
-.. code:: sh
-
-   sudo cp -a /etc/scylla/scylla.yaml /etc/scylla/scylla.yaml.backup-1.6
-
-Stop Scylla
-----------
-.. code:: sh
-
-   sudo systemctl stop scylla-server
-
-Download and install the new release
------------------------------------
-Before upgrading, check what version you are running now using ``rpm -qa | grep scylla-server``. You should use the same version in case you want to :ref:`rollback <upgrade-1.6-2017.1-rpm-rollback-procedure>` the upgrade. If you are not running a 1.6.x version, stop right here! This guide only covers 1.6.x to 2017.1.y upgrades.
-
-To upgrade:
-
-1. Update the `Scylla RPM Enterprise repo <http://www.scylladb.com/enterprise-download/centos_rpm/>`_ to **2017.1**
-2. install
-
-.. code:: sh
-
-   sudo yum update scylla\* -y
-
-Start the node
--------------
-.. code:: sh
-
-   sudo systemctl start scylla-server
-
-Validate
--------
-1. Check cluster status with ``nodetool status`` and make sure **all** nodes, including the one you just upgraded, are in UN status.
-2. Use ``curl -X GET "http://localhost:10000/storage_service/scylla_release_version"`` to check scylla version.
-3. Use ``journalctl _COMM=scylla`` to check there are no new errors in the log.
-4. Check again after 2 minutes, to validate no new issues are introduced.
-
-Once you are sure the node upgrade is successful, move to the next node in the cluster.
-
-.. _upgrade-1.6-2017.1-rpm-rollback-procedure:
-
-Rollback Procedure
-==================
-
-.. include:: /upgrade/_common/warning_rollback.rst
-
-The following procedure describes a rollback from Scylla release 2017.1.x to 1.6.y. Apply this procedure if an upgrade from 1.6 to 2017.1 failed before completing on all nodes. Use this procedure only for nodes you upgraded to 2017.1
-
-Scylla rollback is a rolling procedure which does **not** require full cluster shutdown.
-For each of the nodes rollback to 1.6, you will:
-
-* drain the node and stop Scylla
-* retrieve the old Scylla packages
-* restore the configuration file
-* restart Scylla
-* validate the rollback success
-
-Apply the following procedure **serially** on each node. Do not move to the next node before validating the node is up and running with the new version.
-
-Rollback steps
-==============
-Gracefully shutdown Scylla
--------------------------
-.. code:: sh
-
-   nodetool drain
-   sudo systemctl stop scylla-server
-
-Download and install the new release
------------------------------------
-1. Remove the old repo file.
-
-.. code:: sh
-
-   sudo rm -rf /etc/yum.repos.d/scylla.repo
-
-2. Update the `Scylla RPM repo <http://www.scylladb.com/download/centos_rpm>`_ to **1.6**
-3. Install
-
-.. code:: sh
-
-   sudo yum clean all
-   sudo yum downgrade scylla\* -y
-
-Restore the configuration file
------------------------------
-
-.. code:: sh
-
-   sudo rm -rf /etc/scylla/scylla.yaml
-   sudo cp -a /etc/scylla/scylla.yaml.backup-1.6 /etc/scylla/scylla.yaml
-
-Start the node
--------------
-
-.. code:: sh
-
-   sudo systemctl start scylla-server
-
-Validate
--------
-Check upgrade instruction above for validation. Once you are sure the node rollback is successful, move to the next node in the cluster.
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/upgrade-guide-from-1.6-to-2017.1-ubuntu.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/upgrade-guide-from-1.6-to-2017.1-ubuntu.rst
@@ -1,6 +0,0 @@
-.. |OS| replace:: Ubuntu 14.04 or 16.04
-.. |ROLLBACK| replace:: rollback
-.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-1.6-to-2017.1/upgrade-guide-from-1.6-to-2017.1-ubuntu/#rollback-procedure
-.. |APT| replace:: Scylla Enterprise Deb repo
-.. _APT: http://www.scylladb.com/enterprise-download/ubuntu-16-04/
-.. include:: /upgrade/_common/upgrade-guide-from-1.6-to-2017.1-ubuntu-and-debian.rst
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/index.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/index.rst
@@ -1,38 +0,0 @@
-================================================
-Upgrade - Scylla 2.1 to Scylla Enterprise 2018.1
-================================================
-
-.. toctree::
-   :titlesonly:
-   :hidden:
-
-   Red Hat Enterprise Linux and CentOS <upgrade-guide-from-2.1-to-2018.1-rpm>
-   Ubuntu 14.04 <upgrade-guide-from-2.1-to-2018.1-ubuntu>
-   Ubuntu 16.04 <upgrade-guide-from-2.1-to-2018.1-ubuntu-16-04>
-   Debian <upgrade-guide-from-2.1-to-2018.1-debian>
-   Metrics <metric-update-2.1-to-2018.1>
-
-
-.. raw:: html
-
-
-   <div class="panel callout radius animated">
-            <div class="row">
-              <div class="medium-3 columns">
-                <h5 id="getting-started">Upgrade Scylla Scylla 2.1 to Scylla Enterprise 2018.1</h5>
-              </div>
-              <div class="medium-9 columns">
-
-Upgrade guides are available for:
-
-* :doc:`Upgrade Scylla Enterprise from 2.1.x to 2018.1.y on Red Hat Enterprise Linux and CentOS <upgrade-guide-from-2.1-to-2018.1-rpm>`
-* :doc:`Upgrade Scylla Enterprise from 2.1.x to 2018.1.y on Ubuntu 14.04 <upgrade-guide-from-2.1-to-2018.1-ubuntu>`
-* :doc:`Upgrade Scylla Enterprise from 2.1.x to 2018.1.y on Ubuntu 16.04 <upgrade-guide-from-2.1-to-2018.1-ubuntu-16-04>`
-* :doc:`Upgrade Scylla Enterprise from 2.1.x to 2018.1.y on Debian <upgrade-guide-from-2.1-to-2018.1-debian>`
-* :doc:`Scylla Metrics Update - Scylla 2.1 to 2018.1 <metric-update-2.1-to-2018.1>`
-
-.. raw:: html
-
-   </div>
-   </div>
-   </div>
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/metric-update-2.1-to-2018.1.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/metric-update-2.1-to-2018.1.rst
@@ -1,10 +0,0 @@
-=============================================================
-Scylla Metric Update - Scylla 2.1 to Scylla Enterprise 2018.1
-=============================================================
-
-
-The following metrics are new in Scylla 2018.1
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-* scylla_evictions_from_garbage
-* scylla_garbage_partitions
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-debian.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-debian.rst
@@ -1,10 +0,0 @@
-.. |OS| replace:: Debian 8
-.. |ROLLBACK| replace:: rollback
-.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-debian/#rollback-procedure
-.. |APT| replace:: Scylla deb repo
-.. _APT: http://www.scylladb.com/download/debian8/
-.. |APT_ENTERPRISE| replace:: Scylla Enterprise Deb repo
-.. _APT_ENTERPRISE: http://www.scylladb.com/enterprise-download/debian8/
-.. |ENABLE_APT_REPO| replace:: echo 'deb http://http.debian.net/debian jessie-backports main' > /etc/apt/sources.list.d/jessie-backports.list
-.. |JESSIE_BACKPORTS| replace:: -t jessie-backports openjdk-8-jre-headless
-.. include:: /upgrade/_common/upgrade-guide-from-2.1-to-2018.1-ubuntu-and-debian.rst
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-rpm.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-rpm.rst
@@ -1,170 +0,0 @@
-=============================================================================================
-Upgrade Guide - Scylla 2.1 to 2018.1 for Red Hat Enterprise Linux 7 or CentOS 7
-=============================================================================================
-
-This document is a step by step procedure for upgrading from Scylla 2.1 to Scylla Enterprise 2018.1, and rollback to 2.1 if required.
-
-
-Applicable versions
-===================
-This guide covers upgrading Scylla from the following versions: 2.1.x to Scylla Enterprise version 2018.1.y, on the following platforms:
-
-* Red Hat Enterprise Linux, version 7 and later
-* CentOS, version 7 and later
-* No longer provide packages for Fedora
-
-Upgrade Procedure
-=================
-
-.. include:: /upgrade/_common/warning.rst
-
-A Scylla upgrade is a rolling procedure which does not require full cluster shutdown. For each of the nodes in the cluster, serially (i.e. one at a time), you will:
-
-* Check cluster schema
-* Drain node and backup the data
-* Backup configuration file
-* Stop Scylla
-* Download and install new Scylla packages
-* Start Scylla
-* Validate that the upgrade was successful
-
-Apply the following procedure **serially** on each node. Do not move to the next node before validating the node is up and running with the new version.
-
-**During** the rolling upgrade it is highly recommended:
-
-* Not to use new 2018.1 features
-* Not to run administration functions, like repairs, refresh, rebuild or add or remove nodes
-* Not to apply schema changes
-
-Upgrade steps
-=============
-Check cluster schema
--------------------
-Make sure that all nodes have the schema synched prior to upgrade, we won't survive an upgrade that has schema disagreement between nodes.
-
-.. code:: sh
-
-       nodetool describecluster
-
-Drain node and backup the data
------------------------------
-Before any major procedure, like an upgrade, it is recommended to backup all the data to an external device. In Scylla, backup is done using the ``nodetool snapshot`` command. For **each** node in the cluster, run the following command:
-
-.. code:: sh
-
-   nodetool drain
-   nodetool snapshot
-
-Take note of the directory name that nodetool gives you, and copy all the directories having this name under ``/var/lib/scylla`` to a backup device.
-
-When the upgrade is complete (all nodes), the snapshot should be removed by ``nodetool clearsnapshot -t <snapshot>``, or you risk running out of space.
-
-Backup configuration files
--------------------------
-.. code:: sh
-
-   for conf in $( rpm -qc $(rpm -qa | grep scylla) | grep -v contains ); do sudo cp -v $conf $conf.backup-2.1; done
-
-Stop Scylla
-----------
-.. code:: sh
-
-   sudo systemctl stop scylla-server
-
-Download and install the new release
------------------------------------
-Before upgrading, check what version you are running now using ``rpm -qa | grep scylla-server``. You should use the same version in case you want to :ref:`rollback <upgrade-2.1-2018.1-rpm-rollback-procedure>` the upgrade. If you are not running a 2.1.x version, stop right here! This guide only covers 2.1.x to 2018.1.y upgrades.
-
-To upgrade:
-
-1. Update the `Scylla RPM Enterprise repo <http://www.scylladb.com/enterprise-download/centos_rpm/>`_  to **2018.1**
-2. install
-
-.. code:: sh
-
-   sudo yum clean all
-   sudo rm -rf /var/cache/yum
-   sudo yum remove scylla\*
-   sudo yum install scylla-enterprise 
-   for conf in $( rpm -qc $(rpm -qa | grep scylla) | grep -v contains ); do sudo cp -v $conf.backup-2.1 $conf; done
-
-Start the node
--------------
-.. code:: sh
-
-   sudo systemctl start scylla-server
-
-Validate
--------
-1. Check cluster status with ``nodetool status`` and make sure **all** nodes, including the one you just upgraded, are in UN status.
-2. Use ``curl -X GET "http://localhost:10000/storage_service/scylla_release_version"`` to check scylla version.
-3. Use ``journalctl _COMM=scylla`` to check there are no new errors in the log.
-4. Check again after 2 minutes, to validate no new issues are introduced.
-
-Once you are sure the node upgrade is successful, move to the next node in the cluster.
-
-* More on :doc:`Scylla Metrics Update - Scylla 2.1 to 2018.1<metric-update-2.1-to-2018.1>`
-
-.. _upgrade-2.1-2018.1-rpm-rollback-procedure:
-
-Rollback Procedure
-==================
-
-.. include:: /upgrade/_common/warning_rollback.rst
-
-The following procedure describes a rollback from Scylla Enterprise release 2018.1.x to 2.1.y. Apply this procedure if an upgrade from 2.1 to 2018.1 failed before completing on all nodes. Use this procedure only for nodes you upgraded to 2018.1
-
-Scylla rollback is a rolling procedure which does **not** require full cluster shutdown.
-For each of the nodes rollback to 2.1, you will:
-
-* Drain the node and stop Scylla
-* Retrieve the old Scylla packages
-* Restore the configuration file
-* Restart Scylla
-* Validate the rollback success
-
-Apply the following procedure **serially** on each node. Do not move to the next node before validating the node is up and running with the new version.
-
-Rollback steps
-==============
-Gracefully shutdown Scylla
--------------------------
-.. code:: sh
-
-   nodetool drain
-   sudo systemctl stop scylla-server
-
-Download and install the new release
------------------------------------
-1. Remove the old repo file.
-
-.. code:: sh
-
-   sudo rm -rf /etc/yum.repos.d/scylla.repo
-
-2. Update the `Scylla RPM repo <http://www.scylladb.com/download/?platform=centos>`_ to **2.1**
-3. Install
-
-.. code:: sh
-
-   sudo yum clean all
-   sudo yum remove scylla\*
-   sudo yum install scylla
-
-Restore the configuration file
------------------------------
-
-.. code:: sh
-
-   for conf in $( rpm -qc $(rpm -qa | grep scylla) | grep -v contains ); do sudo cp -v $conf.backup-2.1 $conf; done
-
-Start the node
--------------
-
-.. code:: sh
-
-   sudo systemctl start scylla-server
-
-Validate
--------
-Check upgrade instruction above for validation. Once you are sure the node rollback is successful, move to the next node in the cluster.
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-ubuntu-16-04.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-ubuntu-16-04.rst
@@ -1,10 +0,0 @@
-.. |OS| replace:: Ubuntu 16.04 
-.. |ROLLBACK| replace:: rollback
-.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-ubuntu-16-04/#rollback-procedure
-.. |APT| replace:: Scylla deb repo
-.. _APT: http://www.scylladb.com/download/
-.. |APT_ENTERPRISE| replace:: Scylla Enterprise Deb repo
-.. _APT_ENTERPRISE: http://www.scylladb.com/enterprise-download/ubuntu-16-04/
-.. |ENABLE_APT_REPO| replace:: sudo add-apt-repository -y ppa:openjdk-r/ppa
-.. |JESSIE_BACKPORTS| replace:: openjdk-8-jre-headless
-.. include:: /upgrade/_common/upgrade-guide-from-2.1-to-2018.1-ubuntu-and-debian.rst
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-ubuntu.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-ubuntu.rst
@@ -1,10 +0,0 @@
-.. |OS| replace:: Ubuntu 14.04 
-.. |ROLLBACK| replace:: rollback
-.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-2.1-to-2018.1/upgrade-guide-from-2.1-to-2018.1-ubuntu/#rollback-procedure
-.. |APT| replace:: Scylla deb repo
-.. _APT: http://www.scylladb.com/download/
-.. |APT_ENTERPRISE| replace:: Scylla Enterprise Deb repo
-.. _APT_ENTERPRISE: http://www.scylladb.com/enterprise-download/ubuntu/
-.. |ENABLE_APT_REPO| replace:: sudo add-apt-repository -y ppa:openjdk-r/ppa
-.. |JESSIE_BACKPORTS| replace:: openjdk-8-jre-headless
-.. include:: /upgrade/_common/upgrade-guide-from-2.1-to-2018.1-ubuntu-and-debian.rst
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/index.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/index.rst
@@ -1,38 +0,0 @@
-================================================
-Upgrade - Scylla 3.0 to Scylla Enterprise 2019.1
-================================================
-
-.. toctree::
-   :titlesonly:
-   :hidden:
-
-   Red Hat Enterprise Linux and CentOS <upgrade-guide-from-3.0-to-2019.1-rpm>
-   Ubuntu 16.04 <upgrade-guide-from-3.0-to-2019.1-ubuntu-16-04>
-   Ubuntu 14.04 <upgrade-guide-from-3.0-to-2019.1-ubuntu-18-04>
-   Debian <upgrade-guide-from-3.0-to-2019.1-debian>
-   Metrics <metric-update-3.0-to-2019.1>
-
-
-.. raw:: html
-
-
-   <div class="panel callout radius animated">
-            <div class="row">
-              <div class="medium-3 columns">
-                <h5 id="getting-started">Upgrade Scylla Scylla 3.0 to Scylla Enterprise 2019.1</h5>
-              </div>
-              <div class="medium-9 columns">
-
-Upgrade guides are available for:
-
-* :doc:`Upgrade Scylla Enterprise from 3.0.x to 2019.1.y on Red Hat Enterprise Linux and CentOS <upgrade-guide-from-3.0-to-2019.1-rpm>`
-* :doc:`Upgrade Scylla Enterprise from 3.0.x to 2019.1.y on Ubuntu 16.04 <upgrade-guide-from-3.0-to-2019.1-ubuntu-16-04>`
-* :doc:`Upgrade Scylla Enterprise from 3.0.x to 2019.1.y on Ubuntu 18.04 <upgrade-guide-from-3.0-to-2019.1-ubuntu-18-04>`
-* :doc:`Upgrade Scylla Enterprise from 3.0.x to 2019.1.y on Debian <upgrade-guide-from-3.0-to-2019.1-debian>`
-* :doc:`Scylla Metrics Update - Scylla 3.0 to 2019.1 <metric-update-3.0-to-2019.1>`
-
-.. raw:: html
-
-   </div>
-   </div>
-   </div>
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/metric-update-3.0-to-2019.1.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/metric-update-3.0-to-2019.1.rst
@@ -1,87 +0,0 @@
-=============================================================
-Scylla Metric Update - Scylla 3.0 to Scylla Enterprise 2019.1
-=============================================================
-
-
-The following metrics are new in Scylla Enterprise 2019.1
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-* scylla_database_paused_reads
-* scylla_database_paused_reads_permit_based_evictions
-* scylla_database_total_view_updates_failed_local
-* scylla_database_total_view_updates_failed_remote
-* scylla_database_total_view_updates_pushed_local
-* scylla_database_total_view_updates_pushed_remote
-* scylla_database_view_building_paused
-* scylla_hints_for_views_manager_corrupted_files
-* scylla_hints_for_views_manager_discarded
-* scylla_hints_manager_corrupted_files
-* scylla_hints_manager_discarded
-* scylla_query_processor_queries
-* scylla_reactor_aio_errors
-* scylla_sstables_capped_local_deletion_time
-* scylla_sstables_capped_tombstone_deletion_time
-* scylla_sstables_cell_tombstone_writes
-* scylla_sstables_cell_writes
-* scylla_sstables_partition_reads
-* scylla_sstables_partition_seeks
-* scylla_sstables_partition_writes
-* scylla_sstables_range_partition_reads
-* scylla_sstables_range_tombstone_writes
-* scylla_sstables_row_reads
-* scylla_sstables_row_writes
-* scylla_sstables_single_partition_reads
-* scylla_sstables_sstable_partition_reads
-* scylla_sstables_static_row_writes
-* scylla_sstables_tombstone_writes
-* scylla_storage_proxy_coordinator_last_mv_flow_control_delay
-
-The following metrics names changes from Scylla 3.0 to Scylla Enterprise 2019.1
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. list-table::
-   :widths: 30 30
-   :header-rows: 1
-                 
-   * - Scylla 3.0 Name
-     - Scylla 2019.1 Name
-   * - scylla_io_queue_commitlog_delay
-     - scylla_io_queue_delay
-   * - scylla_io_queue_commitlog_queue_length
-     - scylla_io_queue_queue_length
-   * - scylla_io_queue_commitlog_shares
-     - scylla_io_queue_shares
-   * - scylla_io_queue_commitlog_total_bytes
-     - scylla_io_queue_total_bytes
-   * - scylla_io_queue_commitlog_total_operations
-     - scylla_io_queue_total_operations
-   * - scylla_io_queue_compaction_delay
-     - scylla_io_queue_delay
-   * - scylla_io_queue_compaction_queue_length
-     - scylla_io_queue_queue_length
-   * - scylla_io_queue_compaction_shares
-     - scylla_io_queue_shares
-   * - scylla_io_queue_compaction_total_bytes
-     - scylla_io_queue_total_bytes
-   * - scylla_io_queue_compaction_total_operations
-     - scylla_io_queue_total_operations
-   * - scylla_io_queue_default_delay
-     - scylla_io_queue_delay
-   * - scylla_io_queue_default_queue_length
-     - scylla_io_queue_queue_length
-   * - scylla_io_queue_default_shares
-     - scylla_io_queue_shares
-   * - scylla_io_queue_default_total_bytes
-     - scylla_io_queue_total_bytes
-   * - scylla_io_queue_default_total_operations
-     - scylla_io_queue_total_operations
-   * - scylla_io_queue_memtable_flush_delay
-     - scylla_io_queue_delay
-   * - scylla_io_queue_memtable_flush_queue_length
-     - scylla_io_queue_queue_length
-   * - scylla_io_queue_memtable_flush_shares
-     - scylla_io_queue_shares
-   * - scylla_io_queue_memtable_flush_total_bytes
-     - scylla_io_queue_total_bytes
-   * - scylla_io_queue_memtable_flush_total_operations
-     - scylla_io_queue_total_operations
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-debian.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-debian.rst
@@ -1,8 +0,0 @@
-.. |OS| replace:: Debian 9
-.. |ROLLBACK| replace:: rollback
-.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-debian/#rollback-procedure
-.. |APT| replace:: Scylla deb repo
-.. _APT: http://www.scylladb.com/download/?platform=debian-9
-.. |APT_ENTERPRISE| replace:: Scylla Enterprise Deb repo
-.. _APT_ENTERPRISE: http://www.scylladb.com/enterprise-download/debian9/
-.. include:: /upgrade/_common/upgrade-guide-from-3.0-to-2019.1-ubuntu-and-debian.rst
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-rpm.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-rpm.rst
@@ -1,172 +0,0 @@
-=============================================================================================
-Upgrade Guide - Scylla 3.0 to 2019.1 for Red Hat Enterprise Linux 7 or CentOS 7
-=============================================================================================
-
-This document is a step by step procedure for upgrading from Scylla 3.0 to Scylla Enterprise 2019.1, and rollback to 3.0 if required.
-
-
-Applicable versions
-===================
-This guide covers upgrading Scylla from the following versions: 3.0.x to Scylla Enterprise version 2019.1.y, on the following platforms:
-
-* Red Hat Enterprise Linux, version 7 and later
-* CentOS, version 7 and later
-* Note: packages are no longer provided for Fedora
-
-.. include:: /upgrade/_common/upgrade_to_2019_warning.rst
-  
-Upgrade Procedure
-=================
-
-.. include:: /upgrade/_common/warning.rst
-
-A Scylla upgrade is a rolling procedure which does not require full cluster shutdown. For each of the nodes in the cluster, serially (i.e. one at a time), you will:
-
-* Check cluster schema
-* Drain node and backup the data
-* Backup configuration file
-* Stop Scylla
-* Download and install new Scylla packages
-* Start Scylla
-* Validate that the upgrade was successful
-
-Apply the following procedure **serially** on each node. Do not move to the next node before validating the node is up and running with the new version.
-
-**During** the rolling upgrade it is highly recommended:
-
-* Not to use new 2019.1 features
-* Not to run administration functions, like repairs, refresh, rebuild or add or remove nodes. See `sctool <https://manager.docs.scylladb.com/stable/sctool/index.html>`_ for suspending Scylla Manager scheduled or running repairs.
-* Not to apply schema changes
-
-Upgrade steps
-=============
-Check cluster schema
--------------------
-Make sure that all nodes have the schema synced prior to the upgrade. The upgrade will not survive a schema disagreement between nodes.
-
-.. code:: sh
-
-       nodetool describecluster
-
-Drain node and backup the data
------------------------------
-Before any major procedure, like an upgrade, it is recommended to backup all the data to an external device. In Scylla, backup is done using the ``nodetool snapshot`` command. For **each** node in the cluster, run the following command:
-
-.. code:: sh
-
-   nodetool drain
-   nodetool snapshot
-
-Take note of the directory name that nodetool gives you, and copy all the directories having this name under ``/var/lib/scylla`` to a backup device.
-
-When the upgrade is complete (all nodes), the snapshot should be removed by ``nodetool clearsnapshot -t <snapshot>``, or you risk running out of space.
-
-Backup configuration files
--------------------------
-.. code:: sh
-
-   for conf in $( rpm -qc $(rpm -qa | grep scylla) | grep -v contains ); do sudo cp -v $conf $conf.backup-3.0; done
-
-Stop Scylla
-----------
-.. code:: sh
-
-   sudo systemctl stop scylla-server
-
-Download and install the new release
------------------------------------
-Before upgrading, check what version you are running now using ``rpm -qa | grep scylla-server``. You should use the same version in case you want to :ref:`rollback <upgrade-3.0-2019.1-rpm-rollback-procedure>` the upgrade. If you are not running a 3.0.x version, stop right here! This guide only covers 3.0.x to 2019.1.y upgrades.
-
-To upgrade:
-
-1. Update the `Scylla RPM Enterprise repo <http://www.scylladb.com/enterprise-download/centos_rpm/>`_  to **2019.1**
-2. Install
-
-.. code:: sh
-
-   sudo yum clean all
-   sudo rm -rf /var/cache/yum
-   sudo yum remove scylla\*
-   sudo yum install scylla-enterprise 
-   for conf in $( rpm -qc $(rpm -qa | grep scylla) | grep -v contains ); do sudo cp -v $conf.backup-3.0 $conf; done
-
-Start the node
--------------
-.. code:: sh
-
-   sudo systemctl start scylla-server
-
-Validate
--------
-1. Check cluster status with ``nodetool status`` and make sure **all** nodes, including the one you just upgraded, are in UN status.
-2. Use ``curl -X GET "http://localhost:10000/storage_service/scylla_release_version"`` to check scylla version.
-3. Use ``journalctl _COMM=scylla`` to check there are no new errors in the log.
-4. Check again after 2 minutes, to validate no new issues are introduced.
-
-Once you are sure the node upgrade is successful, move to the next node in the cluster.
-
-* More on :doc:`Scylla Metrics Update - Scylla 3.0 to 2019.1<metric-update-3.0-to-2019.1>`
-
-.. _upgrade-3.0-2019.1-rpm-rollback-procedure:
-
-Rollback Procedure
-==================
-
-.. include:: /upgrade/_common/warning_rollback.rst
-
-The following procedure describes a rollback from Scylla Enterprise release 2019.1.x to 3.0.y. Apply this procedure if an upgrade from 3.0 to 2019.1 failed before completing on all nodes. Use this procedure only for nodes you upgraded to 2019.1.
-
-Scylla rollback is a rolling procedure which does **not** require full cluster shutdown.
-For each of the nodes rollback to 3.0, you will:
-
-* Drain the node and stop Scylla
-* Retrieve the old Scylla packages
-* Restore the configuration file
-* Restart Scylla
-* Validate the rollback success
-
-Apply the following procedure **serially** on each node. Do not move to the next node before validating that the node is up and running with the rolled-back version (3.0).
-
-Rollback steps
-==============
-Gracefully shutdown Scylla
--------------------------
-.. code:: sh
-
-   nodetool drain
-   sudo systemctl stop scylla-server
-
-Download and install the new release
------------------------------------
-1. Remove the old repo file.
-
-.. code:: sh
-
-   sudo rm -rf /etc/yum.repos.d/scylla.repo
-
-2. Update the `Scylla RPM repo <http://www.scylladb.com/download/?platform=centos>`_ to **3.0**
-3. Install
-
-.. code:: sh
-
-   sudo yum clean all
-   sudo yum remove scylla\*
-   sudo yum install scylla
-
-Restore the configuration file
------------------------------
-
-.. code:: sh
-
-   for conf in $( rpm -qc $(rpm -qa | grep scylla) | grep -v contains ); do sudo cp -v $conf.backup-3.0 $conf; done
-
-Start the node
--------------
-
-.. code:: sh
-
-   sudo systemctl start scylla-server
-
-Validate
--------
-See the upgrade instructions above for the validation steps. Once you are sure the node rollback is successful, move to the next node in the cluster.
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-ubuntu-16-04.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-ubuntu-16-04.rst
@@ -1,8 +0,0 @@
-.. |OS| replace:: Ubuntu 16.04
-.. |ROLLBACK| replace:: rollback
-.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-ubuntu-16-04/#rollback-procedure
-.. |APT| replace:: Scylla deb repo
-.. _APT: http://www.scylladb.com/download/
-.. |APT_ENTERPRISE| replace:: Scylla Enterprise Deb repo
-.. _APT_ENTERPRISE: http://www.scylladb.com/enterprise-download/ubuntu-16-04/
-.. include:: /upgrade/_common/upgrade-guide-from-3.0-to-2019.1-ubuntu-and-debian.rst
--- a/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-ubuntu-18-04.rst
+++ b/docs/upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-ubuntu-18-04.rst
@@ -1,8 +0,0 @@
-.. |OS| replace:: Ubuntu 18.04
-.. |ROLLBACK| replace:: rollback
-.. _ROLLBACK: /upgrade/upgrade-to-enterprise/upgrade-guide-from-3.0-to-2019.1/upgrade-guide-from-3.0-to-2019.1-ubuntu-18-04/#id4
-.. |APT| replace:: Scylla deb repo
-.. _APT: http://www.scylladb.com/download/
-.. |APT_ENTERPRISE| replace:: Scylla Enterprise Deb repo
-.. _APT_ENTERPRISE: http://www.scylladb.com/enterprise-download/ubuntu/
-.. include:: /upgrade/_common/upgrade-guide-from-3.0-to-2019.1-ubuntu-and-debian.rst
--- a/Show More
+++ b/Show More