In #13001 we found a test case which causes a crash because it didn't handle `UNSET_VALUE` properly:
```python3
def test_unset_insert_where(cql, table2):
    p = unique_key_int()
    stmt = cql.prepare(f'INSERT INTO {table2} (p, c) VALUES ({p}, ?)')
    with pytest.raises(InvalidRequest, match="unset"):
        cql.execute(stmt, [UNSET_VALUE])

def test_unset_insert_where_lwt(cql, table2):
    p = unique_key_int()
    stmt = cql.prepare(f'INSERT INTO {table2} (p, c) VALUES ({p}, ?) IF NOT EXISTS')
    with pytest.raises(InvalidRequest, match="unset"):
        cql.execute(stmt, [UNSET_VALUE])
```
This PR does an absolutely minimal change to fix the crash.
It adds a check the moment before the crash would happen.
To make sure that everything works correctly, and to detect any possible breaking changes, I wrote a bunch of tests that validate the current behavior.
I also ported some tests from the `master` branch, at least the ones that were in line with the behavior on `branch-5.0`.
The changes are the same as in #13133, just cherry-picked to `branch-5.0`.
Closes #13178
* github.com:scylladb/scylladb:
cql-pytest/test_unset: port some tests from master branch
cql-pytest/test_unset: test unset value in UPDATEs with LWT conditions
cql-pytest/test_unset: test unset value in UPDATEs with IF EXISTS
cql-pytest/test_unset: test unset value in UPDATE statements
cql-pytest/test_unset: test unset value in INSERTs with IF NOT EXISTS
cql-pytest/test_unset: test unset value in INSERT statements
cas_request: fix crash on unset value in primary key with LWT
The evictable reader must ensure that each buffer fill makes forward
progress, i.e. the last fragment in the buffer has a position larger
than the last fragment from the last buffer-fill. Otherwise, the reader
could get stuck in an infinite loop between buffer fills, if the reader
is evicted in-between.
The code guaranteeing this forward progress has a bug: when the next
expected position is a partition-start (another partition), the code
would loop forever, effectively reading everything there is from the
underlying reader.
To avoid this, add a special case to ignore the progress guarantee loop
altogether when the next expected position is a partition start. In this
case, progress is guaranteed anyway, because there is exactly one
partition-start fragment in each partition.
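The progress guarantee and its partition-start special case can be modeled with a short sketch (names and structure are invented for this illustration, not Scylla's actual reader code):

```python
def fill_buffer(fragment_positions, last_pos, at_partition_start):
    """Toy model of the evictable reader's buffer fill (illustrative only)."""
    buf = []
    for pos in fragment_positions:
        buf.append(pos)
        # Special case: at a partition start, the single partition-start
        # fragment per partition already guarantees forward progress, so the
        # progress-guarantee loop is skipped entirely.
        if at_partition_start or pos > last_pos:
            break
    return buf

# Normal case: keep filling until the buffer ends past the previous fill.
assert fill_buffer([3, 4, 5], last_pos=4, at_partition_start=False) == [3, 4, 5]
# Partition-start case: a single fragment is already progress.
assert fill_buffer([3, 4, 5], last_pos=4, at_partition_start=True) == [3]
```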
Fixes: #13491
Closes #13563
(cherry picked from commit 72003dc35c)
I copied cql-pytest tests from the master branch,
at least the ones that were compatible with branch-5.1
Some of them expected an InvalidRequest exception
when UNSET_VALUE is present in places that
branch-5.1 allows, so I skipped these tests.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
(cherry picked from commit c75359d664)
Test what happens when an UNSET_VALUE is passed to
an UPDATE statement with an LWT condition.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
(cherry picked from commit 24f76f40b7)
Test what happens when an UNSET_VALUE is passed to
an UPDATE statement with IF EXISTS condition.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
(cherry picked from commit 3f133cfa87)
Test what happens when an UNSET_VALUE is passed to
an UPDATE statement.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
(cherry picked from commit d66e23b265)
Add tests which test INSERT statements with IF NOT EXISTS,
when an UNSET_VALUE is passed for some column.
The tests are similar to the previous ones done for simple
INSERTs without IF NOT EXISTS.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
(cherry picked from commit 378e8761b9)
Add some tests which test what happens when an UNSET_VALUE
is passed to an INSERT statement.
Passing it for partition key column is impossible
because python driver doesn't allow it.
Passing it for clustering key column causes Scylla
to silently ignore the INSERT.
Passing it for a regular or static column
causes this column to remain unchanged,
as expected.
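The observed semantics for regular and static columns can be captured in a toy model (this is only an illustration of the behavior, not Scylla code; `UNSET_VALUE` here is a stand-in for the driver's sentinel):

```python
UNSET_VALUE = object()  # stand-in for the driver's sentinel value

def apply_insert(row, bound_values):
    """Columns bound to UNSET_VALUE are left unchanged; others are written."""
    out = dict(row)
    for col, val in bound_values.items():
        if val is not UNSET_VALUE:
            out[col] = val
    return out

assert apply_insert({"v": 7}, {"v": UNSET_VALUE}) == {"v": 7}  # unchanged
assert apply_insert({"v": 7}, {"v": 8}) == {"v": 8}            # written
```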
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
(cherry picked from commit fc26f6b850)
Doing an LWT INSERT/UPDATE and passing UNSET_VALUE
for the primary key column used to cause a crash.
This is a minimal fix for this crash.
Crash backtrace pointed to a place where
we tried doing .front() on an empty vector
of primary key ranges.
I added a check that the vector isn't empty.
If it's empty then let's throw an error
and mention that it's most likely
caused by an unset value.
This has been fixed on master,
but the PR that fixed it introduced
breaking changes, which I don't want
to add to branch-5.1.
This fix is absolutely minimal
- it performs the check at the
last moment before a crash.
It's not the prettiest, but it works
and can't introduce breaking changes,
because the new code gets activated
only in cases that would've caused
a crash.
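The shape of the guard can be sketched as follows (a minimal model; the real code is C++ and the exact error message differs):

```python
class InvalidRequest(Exception):
    pass

def first_key_range(key_ranges):
    # The old code unconditionally took .front() of this vector; an unset
    # primary-key value left it empty and caused the crash.
    if not key_ranges:
        raise InvalidRequest(
            "No primary key ranges; most likely caused by an unset value")
    return key_ranges[0]
```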
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
(cherry picked from commit 7663dc31b8)
This is undefined behavior because the evaluation order of function
arguments is unspecified. With GCC, where evaluation is right-to-left,
schema will already have been moved once it is forwarded to
make_flat_mutation_reader_from_mutations_v2().
The consequence is that memory tracking of mutation_fragment_v2
(tracking only the permit used by view updates), which uses the schema,
can be incorrect. More likely, however, Scylla will crash when
estimating memory usage for a row, which accesses schema column
information using schema::column_at(), which in turn asserts that
the requested column really exists.
Fixes #13093.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes #13092
(cherry picked from commit 3fae46203d)
* tools/python3 f725ec7...c888f39 (1):
> dist: redhat: provide only a single version
s/%{version}/%{version}-%{release}/ in `Requires:` sections.
This enforces that the runtime dependencies are on exactly the same
release across Scylla packages.
Fixes #13222
Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
(cherry picked from commit 7165551fd7)
This patch increases the connection timeout in the get_cql_cluster()
function in test/cql-pytest/run.py. This function is used to test
that Scylla came up, and also test/alternator/run uses it to set
up the authentication - which can only be done through CQL.
The Python driver has 2-second and 5-second default timeouts that should
have been more than enough for everybody (TM), but in #13239 we saw
that in one case it apparently wasn't enough. So to be extra safe,
let's increase the default connection-related timeouts to 60 seconds.
Note this change only affects the Scylla *boot* in the test/*/run
scripts, and it does not affect the actual tests - those have different
code to connect to Scylla (see cql_session() in test/cql-pytest/util.py),
and we already increased the timeouts there in #11289.
Fixes #13239
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #13291
(cherry picked from commit 4fdcee8415)
Otherwise the null pointer is dereferenced.
Add a unit test reproducing the issue
and testing this fix.
Fixes #13636
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 12877ad026)
The removenode_abort logic that follows the warning
may throw, in which case information about
the original exception was lost.
Fixes: #11722
Closes #11735
(cherry picked from commit 40bd9137f8)
In `make_group0_history_state_id_mutation`, when adding a new entry to
the group 0 history table, if the parameter `gc_older_than` is engaged,
we create a range tombstone in the mutation which deletes entries older
than the new one by `gc_older_than`. In particular if
`gc_older_than = 0`, we want to delete all older entries.
There was a subtle bug there: we were using millisecond resolution when
generating the tombstone, while the provided state IDs used microsecond
resolution. On a super fast machine it could happen that we managed to
perform two schema changes in a single millisecond; this happened
sometimes in `group0_test.test_group0_history_clearing_old_entries`
on our new CI/promotion machines, causing the test to fail because the
tombstone didn't clear the entry corresponding to the previous schema
change when performing the next schema change (since they happened in
the same millisecond).
Use microsecond resolution to fix that. The consecutive state IDs used
in group 0 mutations are guaranteed to be strictly monotonic at
microsecond resolution (see `generate_group0_state_id` in
service/raft/raft_group0_client.cc).
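The truncation effect can be demonstrated with plain integers (the timestamps here are invented for illustration):

```python
# Two consecutive state IDs generated within the same millisecond.
prev_us = 1_680_000_000_123_250   # microseconds
new_us = 1_680_000_000_123_900

# A millisecond-resolution tombstone bound truncates downward...
ms_bound_us = (new_us // 1000) * 1000
# ...so the previous entry is NOT older than the bound and survives (the bug):
assert not prev_us < ms_bound_us
# A microsecond-resolution bound clears it, as intended:
assert prev_us < new_us
```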
Fixes #13594
Closes #13604
* github.com:scylladb/scylladb:
db: system_keyspace: use microsecond resolution for group0_history range tombstone
utils: UUID_gen: accept decimicroseconds in min_time_UUID
(cherry picked from commit 10c1f1dc80)
Backport of 4aa0b16852 to branch-5.0
Merge 'distributed_loader: detect highest generation before populating column families' from Benny Halevy
We should scan all sstables in the table directory and its
subdirectories to determine the highest sstable version and generation
before using it for creating new sstables (via reshard or reshape).
Otherwise, the generations of new sstables created when populating staging (via reshard or reshape) may collide with generations in the base directory, leading to https://github.com/scylladb/scylladb/issues/11789
Refs https://github.com/scylladb/scylladb/issues/11789
Fixes https://github.com/scylladb/scylladb/issues/11793
Closes https://github.com/scylladb/scylladb/pull/11795
Closes #13613
* github.com:scylladb/scylladb:
Merge 'distributed_loader: detect highest generation before populating column families' from Benny Halevy
replica: distributed_loader: reindent populate_keyspace
replica: distributed_loader: coroutinize populate_keyspace
We should scan all sstables in the table directory and its
subdirectories to determine the highest sstable version and generation
before using it for creating new sstables (via reshard or reshape).
Otherwise, the generations of new sstables created when populating staging (via reshard or reshape) may collide with generations in the base directory, leading to https://github.com/scylladb/scylladb/issues/11789
Refs scylladb/scylladb#11789
Fixes scylladb/scylladb#11793
Closes #11795
* github.com:scylladb/scylladb:
distributed_loader: populate_column_family: reindent
distributed_loader: coroutinize populate_column_family
distributed_loader: table_population_metadata: start: reindent
distributed_loader: table_population_metadata: coroutinize start_subdir
distributed_loader: table_population_metadata: start_subdir: reindent
distributed_loader: pre-load all sstables metadata for table before populating it
(cherry picked from commit 4aa0b16852)
The purpose of `_stop` is to remember whether the consumption of the
last partition was interrupted or whether it was consumed fully. In the
former case, the compactor allows retrieving the compaction state for
the given partition, so that its compaction can be resumed at a later
point in time.
Currently, `_stop` is set to `stop_iteration::yes` whenever the return
value of any of the `consume()` methods is also `stop_iteration::yes`.
Meaning, if the consumption of the partition is interrupted, this is
remembered in `_stop`.
However, a partition whose consumption was interrupted is not always
continued later. Sometimes consumption of a partition is interrupted
because the partition is not interesting and the downstream consumer
wants to stop it. In these cases the compactor should not return an
engaged optional from `detach_state()`, because there is no state to
detach; the state should be thrown away. This was handled incorrectly
so far and is fixed in this patch, by overwriting `_stop` in
`consume_partition_end()` with whatever the downstream consumer returns.
Meaning, if they want to skip the partition, then `_stop` is reset to
`stop_iteration::no` and `detach_state()` will return a disengaged
optional, as it should in this case.
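The fixed behavior can be modeled in a toy sketch (invented names; the real compactor is C++):

```python
class Compactor:
    """_stop mirrors the downstream consumer's last answer for the partition."""

    def __init__(self):
        self._stop = False  # stands in for stop_iteration
        self._state = "partial-compaction-state"

    def consume_row(self, downstream_stop):
        self._stop = downstream_stop
        return downstream_stop

    def consume_partition_end(self, downstream_stop):
        # The fix: overwrite _stop with the downstream result, so a skipped
        # partition leaves nothing to resume.
        self._stop = downstream_stop
        return downstream_stop

    def detach_state(self):
        return self._state if self._stop else None
```

With this model, a consumer that interrupts a row but then skips the partition ends with `detach_state()` returning `None`, matching the intended disengaged optional.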
Fixes: #12629
Closes #13365
(cherry picked from commit bae62f899d)
The patch doesn't apply cleanly, so a targeted backport PR was necessary.
I also needed to cherry-pick two patches from https://github.com/scylladb/scylladb/pull/13255 that the backported patch depends on. Decided against backporting the entire https://github.com/scylladb/scylladb/pull/13255 as it is quite an intrusive change.
Fixes: https://github.com/scylladb/scylladb/issues/11803
Closes #13517
* github.com:scylladb/scylladb:
reader_concurrency_semaphore: don't evict inactive readers needlessly
reader_concurrency_semaphore: add stats to record reason for queueing permits
reader_concurrency_semaphore: can_admit_read(): also return reason for rejection
reader_concurrency_semaphore: add set_resources()
The total disk space used metric incorrectly reports the amount of
disk space ever used. It should instead report the size of all
sstables in use plus the ones waiting to be deleted.
Live disk space used, by this definition, shouldn't count the
ones waiting to be deleted.
And the live sstable count shouldn't count sstables waiting to
be deleted either.
Fix all that.
Fixes #12717.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
(cherry picked from commit 529a1239a9)
Some callees of update_pending_ranges use the variant of get_address_ranges()
which builds a hashmap of all <endpoint, owned range> pairs. For
everywhere_topology, the size of this map is quadratic in the number of
endpoints, making it big enough to cause contiguous allocations of tens of MiB
for clusters of realistic size, potentially causing trouble for the
allocator (as seen e.g. in #12724). This deserves a correction.
This patch removes the quadratic variant of get_address_ranges() and replaces
its uses with its linear counterpart.
Refs #10337
Refs #10817
Refs #10836
Refs #10837
Fixes #12724
(cherry picked from commit 9e57b21e0c)
Inactive readers should only be evicted to free up resources for waiting
readers. Evicting them when waiters are not admitted for any other
reason than resources is wasteful and leads to extra load later on when
these evicted readers have to be recreated and requeued.
This patch changes the logic on both the registering path and the
admission path to not evict inactive readers unless there are readers
actually waiting on resources.
A unit test is also added, reproducing the overly-aggressive eviction and
checking that it doesn't happen anymore.
Fixes: #11803
Closes #13286
(cherry picked from commit bd57471e54)
When diagnosing problems, knowing why permits were queued is very
valuable. Record the reason in a new set of stats counters, one for
each reason a permit can be queued.
(cherry picked from commit 7b701ac52e)
Allow changing the total or initial resources the semaphore has.
After calling `set_resources()` the semaphore will look as if it
had been created with the specified amount of resources.
(cherry picked from commit ecc7c72acd)
We currently don't clean up the system_distributed.view_build_status
table after removed nodes. This can cause false-positive check for
whether view update generation is needed for streaming.
The proper fix is to clean up this table, but that will be more
involved, and even when done, it might not take effect immediately.
So until then, and to be on the safe side, filter out entries
belonging to unknown hosts from said table.
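The filtering step amounts to a simple membership check; a minimal sketch (row shape and field names assumed for illustration):

```python
def filter_view_build_status(rows, known_host_ids):
    """Drop view_build_status entries left behind by removed nodes."""
    return [r for r in rows if r["host_id"] in known_host_ids]

rows = [
    {"host_id": "a", "status": "SUCCESS"},
    {"host_id": "removed-node", "status": "SUCCESS"},
]
# Only entries for hosts still known to the cluster survive.
assert filter_view_build_status(rows, {"a"}) == [{"host_id": "a", "status": "SUCCESS"}]
```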
Fixes: #11905
Refs: #11836
Closes #11860
(cherry picked from commit 84a69b6adb)
`paxos_response_handler::learn_decision` was calling
`cdc_service::augment_mutation_call` concurrently with
`storage_proxy::mutate_internal`. `augment_mutation_call` was selecting
rows from the base table in order to create the preimage, while
`mutate_internal` was writing rows to the table. It was therefore
possible for the preimage to observe the update that it accompanied,
which doesn't make any sense, because the preimage is supposed to show
the state before the update.
Fix this by performing the operations sequentially. We can still perform
the CDC mutation write concurrently with the base mutation write.
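The intended ordering can be sketched with asyncio (a simplified model of the sequencing, not the storage_proxy code):

```python
import asyncio

async def apply_lwt_write(read_preimage, write_base, write_cdc_log):
    # Read the preimage strictly before the base-table write starts, so it
    # cannot observe the update it accompanies...
    preimage = await read_preimage()
    # ...while the CDC log write may still run concurrently with the base write.
    await asyncio.gather(write_cdc_log(preimage), write_base())
    return preimage
```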
`cdc_with_lwt_test` was sometimes failing in debug mode due to this bug
and was marked flaky. Unmark it.
Fixes #12098
(cherry picked from commit 1ef113691a)
On some docker instance configurations, hostname resolution does not
work, so our script fails on startup because we use `hostname -i` to
construct cqlshrc.
To prevent the error, we can use --rpc-address or --listen-address
for the address, since it should be the same.
Fixes #12011
Closes #12115
(cherry picked from commit 642d035067)
This PR backports 2f4a793457 to branch-5.1. Said patch depends on some other patches that are not part of any release yet.
Closes #13224
* github.com:scylladb/scylladb:
reader_concurrency_semaphore: clear_inactive_reads(): defer evicting to evict()
reader_permit: expose operator<<(reader_permit::state)
reader_permit: add get_state() accessor
Instead of open-coding the same in an incomplete way.
clear_inactive_reads() does incomplete eviction in several ways:
* it doesn't decrement _stats.inactive_reads
* it doesn't set the permit to evicted state
* it doesn't cancel the ttl timer (if any)
* it doesn't call the eviction notifier on the permit (if there is one)
The list goes on. We already have an evict() method that does all of
this correctly; use that instead of the badly open-coded alternative.
This patch also enhances the existing test for clear_inactive_reads()
and adds a new one specifically for `stop()` being called while having
inactive reads.
Fixes: #13048
Closes #13049
(cherry picked from commit 2f4a793457)
Just like 4a8ed4c, we also need to wait for udev event completion to
create /dev/disk/by-uuid/$UUID for newly formatted disk, to mount the
disk just after formatting.
Also added code to check that the uuid and the uuid-based device path are valid.
Fixes #11359
Closes #13127
* github.com:scylladb/scylladb:
scylla_raid_setup: run uuidpath existance check only after mount failed
scylla_raid_setup: prevent mount failed for /var/lib/scylla
scylla_raid_setup: check uuid and device path are valid
We added a UUID device file existence check in #11399; we expect the
UUID device file to be created before checking, and we wait for its
creation with "udevadm settle" after "mkfs.xfs".
However, we are actually getting an error saying the UUID device file
is missing, which probably means "udevadm settle" doesn't guarantee
the device file has been created under some conditions.
To avoid the error, use var-lib-scylla.mount to wait until the UUID
device file is ready, and run the file existence check only when the
service has failed.
Fixes #11617
Closes #11666
(cherry picked from commit a938b009ca)
Just like 4a8ed4c, we also need to wait for udev event completion to
create /dev/disk/by-uuid/$UUID for newly formatted disk, to mount the
disk just after formatting.
Fixes #11359
(cherry picked from commit 8835a34ab6)
There was a bug in `expr::search_and_replace`:
it doesn't preserve the `order` field of binary_operator.
The `order` field marks relations created
using SCYLLA_CLUSTERING_BOUND,
a CQL feature used for internal queries inside Scylla.
It means that the restriction should be handled as a raw
clustering bound, not as an expression in the CQL language.
Losing the SCYLLA_CLUSTERING_BOUND marker could cause issues:
the database could end up selecting the wrong clustering ranges.
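One way to picture this class of bug (sketched in Python with dataclasses; the real expression tree is C++) is that a node rewrite must carry every field over, including markers like `order`:

```python
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class binary_operator:
    lhs: str
    op: str
    rhs: str
    order: str = "none"  # "clustering" marks SCYLLA_CLUSTERING_BOUND relations

def rewrite_rhs(expr, new_rhs):
    # dataclasses.replace copies every field not explicitly overridden,
    # so the `order` marker survives the rewrite.
    return replace(expr, rhs=new_rhs)

e = binary_operator("c", "<", "?", order="clustering")
assert rewrite_rhs(e, "42").order == "clustering"
```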
Fixes: #13055
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Closes #13056
(cherry picked from commit aa604bd935)
Even in an environment that causes errors during Scylla initialization,
"scylla --version" should run without error.
To achieve this, we need to parse and execute these options before
initializing the Scylla/Seastar classes.
Fixes #11117
Closes #11179
(cherry picked from commit d7dfd0a696)
The reason is an alloc-dealloc mismatch of position_in_partition
objects allocated by cursors inside the coroutine object stored in the
update variable in row_cache::do_update().
It is allocated under the cache region, but in case of an exception it
will be destroyed under the standard allocator. If the update is
successful, it will be cleared under the region allocator, so there is
no problem in the normal case.
Fixes #12068
Closes #12233
(cherry picked from commit 992a73a861)
The Azure metadata API may sometimes return an empty zone. If that
happens, shard 0 gets an empty string as its rack, but propagates
UNKNOWN_RACK to the other shards.
An empty zone response should be handled regardless.
refs: #12185
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes #12274
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Several snitch drivers make HTTP requests to get
region/dc/zone/rack information from the cloud provider. They blindly
rely on the response being successful and read the response body to
parse the data they need from it.
That's not nice; add checks that the requests finish with an HTTP OK status.
refs: #12185
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes #12287
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Check the first fragment before dereferencing it, the fragment might be
empty, in which case move to the next one.
Found by running range scan tests with random schema and random data.
Fixes: #12821
Fixes: #12823
Fixes: #12708
Closes #12824
(cherry picked from commit ef548e654d)
Currently they are upgraded during learn on a replica. There are two
problems with this. First, the column mapping may not exist on a replica
if it missed this particular schema (because it was down, for instance),
and the mapping history is not part of the schema. In this case "Failed
to look up column mapping for schema version" will be thrown. Second,
the LWT request coordinator may not have the schema for the mutation
either (because it was already freed from the registry), and when a
replica tries to retrieve the schema from the coordinator, the retrieval
will fail, causing the whole request to fail with "Schema version XXXX
not found".
Both of those problems can be fixed by upgrading stored mutations
during prepare, on the node they are stored at. To upgrade the mutation,
its column mapping is needed, and it is guaranteed to be present at the
node the mutation is stored at, since having the corresponding schema
available is a prerequisite to storing it. After that, the mutation is
processed using the latest schema, which will be available on all nodes.
Fixes #10770
Message-Id: <Y7/ifraPJghCWTsq@scylladb.com>
(cherry picked from commit 15ebd59071)
trim_clustering_row_ranges_to() is broken for non-full keys in reverse
mode. It will trim the range to
position_in_partition_view::after_key(full_key) instead of
position_in_partition_view::before_key(key), hence it will include the
key in the resulting range rather than exclude it.
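The inclusive-vs-exclusive distinction can be illustrated by modeling positions as (key, weight) pairs, with before(k) < at(k) < after(k) (a sketch, not Scylla's position_in_partition):

```python
def before(k): return (k, -1)   # position just before the key
def at(k):     return (k, 0)    # the key itself
def after(k):  return (k, 1)    # position just after the key

rows = list(range(10))
# Trimming with an exclusive upper bound (before_key) excludes key 5:
assert [r for r in rows if at(r) < before(5)] == [0, 1, 2, 3, 4]
# The buggy variant (after_key) includes it:
assert [r for r in rows if at(r) < after(5)] == [0, 1, 2, 3, 4, 5]
```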
Fixes #12180
Refs #1446
(cherry picked from commit 536c0ab194)
A frozen set can be part of the clustering key, and with compact
storage, the corresponding key component can have an empty value.
Comparison was not prepared for this, the iterator attempts to
deserialize the item count and will fail if the value is empty.
Fixes #12242
(cherry picked from commit 232ce699ab)
Option names given in db/config.cc are handled for the command line by
passing them to boost::program_options, and for YAML by comparing them
with YAML keys.
boost::program_options has logic for understanding the
long_name,short_name syntax, so for a "workdir,W" option both --workdir and -W
worked, as intended. But our YAML config parsing doesn't have this logic
and expected "workdir,W" verbatim, which is obviously not intended. Fix that.
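A minimal sketch of the fix's idea (the actual parsing lives in db/config.cc; this just mirrors boost::program_options' long_name,short_name convention):

```python
def yaml_key(option_name: str) -> str:
    # "workdir,W" -> "workdir": strip the short-name suffix, mirroring what
    # boost::program_options already does for the command line.
    return option_name.split(",", 1)[0]

assert yaml_key("workdir,W") == "workdir"
assert yaml_key("listen_address") == "listen_address"
```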
Fixes #7478
Fixes #9500
Fixes #11503
Closes #11506
(cherry picked from commit af7ace3926)
We currently configure only TimeoutStartSec, but that is probably not
enough to prevent a coredump timeout, since TimeoutStartSec is the
maximum waiting time for service startup, and there is another directive
to limit the maximum service running time (RuntimeMaxSec).
To fix the problem, we should specify RuntimeMaxSec and TimeoutSec
(which configures both TimeoutStartSec and TimeoutStopSec).
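A hedged sketch of the resulting unit-file directives (the value here is illustrative, not the one actually shipped):

```ini
[Service]
# Bounds both startup and stop time:
TimeoutSec=300
# Bounds total service running time:
RuntimeMaxSec=300
```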
Fixes #5430
Closes #12757
(cherry picked from commit bf27fdeaa2)
We recently (in 7fbad8de87) made sure all admission paths can trigger the eviction of inactive reads. As reader eviction happens in the background, a mechanism was added to ensure only a single eviction fiber runs at any given time. This mechanism, however, had a preemption point between stopping the fiber and releasing the evict lock. This gave an opportunity for new waiters or inactive readers to be added without the fiber acting on them. Since the fiber still held the lock, it also prevented other eviction fibers from starting. This could create a situation where the semaphore could admit new reads by evicting inactive ones, yet still has waiters. Since an empty waitlist is also an admission criterion, once one waiter is wrongly added, many more can accumulate.
This series fixes this by ensuring the lock is released the instant the fiber decides there is no more work to do.
It also fixes the assert failure on recursive eviction and adds detection for the inactive/waiter contradiction.
Fixes: #11923
Refs: #11770
Closes #12026
* github.com:scylladb/scylladb:
reader_concurrency_semaphore: do_wait_admission(): detect admission-waiter anomaly
reader_concurrency_semaphore: evict_readers_in_the_background(): eliminate blind spot
reader_concurrency_semaphore: do_detach_inactive_read(): do a complete detach
(cherry picked from commit 15ee8cfc05)
The semaphore currently has two admission paths: the
obtain_permit()/with_permit() methods, which admit permits on user
request (the front door), and maybe_admit_waiters(), which admits
permits based on internal events like memory resources being returned
(the back door). The two paths used their own admission conditions,
and naturally this means that they diverged over time. Notably,
maybe_admit_waiters() did not look at inactive readers, assuming that if
there are waiters there cannot be inactive readers. This is no longer
true, however, since we merged the execution stage into the semaphore.
Waiters can queue up even when there are inactive reads, and thus
maybe_admit_waiters() has to consider evicting some of them to see if
this would allow admitting new reads.
To avoid such divergence in the future, the admission logic was moved
into a new method can_admit_read() which is now shared between the two
method families. This method now checks for the possibility of evicting
inactive readers as well.
The admission logic was tuned slightly to only consider evicting
inactive readers if there is a real possibility that this will result
in admissions: notably, before this patch, resource availability was
checked before stalls were (used permits == blocked permits), so we
could evict readers even if this couldn't help.
Because now eviction can be started from maybe_admit_waiters(), which is
also downstream from eviction, we added a flag to avoid recursive
evict -> maybe admit -> evict ... loops.
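The shared check can be sketched as follows (a hypothetical simplification; the names, return shape, and exact check ordering are illustrative, not the real can_admit_read()):

```python
def can_admit_read(available, needed, used_permits, blocked_permits, inactive_reads):
    """Returns (admit, reason); the reason tells the caller whether eviction may help."""
    if blocked_permits and used_permits == blocked_permits:
        return False, "stalled"      # eviction can't help while all reads are stalled
    if available >= needed:
        return True, None
    if inactive_reads:
        return False, "evict"        # evicting inactive readers may free resources
    return False, "resources"
```

With a check like this, both the front-door and back-door admission paths share one decision, and eviction is only attempted when it has a real chance of leading to an admission.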
Fixes: #11770
Closes #11784
(cherry picked from commit 7fbad8de87)
Convert decompressed temporary buffers into tracked buffers just before
returning them to the upper layer. This ensures these buffers are known
to the reader concurrency semaphore and it has an accurate view of the
actual memory consumption of reads.
Fixes: #12448
Closes #12454
(cherry picked from commit c4688563e3)
Consider the following MVCC state of a partition:
v2: ==== <7> [entry2] ==== <9> ===== <last dummy>
v1: ================================ <last dummy> [entry1]
Where === means a continuous range and --- means a discontinuous range.
After two LRU items are evicted (entry1 and entry2), we will end up with:
v2: ---------------------- <9> ===== <last dummy>
v1: ================================ <last dummy> [entry1]
This will cause readers to incorrectly think there are no rows before
entry <9>, because the range is continuous in v1, and continuity of a
snapshot is a union of continuous intervals in all versions. The
cursor will see the interval before <9> as continuous and the reader
will produce no rows.
This is only temporary, because current MVCC merging rules are such
that the flag on the latest entry wins, so we'll end up with this once
v1 is no longer needed:
v2: ---------------------- <9> ===== <last dummy>
...and the reader will go to sstables to fetch the evicted rows before
entry <9>, as expected.
The bug is in rows_entry::on_evicted(), which treats the last dummy
entry in a special way, and doesn't evict it, and doesn't clear the
continuity by omission.
The situation is not easy to trigger because it requires certain
eviction pattern concurrent with multiple reads of the same partition
in different versions, so across memtable flushes.
Closes #12452
(cherry-picked from commit f97268d8f2)
Fixes #12451.
Currently reverse types match the default case (false), even though they
might be wrapping a tuple type. One user-visible effect of this is that
a schema, which has a reversed<frozen<UDT>> clustering key component,
will have this component incorrectly represented in the schema cql dump:
the UDT will loose the frozen attribute. When attempting to recreate
this schema based on the dump, it will fail as the only frozen UDTs are
allowed in primary key components.
Fixes: #12576
Closes #12579
(cherry picked from commit ebc100f74f)
Fixes #12601 (maybe?)
Sort the set of tables on ID. This should ensure we never
generate duplicates in a paged listing here. Can obviously miss things if they
are added between paged calls and end up with a "smaller" UUID/ARN, but that
is to be expected.
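The idea behind stable paged listing can be sketched as (field names and token shape assumed for illustration):

```python
def list_tables_page(tables, start_after=None, limit=2):
    # Sort on the stable table ID so successive pages never repeat entries.
    ordered = sorted(tables, key=lambda t: t["id"])
    if start_after is not None:
        ordered = [t for t in ordered if t["id"] > start_after]
    page = ordered[:limit]
    next_token = page[-1]["id"] if len(page) == limit else None
    return page, next_token
```

A table added between calls with an ID smaller than the current token is missed, which is the expected limitation noted above.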
(cherry picked from commit da8adb4d26)
Since we're potentially searching the row_lock in parallel to acquiring
the read_lock on the partition, we're racing with row_locker::unlock
that may erase the _row_locks entry for the same clustering key, since
there is no lock to protect it up until the partition lock has been
acquired and the lock_partition future is resolved.
This change moves the code to search for or allocate the row lock
_after_ the partition lock has been acquired to make sure we're
synchronously starting the read/write lock function on it, without
yielding, to prevent this use-after-free.
This adds an allocation for copying the clustering key in advance
even if a row_lock entry already exists, which wasn't needed before.
It only slows us down (a bit) when there is contention and the lock
already existed when we want to take it. In the fast path there
is no contention, and the code already had to create the lock
and copy the key. In any case, the penalty of copying the key once
is tiny compared to the rest of the work that view updates do.
This is required on top of 5007ded2c1 as
seen in https://github.com/scylladb/scylladb/issues/12632
which is closely related to #12168 but demonstrates a different race
causing use-after-free.
Fixes #12632
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 4b5e324ecb)
Before this change, we constructed an sstring from a comma expression,
which evaluates to the return value of `name.size()`, but what we
expected was `sstring(const char*, size_t)`.
In this change:
* instead of passing only the size of the string_view,
both its address and size are used
* a `std::string_view` is constructed instead of an sstring, for better
performance, as we don't need to perform a deep copy
The issue is reported by GCC-13:
```
In file included from cql3/selection/selectable.cc:11:
cql3/selection/field_selector.hh:83:60: error: ignoring return value of function declared with 'nodiscard' attribute [-Werror,-Wunused-result]
auto sname = sstring(reinterpret_cast<const char*>(name.begin(), name.size()));
^~~~~~~~~~
```
Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
Closes #12666
(cherry picked from commit 186ceea009)
Fixes #12739.
(cherry picked from commit b588b19620)
get_live_token_owners returns the nodes that are part of the ring
and alive.
get_unreachable_token_owners returns the nodes that are part of the
ring and not alive.
token_metadata::get_all_endpoints returns the nodes that are part of
the ring.
The patch changes both functions to use the more authoritative source
for the nodes that are part of the ring, and to call is_alive to check
whether a node is up or down, so that correctness does not depend on
any derived information.
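The resulting shape can be sketched as (function and parameter names assumed for illustration):

```python
def split_token_owners(ring_members, is_alive):
    """Derive live/unreachable sets from the authoritative ring membership."""
    live = {n for n in ring_members if is_alive(n)}
    return live, set(ring_members) - live
```

Both sets come from the same ring membership, so they always partition it; the aliveness check is applied at the point of use rather than baked into derived state.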
This patch fixes a truncate issue in storage_proxy::truncate_blocking
where it calls get_live_token_owners and get_unreachable_token_owners to
decide the nodes to talk with for truncate operation. The truncate
failed because incorrect nodes were returned.
Fixes #10296
Fixes #11928
Closes #11952
(cherry picked from commit 16bd9ec8b1)
The line modified in this patch was supposed to increase the
optimization levels of parsers in debug mode to 1, because they
were too slow otherwise. But as a side effect, it also reduced the
optimization level in release mode to 1. This is not a problem
for the CQL frontend, because statement preparation is not
performance-sensitive, but it is a serious performance problem
for Alternator, where it lies in the hot path.
Fix this by only applying the -O1 to debug modes.
Fixes #12463
Closes #12460
(cherry picked from commit 08b3a9c786)
This series backports 0a3aba36e6 to branch 5.0.
It ensures that a closing range_tombstone_change is emitted if the highest tombstone is open ended
since range_tombstone_change_generator::flush does not do it by default.
With the additional testing added in 9a59e9369b87b1bcefed6d1d5edf25c5d3451bc4, unit tests fail without the additional patches in the series, so it exposes a latent bug in the branch where the closing range_tombstone_change is not always emitted when flushing on end of partition or end of position range.
One additional change was required for unit tests to pass:
```diff
diff --git a/range_tombstone_change_generator.hh b/range_tombstone_change_generator.hh
index 6f98be5dce..9cde8d9b20 100644
--- a/range_tombstone_change_generator.hh
+++ b/range_tombstone_change_generator.hh
@@ -78,6 +78,7 @@ class range_tombstone_change_generator {
template<RangeTombstoneChangeConsumer C>
void flush(const position_in_partition_view upper_bound, C consumer) {
if (_range_tombstones.empty()) {
+ _lower_bound = upper_bound;
return;
}
```
Refs https://github.com/scylladb/scylla/issues/10316
Closes #10969
* github.com:scylladb/scylladb:
reader: upgrading_consumer: let range_tombstone_change_generator emit last closing change
range_tombstone_change_generator: flush: emit end_position when upper limit is after all clustered rows
range_tombstone_change_generator: flush: use tri_compare rather than less
range_tombstone_change_generator: flush: return early if empty
Sometimes a single modification to a base partition requires updates to
a large number of view rows. A common example is deletion of a base
partition containing many rows. A large BATCH is also possible.
To avoid large allocations, we split the large amount of work into
batches of 100 (max_rows_for_view_updates) rows each. The existing code
assumed an empty result from one of these batches meant that we are
done. But this assumption was incorrect: There are several cases when
a base-table update may not need a view update to be generated (see
can_skip_view_updates()) so if all 100 rows in a batch were skipped,
the view update stopped prematurely. This patch includes two tests
showing when this bug can happen - one test using a partition deletion
with a USING TIMESTAMP causing the deletion to not affect the first
100 rows, and a second test using a specially-crafted large BATCH.
These use cases are fairly esoteric, but in fact hit a user in the
wild, which led to the discovery of this bug.
The fix is fairly simple: To detect when build_some() is done it is no
longer enough to check if it returned zero view-update rows; Rather,
it explicitly returns whether or not it is done as an std::optional.
The patch includes several tests for this bug, which pass on Cassandra,
failed on Scylla before this patch, and pass with this patch.
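The essence of the fix can be modeled in a toy sketch (names and types simplified, not the real build_some() signature): a batch may legitimately produce zero view updates because every row was skipped, so "empty result" cannot mean "done", and a disengaged std::optional signals completion explicitly.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <optional>
#include <vector>

// Hedged toy model: process rows in batches; return the number of view
// updates produced, or nullopt when the input is truly exhausted. A result
// of 0 is valid and means "all rows in this batch were skipped, keep going".
inline std::optional<std::size_t>
build_some(const std::vector<bool>& needs_update, std::size_t& pos, std::size_t batch_size) {
    if (pos >= needs_update.size()) {
        return std::nullopt; // explicitly done
    }
    std::size_t produced = 0;
    const std::size_t end = std::min(needs_update.size(), pos + batch_size);
    for (; pos < end; ++pos) {
        if (needs_update[pos]) {
            ++produced; // rows where can_skip_view_updates() would be false
        }
    }
    return produced; // may be 0 even though more rows remain
}
```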
Fixes #12297.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #12305
(cherry picked from commit 92d03be37b)
Permits have to wait for re-admission after having been evicted. This
happens via `reader_permit::maybe_wait_readmission()`. The user of this
method -- the evictable reader -- uses it to re-wait admission when the
underlying reader was evicted. There is one tricky scenario however,
when the underlying reader is created for the first time. When the
evictable reader is part of a multishard query stack, the created reader
might in fact be a resumed, saved one. These readers are kept in an
inactive state until actually resumed. The evictable reader shares it
permit with the to-be-resumed reader so it can check whether it has been
evicted while saved and needs to wait readmission before being resumed.
In this flow it is critical that there is no preemption point between
this check and actually resuming the reader, because if there is, the
reader might end up actually recreated, without having waited for
readmission first.
To help avoid this situation, the existing `maybe_wait_readmission()` is
split into two methods:
* `bool reader_permit::needs_readmission()`
* `future<> reader_permit::wait_for_readmission()`
The evictable reader can now ensure there is no preemption point between
`needs_readmission()` and resuming the reader.
Fixes: #10187
Tests: unit(release)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20220315105851.170364-1-bdenes@scylladb.com>
(cherry picked from commit 61028ad718)
We recently (in 7fbad8de87) made sure all admission paths can trigger the eviction of inactive reads. As reader eviction happens in the background, a mechanism was added to make sure only a single eviction fiber was running at any given time.
This mechanism however had a preemption point between stopping the fiber and releasing the evict lock. This gave an opportunity for either new waiters or inactive readers to be added, without the fiber acting on them. Since it still held onto the lock, it also prevented other eviction fibers from starting.
This could create a situation where the semaphore could admit new reads by evicting inactive ones, but it still has waiters. Since an empty waitlist is also an admission criterion, once one waiter is wrongly added, many more can accumulate.
This series fixes this by ensuring the lock is released in the instant the fiber decides there is no more work to do.
It also fixes the assert failure on recursive eviction and adds a detection to the inactive/waiter contradiction.
Fixes: #11923
Refs: #11770
Closes #12026
* github.com:scylladb/scylladb:
reader_concurrency_semaphore: do_wait_admission(): detect admission-waiter anomaly
reader_concurrency_semaphore: evict_readers_in_the_background(): eliminate blind spot
reader_concurrency_semaphore: do_detach_inactive_read(): do a complete detach
(cherry picked from commit 15ee8cfc05)
The semaphore currently has two admission paths: the
obtain_permit()/with_permit() methods which admits permits on user
request (the front door) and the maybe_admit_waiters() which admits
permits based on internal events like memory resource being returned
(the back door). The two paths used their own admission conditions
and naturally this means that they diverged in time. Notably,
maybe_admit_waiters() did not look at inactive readers assuming that if
there are waiters there cannot be inactive readers. This is not true
however since we merged the execution-stage into the semaphore. Waiters
can queue up even when there are inactive reads and thus
maybe_admit_waiters() has to consider evicting some of them to see if
this would allow for admitting new reads.
To avoid such divergence in the future, the admission logic was moved
into a new method can_admit_read() which is now shared between the two
method families. This method now checks for the possibility of evicting
inactive readers as well.
The admission logic was tuned slightly to only consider evicting
inactive readers if there is a real possibility that this will result
in admissions: notably, before this patch, resource availability was
checked before stalls were (used permits == blocked permits), so we
could evict readers even if this couldn't help.
Because now eviction can be started from maybe_admit_waiters(), which is
also downstream from eviction, we added a flag to avoid recursive
evict -> maybe admit -> evict ... loops.
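The recursion guard can be sketched with a toy model (a hedged simplification; the real semaphore's admission criteria are richer than this single condition):

```cpp
#include <cassert>

// Hedged toy: maybe_admit_waiters() may start eviction, and eviction calls
// maybe_admit_waiters() downstream, so a flag breaks the potential
// evict -> maybe admit -> evict loop.
struct toy_semaphore {
    int inactive_readers = 2;
    int waiters = 1;
    bool evicting = false;
    int evictions_started = 0;

    bool can_admit() const { return inactive_readers == 0; } // toy criterion

    void maybe_admit_waiters() {
        if (waiters > 0 && !can_admit() && !evicting) {
            evict_readers(); // start eviction only if one isn't already running
        }
        if (waiters > 0 && can_admit()) {
            --waiters; // admit a waiter
        }
    }

    void evict_readers() {
        evicting = true;
        ++evictions_started;
        while (inactive_readers > 0) {
            --inactive_readers;
            maybe_admit_waiters(); // re-entry must not start a second eviction
        }
        evicting = false;
    }
};
```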
Fixes: #11770
Closes #11784
(cherry picked from commit 7fbad8de87)
This a backport of 9fa1783892 (#11902) to branch-5.0
Flush the memtable before cleaning up the table so as not to leave any disowned
tokens in the memtable, as they might be resurrected if left there.
Refs #1239
Closes #12415
* github.com:scylladb/scylladb:
table: perform_cleanup_compaction: flush memtable
table: add perform_cleanup_compaction
api: storage_service: add logging for compaction operations et al
We don't explicitly cleanup the memtable, while
it might hold tokens disowned by the current node.
Flush the memtable before performing cleanup compaction
to make sure all tokens in the memtable are cleaned up.
Note that non-owned ranges are invalidated in the cache
in compaction_group::update_main_sstable_list_on_compaction_completion
using desc.ranges_for_cache_invalidation.
Fixes #1239
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from eb3a94e2bc)
Move the integration with compaction_manager
from the api layer to the table class so
it can also make sure the memtable is cleaned up in the next patch.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from fc278be6c4)
The --online-discard option is defined as a string parameter since it doesn't
specify "action=", but it has a boolean default value (default=True).
This breaks "provisioning in a similar environment" since the code
assumed a boolean value implies "action='store_true'", but it doesn't.
We should change the type of the option to int, and also specify
"choices=[0, 1]" just like --io-setup does.
Fixes #11700
Closes #11831
(cherry picked from commit acc408c976)
Regular INSERT statements with null values for primary key
components are rejected by Scylla since #9286 and #9314.
Batch statements missed a similar check, this patch
fixes it.
Fixes: #12060
(cherry picked from commit 7730c4718e)
When flushing range tombstones up to
position_in_partition::after_all_clustered_rows(),
the range_tombstone_change_generator now emits
the closing range_tombstone_change, so there's
no need for the upgrading_consumer to do so too.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 002be743f6)
When the highest tombstone is open ended, we must
emit a closing range_tombstone_change at
position_in_partition::after_all_clustered_rows().
Since all consumers need to do it, implement the logic
in the range_tombstone_change_generator itself.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit cd171f309c)
less is already using tri_compare internally,
and we'll use tri_compare for equality in the next patch.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit 2c5a6b3894)
Traditionally in Scylla and in Cassandra, an empty partition key is mapped
to minimum_token() instead of the empty key's usual hash function (0).
The reasons for this are unknown (to me), but one possibility is that
having one known key that maps to the minimal token is useful for
various iterations.
In murmur3_partitioner.cc we have two variants of the token calculation
function - the first is get_token(bytes_view) and the second is
get_token(schema, partition_key_view). The first includes that empty-
key special case, but the second was missing this special case!
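The invariant can be sketched as follows (a hedged toy: std::hash stands in for the murmur3 hash, and the function name mirrors but is not the real implementation):

```cpp
#include <cassert>
#include <cstdint>
#include <functional>
#include <limits>
#include <string_view>

// Hedged sketch: every token-computation variant must share the empty-key
// special case, or the same partition gets written under one token and
// looked up under another.
inline int64_t get_token(std::string_view key) {
    if (key.empty()) {
        return std::numeric_limits<int64_t>::min(); // minimum_token()
    }
    return static_cast<int64_t>(std::hash<std::string_view>{}(key)); // murmur3 stand-in
}
```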
As Kamil first noted in #9352, the second variant is used when looking
up partitions in the index file - so if a partition with an empty-string
key is saved under one token, it will be looked up under a different
token and not found. I reproduced exactly this problem when fixing
issues #9364 and #9375 (empty-string keys in materialized views and
indexes) - where a partition with an empty key was visible in a
full-table scan but couldn't be found by looking up its key because of
the wrong index lookup.
I also tried an alternative fix - changing both implementations to return
minimum_token (and not 0) for the empty key. But this is undesirable -
minimum_token is not supposed to be a valid token, so the tokenizer and
sharder may not return a valid replica or shard for it, so we shouldn't
store data under such a token. We also have code (such as an
increasing-key sanity check in the flat mutation reader) which assumes that
no real key in the data can be minimum_token, and our plan is to start
allowing data with an empty key (at least for materialized views).
This patch does not risk a backward-incompatible disk format change
for two reasons:
1. In the current Scylla, there was no valid case where an empty partition
key may appear. CQL and Thrift forbid such keys, and materialized-views
and indexes also (incorrectly - see #9364, #9375) drop such rows.
2. Although Cassandra *does* allow empty partition keys, they are only
allowed in materialized views and indexes - and we don't support reading
materialized views generated by Cassandra (the user must re-generate
them in Scylla).
When #9364 and #9375 are fixed by the next patch, empty partition keys
will start appearing in Scylla (in materialized views and in the
materialized view backing a secondary index), and this fix will become
important.
Fixes #9352
Refs #9364
Refs #9375
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit bc4d0fd5ad)
Fixes wrong condition for validating whether a JSON string representing
blob value is valid. Previously, strings such as "6" or "0392fa" would
pass the validation, even though they are too short or don't start with
"0x". Add those test cases to json_cql_query_test.cc.
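A hedged reconstruction of the corrected check (function name and exact rules are illustrative; the real validation lives in the JSON-handling code):

```cpp
#include <cassert>
#include <cctype>
#include <string_view>

// A JSON blob string must start with "0x" and be followed by hex digits,
// an even number of them (one pair per byte), so "6" and "0392fa" are
// rejected while "0x0392fa" passes.
inline bool is_valid_blob_literal(std::string_view s) {
    if (s.size() < 2 || s.substr(0, 2) != "0x") {
        return false; // missing the mandatory "0x" prefix
    }
    const std::string_view hex = s.substr(2);
    if (hex.size() % 2 != 0) {
        return false; // bytes come in hex-digit pairs
    }
    for (char c : hex) {
        if (!std::isxdigit(static_cast<unsigned char>(c))) {
            return false;
        }
    }
    return true;
}
```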
Fixes #10114
(cherry picked from commit f8b67c9bd1)
When the mutation compactor has all the rows it needs for a page, it
saves the decision to stop in a member flag: _stop.
For single partition queries, the mutation compactor is kept alive
across pages and so it has a method, start_new_page() to reset its state
for the next page. This method didn't clear the _stop flag. This meant
that the value set at the end of the previous page could cause the new page
and subsequently the entire query to be stopped prematurely.
This can happen if the new page starts with a row that is covered by a
higher level tombstone and is completely empty after compaction.
Reset the _stop flag in start_new_page() to prevent this.
This commit also adds a unit test which reproduces the bug.
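The bug shape can be modeled in a toy (hedged, heavily simplified; the real compactor tracks far more per-page state):

```cpp
#include <cassert>
#include <cstddef>

// All per-page state, including the _stop flag, must be reset in
// start_new_page(), or the stop decision from the previous page leaks into
// the next one.
struct toy_compactor {
    bool _stop = false;
    std::size_t _rows_in_page = 0;

    void consume_row() {
        if (++_rows_in_page >= 2) {
            _stop = true; // page is full: stop
        }
    }
    bool stopped() const { return _stop; }

    void start_new_page() {
        _rows_in_page = 0;
        _stop = false; // the fix: without this, the new page starts stopped
    }
};
```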
Fixes: #12361
Closes #12384
(cherry picked from commit b0d95948e1)
Due to an oversight, the local index cache isn't evicted gently
when _upper_bound exists. This is a source of reactor stalls.
Fix that.
Fixes #12271
Closes #12364
(cherry picked from commit d9269abf5b)
This includes merges 396d9e6a46 and 2c021affd1
Things that got changed here:
1. All the node_ops_... stuff in storage_service was coroutinized after 5.0, so in this merge the changes were de-coroutinized back
2. Had to cherry-pick molding for UUID (69fcc053bb and 489e50ef3a)
3. tracker::is_aborted() was added after 5.0, it caused minor context conflict
4. watchdog interval was changed, also caused minor context conflict
refs: #10284
Closes #12335
* github.com:scylladb/scylladb:
repair: use sharded abort_source to abort repair_info
repair: node_ops_info: add start and stop methods
storage_service: node_ops_abort_thread: abort all node ops on shutdown
storage_service: node_ops_abort_thread: co_return only after printing log message
storage_service: node_ops_meta_data: add start and stop methods
repair: node_ops_info: prevent accidental copy
repair: Remove ops_uuid
repair: Remove abort_repair_node_ops() altogether
repair: Subscribe on node_ops_info::as abortion
repair: Keep abort source on node_ops_info
repair: Pass node_ops_info arg to do_sync_data_using_repair()
repair: Mark repair_info::abort() noexcept
node_ops: Remove _aborted bit
node_ops: Simplify construction of node_ops_metadata
main: Fix message about repair service starting
utils: uuid: make operator bool explicit
utils: uuid: add null_uuid
Currently we use a single shared_ptr<abort_source>
that can't be copied across shards.
Instead, use a sharded<abort_source> in node_ops_info so that each
repair_info instance will use an (optional) abort_source*
on its own shard.
Added respective start and stop methods, plus a local_abort_source
getter to get the shard-local abort_source (if available).
Fixes #11826
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Prepare for adding a sharded<abort_source> member.
Wire start/stop in storage_service::node_ops_meta_data.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
A later patch adds a sharded<abort_source> to node_ops_info.
On shutdown, we must orderly stop it, so use node_ops_abort_thread
shutdown path (where node_ops_signal_abort is called with a nullopt)
to abort (and stop) all outstanding node_ops by passing
a null_uuid to node_ops_abort, and let it iterate over all
node ops to abort and stop them.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Currently the function co_returns if (!uuid_opt)
so the log info message indicating it's stopped
is not printed.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Delete node_ops_info copy and move constructors before
we add a sharded<abort_source> member for the per-shard repairs
in the next patch.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
It used to be used to abort repair_info by the corresponding node-ops
uuid, but this code is no longer there, so it's good to drop the uuid as
well
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
When node_ops_meta_data aborts it also kicks repair to find and abort
all relevant repair_infos. Now it can be simplified by subscribing
repair_meta on the abort source and aborting it without explicit kick
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Next patches will need to subscribe on node_ops_meta_data's abort source
inside repair code, so keep the pointer on node_ops_info too. At the
same time, the node_ops_info::abort becomes obsolete, because the same
check can be performed via the abort_source->abort_requested()
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Next patches will need to know more than the ops_uuid. The needed info
is (well -- will be) sitting on node_ops_info, so pass it along
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Next patch will call it inside abort_source subscription callback which
requires the calling code to be noexcept
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
A short cleanup "while at it" -- the node_ops_meta_data doesn't need to
carry dedicated _aborted boolean -- the abort source that sets it is
available instantly
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The problematic scenario this patch fixes might happen due to
unfortunate serialization of locks/unlocks between lock_pk and lock_ck,
as follows:
1. lock_pk acquires an exclusive lock on the partition.
2.a lock_ck attempts to acquire shared lock on the partition
and any lock on the row. both cases currently use a fiber
returning a future<rwlock::holder>.
2.b since the partition is locked, the lock_partition times out
returning an exceptional future. lock_row has no such problem
and succeeds, returning a future holding a rwlock::holder,
pointing to the row lock.
3.a the lock_holder previously returned by lock_pk is destroyed,
calling `row_locker::unlock`
3.b row_locker::unlock sees that the partition is not locked
and erases it, including the row locks it contains.
4.a when_all_succeeds continuation in lock_ck runs. Since
the lock_partition future failed, it destroys both futures.
4.b the lock_row future is destroyed with the rwlock::holder value.
4.c ~holder attempts to return the semaphore units to the row rwlock,
but the latter was already destroyed in 3.b above.
Acquiring the partition lock and row lock in parallel
doesn't help anything, but it complicates error handling
as seen above.
This patch serializes acquiring the row lock in lock_ck
after locking the partition to prevent the above race.
This way, erasing the unlocked partition is never expected
to happen while any of its row locks is held.
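The fixed ordering can be sketched with a toy locker (hedged: hand-rolled inspectable locks stand in for the real rwlocks, and the names are illustrative):

```cpp
#include <cassert>
#include <functional>

// Toy locks: just enough state to model exclusive vs shared acquisition.
struct toy_lock {
    int shared = 0;
    bool exclusive = false;
    bool try_lock_shared() { if (exclusive) { return false; } ++shared; return true; }
    void unlock_shared() { --shared; }
    bool try_lock() { if (exclusive || shared > 0) { return false; } exclusive = true; return true; }
    void unlock() { exclusive = false; }
};

// The row lock is attempted only after the partition lock succeeded, so a
// partition-lock failure can never leave behind a row-lock holder pointing
// into an erased partition entry.
struct toy_row_locker {
    toy_lock partition_lock;
    toy_lock row_lock;

    bool lock_ck(const std::function<void()>& fn) {
        if (!partition_lock.try_lock_shared()) {
            return false; // fail before the row lock is ever touched
        }
        if (!row_lock.try_lock()) {
            partition_lock.unlock_shared();
            return false;
        }
        fn();
        row_lock.unlock();
        partition_lock.unlock_shared();
        return true;
    }
};
```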
Fixes #12168
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes #12208
(cherry picked from commit 5007ded2c1)
Contains fixes requested in the issue (and some tiny extras), together with an analysis of why they don't affect users (see commit messages).
Fixes [#11800](https://github.com/scylladb/scylladb/issues/11800)
Closes #11926
* github.com:scylladb/scylladb:
alternator: add maybe_quote to secondary indexes 'where' condition
test/alternator: correct xfail reason for test_gsi_backfill_empty_string
test/alternator: correct indentation in test_lsi_describe
alternator: fix wrong 'where' condition for GSI range key
(cherry picked from commit ce7c1a6c52)
Currently, the `_reader` member is explicitly
initialized with the result of the call to `make_reader`.
And `make_reader`, as a side effect, assigns a value
to the `_reader_handle` member.
Since C++ initializes class members sequentially,
in the order they are defined, the assignment to `_reader_handle`
in `make_reader()` happens before `_reader_handle` is initialized.
This patch fixes that by changing the definition order,
and consequently, the member initialization order
in the constructor so that `_reader_handle` will be (default-)initialized
before the call to `make_reader()`, avoiding the undefined behavior.
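A minimal illustration of the rule the patch relies on (a toy, not the real reader types): non-static data members are initialized in declaration order, regardless of the order in the constructor's initializer list.

```cpp
#include <cassert>

// Declaring the handle before the reader guarantees it exists before the
// side effect of make_reader() assigns to it.
struct toy_reader {
    int handle = 0; // declared first, so initialized first
    int reader;

    toy_reader() : reader(make_reader(handle)) {}

    // stand-in for make_reader(): assigns to the handle member as a side effect
    static int make_reader(int& h) {
        h = 42;
        return h + 1;
    }
};
```

If the declarations were reversed, make_reader() would write to `handle` before its initialization, which for non-trivial member types is undefined behavior.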
Fixes #10882
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes #10883
(cherry picked from commit 9c231ad0ce)
When we write to a materialized view, we need to know some information
defined in the base table such as the columns in its schema. We have
a "view_info" object that tracks each view and its base.
This view_info object has a couple of mutable attributes which are
used to lazily-calculate and cache the SELECT statement needed to
read from the base table. If the base-table schema ever changes -
and the code calls set_base_info() at that point - we need to forget
this cached statement. If we don't (as before this patch), the SELECT
will use the wrong schema and writes will no longer work.
This patch also includes a reproducing test that failed before this
patch, and passes afterward. The test creates a base table with a
view that has a non-trivial SELECT (it has a filter on one of the
base-regular columns), makes a benign modification to the base table
(just a silly addition of a comment), and then tries to write to the
view - and before this patch it fails.
Fixes #10026
Fixes #11542
(cherry picked from commit 2f2f01b045)
The view builder builds the views from a given base table in
view_builder::batch_size batches of rows. After processing this many
rows, it suspends so the view builder can switch to building views for
other base tables in the name of fairness. When resuming the build step
for a given base table, it reuses the reader used previously (also
serving the role of a snapshot, pinning sstables read from). The
compactor however is created anew. As the reader can be in the middle of
a partition, the view builder injects a partition start into the
compactor to prime it for continuing the partition. This however only
included the partition-key, crucially missing any active tombstones:
partition tombstone or -- since the v2 transition -- active range
tombstone. This can result in base rows covered by either of these being
resurrected and the view builder generating view updates for them.
This patch solves this by using the detach-state mechanism of the
compactor which was explicitly developed for situations like this (in
the range scan code) -- resuming a read with the readers kept but the
compactor recreated.
Also included are two test cases reproducing the problem, one with a
range tombstone, the other with a partition tombstone.
Fixes: #11668
Closes #11671
(cherry picked from commit 5621cdd7f9)
PR #9314 fixed a similar issue with regular insert statements
but missed the LWT code path.
It's expected behaviour of
modification_statement::create_clustering_ranges to return an
empty range in this case, since possible_lhs_values it
uses explicitly returns empty_value_set if it evaluates rhs
to null, and it has a comment about it (All NULL
comparisons fail; no column values match.) On the other hand,
all components of the primary key are required to be set,
this is checked at the prepare phase, in
modification_statement::process_where_clause. So the only
problem was modification_statement::execute_with_condition
was not expecting an empty clustering_range in case of
a null clustering key.
Fixes: #11954
(cherry picked from commit 0d443dfd16)
When filtering with a multi column restriction present, all other restrictions were ignored.
So a query like:
`SELECT * FROM WHERE pk = 0 AND (ck1, ck2) < (0, 0) AND regular_col = 0 ALLOW FILTERING;`
would ignore the restriction `regular_col = 0`.
This was caused by a bug in the filtering code:
2779a171fc/cql3/selection/selection.cc (L433-L449)
When multi column restrictions were detected, the code checked if they are satisfied and returned immediately.
This is fixed by returning only when these restrictions are not satisfied. When they are satisfied the other restrictions are checked as well to ensure all of them are satisfied.
This code was introduced back in 2019, when fixing #3574.
Perhaps back then it was impossible to mix multi column and regular columns and this approach was correct.
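The corrected control flow can be sketched as a toy predicate (hedged: hard-coded to the example query above, not the real selection code):

```cpp
#include <cassert>
#include <utility>

struct toy_row { int ck1, ck2, regular; };

// Models: WHERE (ck1, ck2) < (0, 0) AND regular = 0 ALLOW FILTERING.
// Fail fast when the multi-column restriction is unsatisfied, but when it IS
// satisfied, fall through to the remaining restrictions instead of returning.
inline bool row_matches(const toy_row& r) {
    if (!(std::make_pair(r.ck1, r.ck2) < std::make_pair(0, 0))) {
        return false; // multi-column restriction not satisfied
    }
    return r.regular == 0; // the buggy version returned true before this check
}
```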
Fixes: #6200
Fixes: #12014
Closes #12031
* github.com:scylladb/scylladb:
cql-pytest: add a reproducer for #12014, verify that filtering multi column and regular restrictions works
boost/restrictions-test: uncomment part of the test that passes now
cql-pytest: enable test for filtering combined multi column and regular column restrictions
cql3: don't ignore other restrictions when a multi column restriction is present during filtering
(cherry picked from commit 2d2034ea28)
Closes #12086
When stopping the read, the multishard reader will dismantle the
compaction state, pushing back (unpopping) the currently processed
partition's header to its originating reader. This ensures that if the
reader stops in the middle of a partition, on the next page the
partition-header is re-emitted as the compactor (and everything
downstream from it) expects.
It can happen however that there is nothing more for the current
partition in the reader and the next fragment is another partition.
Since we only push back the partition header (without a partition-end)
this can result in two partitions being emitted without being separated
by a partition end.
We could just add the missing partition-end when needed but it is
pointless, if the partition has no more data, just drop the header, we
won't need it on the next page.
The missing partition-end can generate an "IDL frame truncated" message
as it ends up causing the query result writer to create a corrupt
partition entry.
Fixes: https://github.com/scylladb/scylladb/issues/9482
Closes #11912
* github.com:scylladb/scylladb:
test/cql-pytest: add regression test for "IDL frame truncated" error
mutation_compactor: detach_state(): make it no-op if partition was exhausted
Wrong access to an uninitialized token instead of the actual
generated string caused the parser to crash. This wasn't
detected by the ANTLR3 compiler because all the temporary
variables defined in the ANTLR3 statements are global in the
generated code. This essentially caused a null dereference.
Tests: 1. The fixed issue scenario from github.
2. Unit tests in release mode.
Fixes #11774
Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
Message-Id: <20190612133151.20609-1-eliransin@scylladb.com>
Closes #11777
(cherry picked from commit ab7429b77d)
cql3::util::maybe_quote() is a utility function formatting an identifier
name (table name, column name, etc.) that needs to be embedded in a CQL
statement - and might require quoting if it contains non-alphanumeric
characters, uppercase characters, or a CQL keyword.
maybe_quote() made an effort to only quote the identifier name if necessary,
e.g., a lowercase name usually does not need quoting. But lowercase names
that are CQL keywords - e.g., to or where - cannot be used as identifiers
without quoting. This can cause problems for code that wants to generate
CQL statements, such as the materialized-view problem in issue #9450 - where
a user had a column called "to" and wanted to create a materialized view
for it.
So in this patch we fix maybe_quote() to recognize invalid identifiers by
using the CQL parser, and quote them. This will quote reserved keywords,
but not so-called unreserved keywords, which *are* allowed as identifiers
and don't need quoting. This addition slows down maybe_quote(), but
maybe_quote() is anyway only used in heavy operations which need to
generate CQL.
This patch also adds two tests that reproduce the bug and verify its
fix:
1. Add to the low-level maybe_quote() test (a C++ unit test) also tests
that maybe_quote() quotes reserved keywords like "to", but doesn't
quote unreserved keywords like "int".
2. Add a test reproducing issue #9450 - creating a materialized view
whose key column is a keyword. This new test passes on Cassandra,
failed on Scylla before this patch, and passes after this patch.
It is worth noting that maybe_quote() now has a "forward compatibility"
problem: If we save CQL statements generated by maybe_quote(), and a
future version introduces a new reserved keyword, the parser of the
future version may not be able to parse the saved CQL statement that
was generated with the old maybe_quote() and didn't quote what is now
a keyword. This problem can be solved in two ways:
1. Try hard not to introduce new reserved keywords. Instead, introduce
unreserved keywords. We've been doing this even before recognizing
this maybe_quote() future-compatibility problem.
2. In the next patch we will introduce quote() - which unconditionally
quotes identifier names, even if lowercase. These quoted names will
be uglier for lowercase names - but will be safe from future
introduction of new keywords. So we can consider switching some or
all uses of maybe_quote() to quote().
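The quoting rule can be sketched as follows (hedged: the real fix consults the CQL parser to recognize invalid identifiers, and the reserved-word list here is a tiny illustrative subset):

```cpp
#include <cassert>
#include <cctype>
#include <set>
#include <string>

// A name passes through unquoted only if it is a plain lowercase identifier
// AND not a reserved keyword; otherwise it is double-quoted, with any
// embedded double quotes doubled.
inline std::string maybe_quote(const std::string& name) {
    static const std::set<std::string> reserved = {"to", "where", "select", "from"};
    bool plain = !name.empty() && !std::isdigit(static_cast<unsigned char>(name[0]));
    for (char c : name) {
        if (!(std::islower(static_cast<unsigned char>(c)) ||
              std::isdigit(static_cast<unsigned char>(c)) || c == '_')) {
            plain = false;
        }
    }
    if (plain && reserved.count(name) == 0) {
        return name;
    }
    std::string quoted = "\"";
    for (char c : name) {
        quoted += c;
        if (c == '"') { quoted += c; } // escape embedded quotes by doubling
    }
    quoted += '"';
    return quoted;
}
```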
Fixes #9450
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220118161217.231811-1-nyh@scylladb.com>
(cherry picked from commit 5d2f694a90)
The return from DescribeTable which describes GSIs and LSIs is missing
the Projection field. We do not yet support all the Projection settings
(see #5036), but the default, which we do support, is ALL, and DescribeTable
should return that in its description.
Fixes #11470
Closes #11693
(cherry picked from commit 636e14cc77)
The problem was an incompatibility with Cassandra, which accepts bool
as a string in the `fromJson()` UDF. The difference between Cassandra and
Scylla now is that Scylla accepts whitespace around the word in the string,
while Cassandra doesn't. Both are case insensitive.
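The accepted semantics described above can be sketched as (hedged: function name and exact whitespace set are illustrative, not the real fromJson() code):

```cpp
#include <algorithm>
#include <cassert>
#include <cctype>
#include <optional>
#include <string>
#include <string_view>

// Bool-from-string parsing: case-insensitive, tolerating surrounding
// whitespace; anything else is rejected.
inline std::optional<bool> parse_json_bool(std::string_view s) {
    const auto first = s.find_first_not_of(" \t\r\n");
    if (first == std::string_view::npos) {
        return std::nullopt; // only whitespace
    }
    const auto last = s.find_last_not_of(" \t\r\n");
    std::string word(s.substr(first, last - first + 1));
    std::transform(word.begin(), word.end(), word.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    if (word == "true") { return true; }
    if (word == "false") { return false; }
    return std::nullopt;
}
```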
Fixes: #7915
(cherry picked from commit 1902dbc9ff)
The EC2 instance metadata service can be busy, so let's retry connecting with
an interval, just like we do in scylla-machine-image.
Fixes #10250
Signed-off-by: Takuya ASADA <syuu@scylladb.com>
Closes #11688
(cherry picked from commit 6b246dc119)
(cherry picked from commit e2809674d2)
detach_state() allows the user to resume a compaction process later,
without having to keep the compactor object alive. This happens by
generating and returning the mutation fragments the user has to re-feed
to a newly constructed compactor to bring it into the exact same state
the current compactor was at the point of stopping the compaction.
This state includes the partition-header (partition-start and static-row
if any) and the currently active range tombstone.
Detaching the state is pointless however when the compaction was stopped
such that the currently compacted partition was completely exhausted.
Allowing the state to be detached in this case seems benign but it
caused a subtle bug in the main user of this feature: the partition
range scan algorithm, where the fragments included in the detached state
were pushed back into the reader which produced them. If the partition
happened to be exhausted -- meaning the next fragment in the reader was
a partition-start or EOS -- this resulted in the partition being
re-emitted later without a partition-end, generating a corrupt
query result and, in turn, an obscure "IDL frame
truncated" error.
This patch solves this seemingly benign but sinister bug by making the
return value of `detach_state()` an std::optional and returning a
disengaged optional when the partition was exhausted.
(cherry picked from commit 70b4158ce0)
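The exhausted-partition check described above can be sketched as follows (a hypothetical Python model, not Scylla's actual C++ code; the class and field names are illustrative):

```python
# Minimal sketch of the fix: detach_state() returns None (a "disengaged
# optional") when the current partition was fully exhausted, so the caller
# never re-feeds a partition header for a partition that already ended.
class Compactor:
    def __init__(self):
        self.partition_header = None        # stand-in for partition-start + static row
        self.active_range_tombstone = None  # stand-in for the current tombstone
        self.partition_exhausted = False    # set when partition-end is consumed

    def consume(self, fragment):
        if fragment == "partition-start":
            self.partition_header = fragment
            self.partition_exhausted = False
        elif fragment == "partition-end":
            self.partition_exhausted = True

    def detach_state(self):
        # Nothing to re-feed if the partition was completely consumed.
        if self.partition_exhausted:
            return None
        return (self.partition_header, self.active_range_tombstone)

c = Compactor()
c.consume("partition-start")
assert c.detach_state() is not None   # mid-partition: state can be re-fed
c.consume("partition-end")
assert c.detach_state() is None       # exhausted: don't push fragments back
```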
As described in issue #11801, we saw cases in Alternator where, when a GSI has both partition and sort keys which are non-key attributes in the base table, updating the GSI sort-key attribute to the same value it already had caused the entire GSI row to be deleted.
In this series we fix this bug (it was a bug in our materialized views implementation) and add a reproducing test (plus a few more tests for similar situations which worked before the patch, and continue to work after it).
Fixes #11801
Closes #11808
* github.com:scylladb/scylladb:
test/alternator: add test for issue 11801
MV: fix handling of view update which reassign the same key value
materialized views: inline used-once and confusing function, replace_entry()
(cherry picked from commit e981bd4f21)
`raft_group0` does not own the source and is not responsible for calling
`request_abort`. The source comes from top-level `stop_signal` (see
main.cc) and that's where it's aborted.
Fixes#10668.
Closes#10678
(cherry picked from commit ef7643d504)
When being stopped, the compaction manager may step on ENOSPC. This is not a
reason to fail the stopping process with an abort; it's better to warn about
this in the logs and proceed as if nothing happened.
refs: #11245
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Make it a future-returning method and set up the _stop_future in its
only caller. This makes the next patch much simpler.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Scylla's Bloom filter implementation has a minimal false-positive rate
that it can support (6.71e-5). When setting bloom_filter_fp_chance any
lower than that, the compute_bloom_spec() function, which writes the bloom
filter, throws an exception. However, this is too late - it only happens
while flushing the memtable to disk, and a failure at that point causes
Scylla to crash.
Instead, we should refuse the table creation with the unsupported
bloom_filter_fp_chance. This is also what Cassandra did six years ago -
see CASSANDRA-11920.
This patch also includes a regression test, which crashes Scylla before
this patch but passes after the patch (and also passes on Cassandra).
Fixes #11524.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes#11576
(cherry picked from commit 4c93a694b7)
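The check described above can be sketched like this (an illustrative Python model; the function name is an assumption, and the minimum false-positive rate 6.71e-5 is taken from the text above):

```python
# Refuse an unsupported bloom_filter_fp_chance at CREATE TABLE time - don't
# wait for compute_bloom_spec() to throw during a memtable flush and crash.
MIN_SUPPORTED_FP_CHANCE = 6.71e-5

def validate_bloom_filter_fp_chance(fp_chance: float) -> None:
    if not (0.0 < fp_chance <= 1.0):
        raise ValueError(f"bloom_filter_fp_chance must be in (0, 1], got {fp_chance}")
    if fp_chance < MIN_SUPPORTED_FP_CHANCE:
        raise ValueError(
            f"bloom_filter_fp_chance {fp_chance} is below the minimum "
            f"supported value {MIN_SUPPORTED_FP_CHANCE}")

validate_bloom_filter_fp_chance(0.01)      # a typical value: accepted
try:
    validate_bloom_filter_fp_chance(1e-6)  # below the supported minimum
    assert False, "should have been rejected"
except ValueError:
    pass
```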
When updating an updateable value via CQL, the new value comes as a
string that's then boost::lexical_cast-ed to the desired type. If the
cast throws, the respective exception is printed in the logs, which is
very likely uncalled for.
fixes: #10394
tests: manual
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20220503142942.8145-1-xemul@scylladb.com>
(cherry picked from commit 063d26bc9e)
DescribeTable is currently hard-coded to return PAY_PER_REQUEST billing
mode. Nevertheless, even in PAY_PER_REQUEST mode, the DescribeTable
operation must return a ProvisionedThroughput structure, listing both
ReadCapacityUnits and WriteCapacityUnits as 0. This requirement is not
stated in some of the DynamoDB documentation but is explicitly mentioned in
https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_ProvisionedThroughput.html
Also, empirically, DynamoDB returns ProvisionedThroughput with zeros
even in PAY_PER_REQUEST mode. We even had an xfailing test to confirm this.
The ProvisionedThroughput structure being missing was a problem for
applications like DynamoDB connectors for Spark, if they implicitly
assume that ProvisionedThroughput is returned by DescribeTable, and
fail (as described in issue #11222) if it's outright missing.
So this patch adds the missing ProvisionedThroughput structure, and
the xfailing test starts to pass.
Note that this patch doesn't change the fact that attempting to set
a table to PROVISIONED billing mode is ignored: DescribeTable continues
to always return PAY_PER_REQUEST as the billing mode and zero as the
provisioned capacities.
Fixes #11222
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #11298
(cherry picked from commit 941c719a23)
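The shape of the resulting response can be illustrated with a small sketch (the helper function is hypothetical, not Alternator's actual code; the field names follow the DynamoDB API):

```python
# Even in PAY_PER_REQUEST mode, DescribeTable must include a
# ProvisionedThroughput structure with zeroed capacity units.
def describe_table_response(table_name: str) -> dict:
    return {
        "Table": {
            "TableName": table_name,
            "BillingModeSummary": {"BillingMode": "PAY_PER_REQUEST"},
            # Required even for on-demand tables, per the DynamoDB API
            # reference for ProvisionedThroughput.
            "ProvisionedThroughput": {
                "ReadCapacityUnits": 0,
                "WriteCapacityUnits": 0,
            },
        }
    }

resp = describe_table_response("t")
assert resp["Table"]["ProvisionedThroughput"]["ReadCapacityUnits"] == 0
```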
The generator first set the marker and then applied tombstones.
The marker was set like this:
row.marker() = random_row_marker();
Later, when shadowable tombstones were applied, they were compacted
with the marker as expected.
However, the key for the row was chosen randomly in each iteration and
there are multiple keys set, so there was a possibility of a key clash
with an earlier row. This could override the marker without applying
any tombstones, which is conditional on random choice.
This could generate rows with markers uncompacted with shadowable tombstones.
This broke row_cache_test::test_concurrent_reads_and_eviction on the
comparison between expected and read mutations. The latter was
compacted because it went through an extra merge path, which compacts
the row.
Fix by making sure there are no key clashes.
Closes #11663
(cherry picked from commit 5268f0f837)
Previously, any attempt to take a materialized view or secondary index
snapshot was considered a mistake and caused the snapshot operation to
abort, with a suggestion to snapshot the base table instead.
But an automatic pre-scrub snapshot of a view cannot be attributed to
user error, so the operation should not be aborted in that case.
(It is an open question whether the more correct thing to do during
pre-scrub snapshot would be to silently ignore views. Or perhaps they
should be ignored in all cases except when the user explicitly asks to
snapshot them, by name)
Closes #10760.
Signed-off-by: Michael Livshin <michael.livshin@scylladb.com>
(cherry picked from commit aab4cd850c)
Fixes #10760.
Adding an LSI and a GSI with the same name to the same Alternator table
should be forbidden - because if both exist, only one of them (the GSI)
would actually be usable. DynamoDB also forbids such duplicate names.
So in this patch we add a test for this issue, and fix it.
Since the patch involves a few more uses of the IndexName string,
we also clean up its handling a bit, to use std::string_view instead
of the old-style std::string&.
Fixes #10789
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 8866c326de)
Range tombstones are kept in memory (cache/memtable) in
range_tombstone_list. It keeps them deoverlapped, so applying a range
tombstone which covers many range tombstones will erase existing range
tombstones from the list. This operation needs to be exception-safe,
so range_tombstone_list maintains an undo log. This undo log will
receive a record for each range tombstone which is removed. For
exception safety reasons, before pushing an undo log entry, we reserve
space in the log by calling std::vector::reserve(size() + 1). This is
O(N) where N is the number of undo log entries. Therefore, the whole
application is O(N^2).
This can cause reactor stalls and availability issues when replicas
apply such deletions.
This patch avoids the problem by reserving exponentially increasing
amount of space. Also, to avoid large allocations, switches the
container to chunked_vector.
Fixes #11211
Closes #11215
(cherry picked from commit 7f80602b01)
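The amortization argument above can be sketched numerically (an illustrative model, not Scylla's code; the counters stand in for the cost of std::vector::reserve):

```python
# reserve(size() + 1) before every push re-reserves everything, so N pushes
# cost O(N^2); reserving exponentially growing capacity is amortized O(N).
def cost_reserve_one_each_time(n: int) -> int:
    cost = 0
    for size in range(n):
        cost += size + 1   # each reserve touches all existing entries
    return cost

def cost_reserve_doubling(n: int) -> int:
    cost = 0
    capacity = 0
    for size in range(n):
        if size == capacity:            # reserve only when full
            capacity = max(1, capacity * 2)
            cost += capacity
    return cost

assert cost_reserve_one_each_time(1000) == 500500   # quadratic growth
assert cost_reserve_doubling(1000) < 3 * 1000       # linear, small constant
```

Switching the container to chunked_vector additionally keeps each allocation small even when the total capacity grows large.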
Said method currently emits a partition-end. This method is only called
when the last fragment in the stream is a range tombstone change with a
position after all clustered rows. The problem is that
consume_partition_end() is also called unconditionally, resulting in two
partition-end fragments being emitted. The fix is simple: make this
method a no-op, there is nothing to do there.
Also add two tests: one targeted to this bug and another one testing the
crawling reader with random mutations generated for random schema.
Fixes: #11421
Closes #11422
(cherry picked from commit be9d1c4df4)
Some tests want to generate a fixed number of random partitions; make
their life easier.
(cherry picked from commit 98f3d516a2)
Ref #11421 (prerequisite)
Only for reasons other than "no such KS", i.e. when the failure is
presumed transient and the batch in question is not deleted from
batchlog and will be retried in the future.
(Would info be more appropriate here than warning?)
Signed-off-by: Michael Livshin <michael.livshin@scylladb.com>
Closes #10556
Fixes #10636
(cherry picked from commit 00ed4ac74c)
Currently, if a table is dropped during streaming, the streaming would
fail with no_such_column_family error.
Since the table is dropped anyway, it makes more sense to ignore the
streaming result of the dropped table, whether it is successful or
failed.
This allows users to drop tables during node operations, e.g., bootstrap
or decommission a node.
This is especially useful for the cloud users where it is hard to
coordinate between a node operation by admin and user cql change.
This patch also fixes a possible use-after-free issue by not passing
the table reference object around.
Fixes #10395
Closes #10396
(cherry picked from commit 953af38281)
In functions such as upgrade_to_v2 (excerpt below), if the constructor
of transforming_reader throws, r needs to be destroyed even though it
hasn't been closed. However, if a reader didn't start any operations, it
is safe to destroy such a reader. This issue can potentially manifest
itself in many more readers and might be hard to track down. This commit
adds a bool indicating whether a close is anticipated, thus avoiding
errors in the destructor.
Code excerpt:
flat_mutation_reader_v2 upgrade_to_v2(flat_mutation_reader r) {
    class transforming_reader : public flat_mutation_reader_v2::impl {
        // ...
    };
    return make_flat_mutation_reader_v2<transforming_reader>(std::move(r));
}
Fixes #9065.
Fixes #11491
(cherry picked from commit 9ada63a9cb)
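The "close anticipated" flag described above can be modeled with a short sketch (hypothetical Python analogue of the C++ fix; names are illustrative):

```python
# A reader tracks whether any operation was started; the destructor check
# only complains about a missing close() if one was. A reader that never
# started anything (e.g. a wrapper whose constructor threw) can then be
# destroyed safely without being closed first.
class Reader:
    def __init__(self):
        self._started = False   # close is only "anticipated" once work begins
        self._closed = False

    def fill_buffer(self):
        self._started = True

    def close(self):
        self._closed = True

    def was_safely_destructible(self) -> bool:
        # stand-in for the destructor assertion: ok if closed, or never started
        return self._closed or not self._started

fresh = Reader()
assert fresh.was_safely_destructible()       # never started: no close needed

active = Reader()
active.fill_buffer()
assert not active.was_safely_destructible()  # started work: must close first
active.close()
assert active.was_safely_destructible()
```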
from Tomasz Grabiec
This series fixes lack of mutation associativity which manifests as
sporadic failures in
row_cache_test.cc::test_concurrent_reads_and_eviction due to differences
in mutations applied and read.
No known production impact.
Refs https://github.com/scylladb/scylladb/issues/11307
Closes #11312
* github.com:scylladb/scylladb:
test: mutation_test: Add explicit test for mutation commutativity
test: random_mutation_generator: Workaround for non-associativity of mutations with shadowable tombstones
db: mutation_partition: Drop unnecessary maybe_shadow()
db: mutation_partition: Maintain shadowable tombstone invariant when applying a hard tombstone
mutation_partition: row: make row marker shadowing symmetric
(cherry picked from commit 484004e766)
This makes catching issues related to concurrent access of same or
adjacent entries more likely. For example, catches #11239.
Closes #11260
(cherry picked from commit 8ee5b69f80)
This pull request backports 3 important fixes from adc08d0ab9. Said 3 commits fixed important bugs in the v2 variant of the evictable reader, but were not backported because they were part of a large series doing the v2 conversion in general. This means that 5.0 was left with a buggy evictable reader v2, which is used by repair. So far in the wild we've seen one bug manifest itself: the evictable reader getting stuck, spinning in a tight loop in `evictable_reader_v2::do_fill_buffer()`, in turn making repair stuck too.
Fixes: #11223
Closes #11540
* github.com:scylladb/scylladb:
test/boost/mutation_reader_test: add v2 specific evictable reader tests
evictable_reader_v2: terminate active range tombstones on reader recreation
evictable_reader_v2: restore handling of non-monotonically increasing positions
evictable_reader_v2: simplify handling of reader recreation
When configuring tcp-nodelay unconditionally, the messaging service thinks
the gossiper uses group index 1, though that changed some time ago and now
those verbs belong to group 0.
fixes: #11465
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
(cherry picked from commit 2c74062962)
Long-term index caching in the global cache, as introduced in 4.6, is a major
pessimization for workloads where accesses to the index are (spatially) sparse.
We want to have a way to disable it for the affected workloads.
There is already infrastructure in place for disabling it for BYPASS CACHE
queries. One way of solving the issue is hijacking that infrastructure.
This patch adds a global flag (and a corresponding CLI option) which controls
index caching. Setting the flag to `false` causes all index reads to behave
like they would in BYPASS CACHE queries.
Consequences of this choice:
- The per-SSTable partition_index_cache is unused. Every index_reader has
its own, and they die together. Independent reads can no longer reuse the
work of other reads which hit the same index pages. This is not crucial,
since partition accesses have no (natural) spatial locality. Note that
the original reason for partition_index_cache -- the ability to share
reads for the lower and upper bound of the query -- is unaffected.
- The per-SSTable cached_file is unused. Every index_reader has its own
(uncached) input stream from the index file, and every
bsearch_clustered_cursor has its own cached_file, which dies together with
the cursor. Note that the cursor still can perform its binary search with
caching. However, it won't be able to reuse the file pages read by
index_reader. In particular, if the promoted index is small, and fits inside
the same file page as its index_entry, that page will be re-read.
It can also happen that index_reader will read the same index file page
multiple times. When the summary is so dense that multiple index pages fit in
one index file page, advancing the upper bound, which reads the next index
page, will read the same index file page. Since summary:disk ratio is 1:2000,
this is expected to happen for partitions with size greater than 2000
partition keys.
Fixes #11202
(cherry picked from commit cdb3e71045)
One is a reincarnation of the recently removed
test_multishard_combining_reader_non_strictly_monotonic_positions. The
latter was actually targeting the evictable reader but through the
multishard reader, probably for historic reasons (evictable reader was
part of the multishard reader family).
The other one checks that active range tombstone changes are properly
terminated when the partition ends abruptly after recreating the reader.
(cherry picked from commit 014a23bf2a)
Reader recreation messes with the continuity of the mutation fragment
stream because it breaks snapshot isolation. We cannot guarantee that a
range tombstone, or even the partition, started before recreation will
continue after it. So we have to tie up all loose ends when
recreating the reader. We already close uncontinued partitions. This
commit also takes care of closing any range tombstone started by
unconditionally emitting a null range tombstone. This is legal to do,
even if no range tombstone was in effect.
(cherry picked from commit 9e48237b86)
We thought that, unlike v1, v2 would not need this. But it does.
Handled similarly to how v1 did it: we ensure each buffer represents
forward progress, when the last fragment in the buffer is a range
tombstone change:
* Ensure the content of the buffer represents progress w.r.t.
_next_position_in_partition, thus ensuring the next time we recreate
the reader it will continue from a later position.
* Continue reading until the next (peeked) fragment has a strictly
larger position.
The code is just much nicer because it uses coroutines.
(cherry picked from commit 6db08ddeb2)
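The "continue until the peeked fragment has a strictly larger position" rule can be sketched as follows (a simplified Python model with positions as integers; not Scylla's actual reader code):

```python
# After starting a buffer fill, keep consuming fragments until the next
# (peeked) one has a strictly larger position, so a recreated reader always
# resumes past everything already emitted - even when several fragments
# (e.g. range tombstone changes) share one position.
def fill_buffer_with_progress(fragments, start_index):
    """fragments: list of (position, payload); returns (buffer, next_index)."""
    buf = [fragments[start_index]]
    i = start_index + 1
    # Not strictly past the buffer's last position yet: keep going.
    while i < len(fragments) and fragments[i][0] <= buf[-1][0]:
        buf.append(fragments[i])
        i += 1
    return buf, i

frags = [(1, "rtc"), (2, "rtc"), (2, "row"), (3, "row")]
buf, nxt = fill_buffer_with_progress(frags, 1)
assert buf == [(2, "rtc"), (2, "row")]   # both pos-2 fragments kept together
assert frags[nxt][0] > buf[-1][0]        # next fragment is strictly later
```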
The evictable reader has a handful of flags dictating what to do after
the reader is recreated: what to validate, what to drop, etc. We
actually need a single flag telling us if the reader was recreated or
not, all other things can be derived from existing fields.
This patch does exactly that. Furthermore it folds do_fill_buffer() into
fill_buffer() and replaces the awkward to use `should_drop_fragment()`
with `examine_first_fragments()`, which does a much better job of
encapsulating all validation and fragment dropping logic.
This code reorganization also fixes two bugs introduced by the v2
conversion:
* The loop in `do_fill_buffer()` could become infinite in certain
circumstances due to a difference between the v1 and v2 versions of
`is_end_of_stream()`.
* The position of the first non-dropped fragment was not validated
(this was integrated into the range tombstone trimming which was
thrown out by the conversion).
(cherry picked from commit 498d03836b)
The way our boot-time service "controllers" are written, if a
controller's start_server() finds an error and throws, it cannot expect
the caller (main.cc) to call stop_server(), and must clean up any
resources already created (e.g., sharded services) before returning,
or risk crashes on assertion failures.
This patch fixes such a mistake in Alternator's initialization.
As noted in issue #10025, if the Alternator TLS configuration is
broken - especially the certificate or key files are missing -
Scylla would crash on an assertion failure, instead of reporting
the error as expected. Before this patch such a misconfiguration
will result in the unintelligible:
<alternator::server>::~sharded() [Service = alternator::server]:
Assertion `_instances.empty()' failed. Aborting on shard 0.
After this patch we get the right error message:
ERROR 2022-03-21 15:25:07,553 [shard 0] init - Startup failed:
std::_Nested_exception<std::runtime_error> (Failed to set up Alternator
TLS credentials): std::_Nested_exception<std::runtime_error> (Could not
read certificate file conf/scylla.crt): std::filesystem::__cxx11::
filesystem_error (error system:2, filesystem error: open failed:
No such file or directory [conf/scylla.crt])
Arguably this error message is a bit ugly, so I opened
https://github.com/scylladb/seastar/issues/1029, but at least it says
exactly what the error is.
Fixes #10025
Fixes #11520
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220321133323.3150939-1-nyh@scylladb.com>
(cherry picked from commit 7f89c8b3e3)
An incorrect size is returned from the function, which could lead to
crashes or undefined behavior. Fix by erroring out in these cases.
Fixes #11476
(cherry picked from commit 1c2eef384d)
Scylla's coding standard requires that each header is self-sufficient,
i.e., it includes whatever other headers it needs - so it can be included
without having to include any other header before it.
We have a test for this, "ninja dev-headers", but it isn't run very
frequently, and it turns out our code deviated from this requirement
in a few places. This patch fixes those places, and after it
"ninja dev-headers" succeeds again.
This is needed because our CI runs "ninja dev-headers".
Fixes #10995
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #11457
Currently, when detaching the table from the database, we force-evict all queriers for said table. This series broadens the scope of this force-evict to include all inactive reads registered at the semaphore. This ensures that any regular inactive read "forgotten" for any reason in the semaphore, will not end up in said readers accessing a dangling table reference when destroyed later.
Fixes: https://github.com/scylladb/scylladb/issues/11264
Closes #11273
* github.com:scylladb/scylladb:
querier: querier_cache: remove now unused evict_all_for_table()
database: detach_column_family(): use reader_concurrency_semaphore::evict_inactive_reads_for_table()
reader_concurrency_semaphore: add evict_inactive_reads_for_table()
(cherry picked from commit afa7960926)
The error message incorrectly stated that the timeout value cannot
be longer than 24h, but it can - the actual restriction is that the
value cannot be expressed in units like days or months, which was done
in order to significantly simplify the parsing routines (and the fact
that timeouts counted in days are not expected to be common).
Fixes #10286
Closes #10294
(cherry picked from commit 85e95a8cc3)
Add support for specifying integers in scientific format (for example
1.234e8) in INSERT JSON statements:
INSERT INTO table JSON '{"int_column": 1e7}';
Inserting a floating-point number ending with .0 is allowed, as
the fractional part is zero. Non-zero fractional part (for example
12.34) is disallowed. A new test is added to test all those behaviors.
Before the JSON parsing library was switched to RapidJSON from JsonCpp,
this statement used to work correctly, because JsonCpp transparently
casts double to integer value.
This behavior differs from Cassandra, which disallows those types of
numbers (1e7, 123.0 and 12.34).
Fix typo in if condition: "if (value.GetUint64())" to
"if (value.IsUint64())".
Fixes #10100
(cherry picked from commit efe7456f0a)
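The accepted and rejected cases above can be sketched in Python (an illustrative model; the real fix was in C++, testing rapidjson's type check value.IsUint64() rather than the extractor value.GetUint64()):

```python
import json

# A scientific-format or ".0" number is acceptable for an int column as long
# as its fractional part is zero; a non-zero fraction is rejected.
def json_number_to_int(text: str):
    value = json.loads(text)
    if isinstance(value, int):
        return value
    if isinstance(value, float) and value.is_integer():
        return int(value)   # 1e7 and 123.0 both have a zero fractional part
    raise ValueError(f"cannot insert non-integral number {text} into int column")

assert json_number_to_int("1e7") == 10000000
assert json_number_to_int("123.0") == 123
try:
    json_number_to_int("12.34")      # non-zero fractional part: disallowed
    assert False, "should have been rejected"
except ValueError:
    pass
```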
Scenario:
cache = [
row(pos=2, continuous=false),
row(pos=after(2), dummy=true)
]
Scanning read starts, starts populating [-inf, before(2)] from sstables.
row(pos=2) is evicted.
cache = [
row(pos=after(2), dummy=true)
]
Scanning read finishes reading from sstables.
Refreshes cache cursor via
partition_snapshot_row_cursor::maybe_refresh(), which calls
partition_snapshot_row_cursor::advance_to() because iterators are
invalidated. This advances the cursor to
after(2). no_clustering_row_between(2, after(2)) returns true, so
advance_to() returns true, and maybe_refresh() returns true. This is
interpreted by the cache reader as "the cursor has not moved forward",
so it marks the range as complete, without emitting the row with
pos=2. Also, it marks row(pos=after(2)) as continuous, so later reads
will also miss the row.
The bug is in advance_to(), which is using
no_clustering_row_between(a, b) to determine its result, which by
definition excludes the starting key.
Discovered by row_cache_test.cc::test_concurrent_reads_and_eviction
with reduced key range in the random_mutation_generator (1024 -> 16).
Fixes #11239
Closes #11240
* github.com:scylladb/scylladb:
test: mvcc: Fix illegal use of maybe_refresh()
tests: row_cache_test: Add test_eviction_of_upper_bound_of_population_range()
tests: row_cache_test: Introduce one_shot mode to throttle
row_cache: Fix missing row if upper bound of population range is evicted and has adjacent dummy
This is a backport of https://github.com/scylladb/scylla/pull/10420 to branch 5.0.
Branch 5.0 had somewhat different code in this expression area, so the backport was not automatic, but it was nevertheless fairly straightforward - just copy the exact same checking code to its right place, and keep the exact same tests to see we indeed fixed the bug.
Refs #10535.
The original cover letter from https://github.com/scylladb/scylla/pull/10420:
In the filtering expression "WHERE m[?] = 2", our implementation was buggy when either the map, or the subscript, was NULL (and also when the latter was an UNSET_VALUE). Our code ended up dereferencing null objects, yielding bizarre errors when we were lucky, or crashes when we were less lucky - see examples of both in issues https://github.com/scylladb/scylla/issues/10361, https://github.com/scylladb/scylla/issues/10399, https://github.com/scylladb/scylla/pull/10401. The existing test test_null.py::test_map_subscript_null reproduced all these bugs sporadically.
In this series we improve the test to reproduce the separate bugs separately, and also reproduce additional problems (like the UNSET_VALUE). We then define both m[NULL] and NULL[2] to result in NULL instead of the existing undefined (and buggy, and crashing) behavior. This new definition is consistent with our usual SQL-inspired tradition that NULL "wins" in expressions - e.g., NULL < 2 is also defined as resulting in NULL.
However, this decision differs from Cassandra, where m[NULL] is considered an error but NULL[2] is allowed. We believe that making m[NULL] be a NULL instead of an error is more consistent, and moreover - necessary if we ever want to support more complicated expressions like m[a], where the column a can be NULL for some rows and non-NULL for others, and it doesn't make sense to return an "invalid query" error in the middle of the scan.
Fixes https://github.com/scylladb/scylla/issues/10361
Fixes https://github.com/scylladb/scylla/issues/10399
Fixes https://github.com/scylladb/scylla/pull/10401
Closes #11142
* github.com:scylladb/scylla:
test/cql-pytest: reproducer for CONTAINS NULL bug
expressions: don't dereference invalid map subscript in filter
expressions: fix invalid dereference in map subscript evaluation
test/cql-pytest: improve tests for map subscripts and nulls
This is a reproducer for issue #10359 that a "CONTAINS NULL" and
"CONTAINS KEY NULL" restrictions should not match any set, but currently
do match non-empty or all sets.
The tests currently fail on Scylla, so they are marked xfail. They also fail
on Cassandra because Cassandra considers such a request an error, which
we consider a mistake (see #4776) - so the tests are marked "cassandra_bug".
Refs #10359.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220412130914.823646-1-nyh@scylladb.com>
(cherry picked from commit ae0e1574dc)
If we have the filter expression "WHERE m[?] = 2", the existing code
simply assumed that the subscript is an object of the right type.
However, while it should indeed be the right type (we already have code
that verifies that), there are two more options: It can also be a NULL,
or an UNSET_VALUE. Either of these cases causes the existing code to
dereference a non-object as an object, leading to bizarre errors (as
in issue #10361) or even crashes (as in issue #10399).
Cassandra returns an invalid request error in these cases: "Unsupported
unset map key for column m" or "Unsupported null map key for column m".
We decided to do things differently:
* For NULL, we consider m[NULL] to result in NULL - instead of an error.
This behavior is more consistent with other expressions that contain
null - for example NULL[2] and NULL<2 both result in NULL as well.
Moreover, if in the future we allow more complex expressions, such
as m[a] (where a is a column), we can find the subscript to be null
for some rows and non-null for other rows - and throwing an "invalid
query" in the middle of the filtering doesn't make sense.
* For UNSET_VALUE, we do consider this an error like Cassandra, and use
the same error message as Cassandra. However, the current implementation
checks for this error only when the expression is evaluated - not
before. It means that if the scan is empty before the filtering, the
error will not be reported and we'll silently return an empty result
set. We currently consider this ok, but we can also change this in the
future by binding the expression only once (today we do it on every
evaluation) and validating it once after this binding.
Fixes #10361
Fixes #10399
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit fbb2a41246)
When we have a filter such as "WHERE m[2] = 3" (where m is a map
column), if a row had a null value for m, our expression evaluation
code incorrectly dereferenced an unset optional and continued
processing the result of this dereference, which resulted in undefined
behavior - sometimes we were lucky enough to get a "marshaling error"
but other times Scylla crashed.
The fix is trivial - just check before dereferencing the optional value
of the map. We return null in that case, which means that we consider
the result of null[2] to be null. I think this is a reasonable approach
and fits our overall approach of making null dominate expressions (e.g.,
the value of "null < 2" is also null).
The test test_filtering.py::test_filtering_null_map_with_subscript,
which used to frequently fail with marshaling errors or crashes, now
passes every time so its "xfail" mark is removed.
Fixes #10417
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 808a93d29b)
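The check-before-dereference fix can be sketched as follows (a minimal Python model of the semantics, not Scylla's actual evaluation code):

```python
# Before evaluating m[key], check that the map value itself is present; if
# the column is null, the whole subscript expression is null, following the
# "null wins" convention described above (as in "null < 2" -> null).
def eval_map_subscript(map_value, key):
    if map_value is None:      # the column is null for this row
        return None            # null[key] -> null, not a crash
    return map_value.get(key)  # a missing key also yields null

assert eval_map_subscript(None, 2) is None
assert eval_map_subscript({2: 3}, 2) == 3
assert eval_map_subscript({2: 3}, 5) is None
```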
The test test_null.py::test_map_subscript_null turned out to reproduce
multiple bugs related to using map subscripts in filtering expressions.
One was issue #10361 (m[null] resulted in a bizarre error) or #10399
(m[null] resulted in a crash), and a different issue was #10401 (m[2]
resulted in a bizarre error or a crash if m itself was null). Moreover,
the same test uncovered different bugs depending how it was run - alone
or with other tests - because it was using a shared table.
In this patch we introduce two separate tests in test_filtering.py
which are designed to reproduce these separate bugs instead of mixing
them into one test. The new tests also cover a few more corners which
the previous test (which focused on nulls) missed - such as UNSET_VALUE.
The two new tests (and the old test_map_subscript_null) pass on
Cassandra so still assume that the Cassandra behavior - that m[null]
should be an error - is the correct behavior. We may want to change
the desired behavior (e.g., to decide that m[null] be null, not an
error), and change the tests accordingly later - but for now the
tests follow Cassandra's behavior exactly, and pass on Cassandra
and fail on Scylla (so are marked xfail).
The bugs reproduced by these tests involve randomness or reading
uninitialized memory, so these tests sometimes pass, sometimes fail,
and sometimes even crash (as reported in #10399 and #10401). So to
reproduce these bugs run the tests multiple times. For example:
test/cql-pytest/run --count 100 --runxfail
test_filtering.py::test_filtering_null_map_with_subscript
Refs #10361
Refs #10399
Refs #10401
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 189b8845fe)
lookup_readers might fail after populating some readers
and those better be closed before returning the exception.
Fixes #10351
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes #10425
(cherry picked from commit 055141fc2e)
Need to erase the shared sstable from _sstables
if insertion to _sstables_reversed fails.
Fixes #10787
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit cd68b04fbf)
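The exception-safety pattern described above can be sketched like this (an illustrative Python model; the container and function names are assumptions mirroring _sstables and _sstables_reversed):

```python
# When a value must live in two containers, roll back the first insertion
# if the second one fails, so the containers never go out of sync.
def insert_into_both(sstables, sstables_reversed, sst, fail_second=False):
    sstables.append(sst)
    try:
        if fail_second:
            raise MemoryError("simulated allocation failure")
        sstables_reversed.append(sst)
    except Exception:
        sstables.pop()   # undo the first insertion before re-raising
        raise

a, b = [], []
insert_into_both(a, b, "sst1")
assert a == ["sst1"] and b == ["sst1"]
try:
    insert_into_both(a, b, "sst2", fail_second=True)
except MemoryError:
    pass
assert a == ["sst1"] and b == ["sst1"]   # still consistent after the failure
```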
Before the change, the test artificially set the soft pressure
condition, hoping that the background flusher would flush the
memtable. That won't happen if, by the time the background flusher runs,
the LSA region is updated and the soft pressure (which is not really
there) is lifted. Once apply() becomes preemptible, background partition
version merging can lift the soft pressure, making the memtable flush
not occur and making the test fail.
Fix by triggering soft pressure on retries.
Fixes #10801
Refs #10793
(cherry picked from commit 0e78ad50ea)
Closes #10802
(cherry picked from commit 3bec1cc19f)
There is a bug introduced in e74c3c8 (4.6.0) which makes the memtable
reader skip a range tombstone for a certain pattern of deletions
and under a certain sequence of events.
_rt_stream contains the result of deoverlapping range tombstones which
had the same position, which were sipped from all the versions. The
result of deoverlapping may produce a range tombstone which starts
later, at the same position as a more recent tombstone which has not
been sipped from the partition version yet. If we consume the old
range tombstone from _rt_stream and then refresh the iterators, the
refresh will skip over the newer tombstone.
The fix is to drop the logic which drains _rt_stream so that
_rt_stream is always merged with partition versions.
For the problem to trigger, there have to be multiple MVCC versions
(at least 2) which contain deletions of the following form:
[a, c] @ t0
[a, b) @ t1, [b, d] @ t2
c > b
The proper sequence for such versions is (assuming d > c):
[a, b) @ t1,
[b, d] @ t2
Due to the bug, the reader will produce:
[a, b) @ t1,
[b, c] @ t0
The reader also needs to be preempted right before processing [b, d] @
t2 and iterators need to get invalidated so that
lsa_partition_reader::do_refresh_state() is called and it skips over
[b, d] @ t2. Otherwise, the reader will emit [b, d] @ t2 later. If it
does emit the proper range tombstone, it's possible that it will violate
fragment order in the stream if _rt_stream accumulated remainders
(possible with 3 MVCC versions).
The problem goes away once MVCC versions merge.
Fixes #10913
Fixes #10830
Closes #10914
(cherry picked from commit a6aef60b93)
All snitch drivers are supposed to snitch info on some shard and
replicate the dc/rack info across the others. All but azure really do so.
The azure one gets dc/rack on all shards, which is excessive but not
terrible, but when all shards start to replicate their data to all the
others, this may lead to use-after-frees.
fixes: #10494
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
(cherry picked from commit c6d0bc87d0)
With v2 having individual bounds of range tombstone as separate
fragments, out-of-order fragments become more difficult to handle,
especially in the presence of active range tombstone.
Scrub in both SKIP and SEGREGATE mode closes the partition on
seeing the first invalid fragment (SEGREGATE re-opens it immediately).
If there is an active range tombstone, scrub now also has to take care
of closing said tombstone when closing the partition. In a normal stream
it could just use the last position-in-partition to create a closing
bound. But when out-of-order fragments are on the table this is not
possible: the closing bound may be found later in the stream, with a
position smaller than that of the current position-in-partition.
To prevent extending range tombstone changes like that, Scrub now aborts
the compaction on the first invalid fragment seen *inside* an active
range tombstone.
Fixing a v2 stream with range tombstone changes is definitely possible,
but non-trivial, so we defer it until there is demand for it.
This series also makes the mutation fragment stream validator check for
open range tombstones on partition-end and adds a comprehensive
test-suite for the validator.
Fixes: #10168
Tests: unit(dev)
* scrub-rtc-handling-fix/v2 of github.com/denesb/scylla.git:
compaction/compaction: abort scrub when attempting to rectify stream with active tombstone
test/boost/mutation_test: add test for mutation_fragment_stream_validator
mutation_fragment_stream_validator: validate range tombstone changes
(cherry picked from commit edd0481b38)
This series refactors `table::snapshot` and moves the responsibility
to flush the table before taking the snapshot to the caller.
`flush_on_all` and `snapshot_on_all` helpers are added to replica::database
(by making it a peering_sharded_service) and upper layers,
including api and snapshot-ctl now call it instead of calling cf.snapshot directly.
With that, errors are handled in table::snapshot and propagated
back to the callers.
Failure to allocate the `snapshot_manager` object is fatal,
similar to failure to allocate a continuation, since we can't
coordinate across the shards without it.
Test: unit(dev), rest_api(debug)
* github.com:scylladb/scylla:
table: snapshot: handle errors
table: snapshot: get rid of skip_flush param
database: truncate: skip flush when taking snapshot
test: rest_api: storage_service: verify_snapshot_details: add truncate
database: snapshot_on_all: flush before snapshot if needed
table: make snapshot method private
database: add snapshot_on_all
snapshot-ctl: run_snapshot_modify_operation: reject views and secondary index using the schema
snapshot-ctl: refactor and coroutinize take_snapshot / take_column_family_snapshot
api: storage_service: increase visibility of snapshot ops in the log
api: storage_service: coroutinize take_snapshot and del_snapshot
api: storage_service: take_snapshot: improve api help messages
test: rest_api: storage_service: add test_storage_service_snapshot
database: add flush_on_all variants
test: rest_api: add test_storage_service_flush
(cherry picked from commit 2c39c4c284)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes #10975
The code which applied view filtering (i.e. a condition placed
on a view column, e.g. "WHERE v = 42") erroneously used a wildcard
selection, which also assumes that static columns are needed,
if the base table contains any such columns.
The filtering code currently assumes that no such columns are fetched,
so the selection is amended to only ask for regular columns
(primary key columns are sent anyway, because they are enabled
via slice options, so no need to ask for them explicitly).
Fixes #10851
Closes #10855
(cherry picked from commit bc3a635c42)
To avoid failing to run scripts as a non-root user, we need to set
permissions explicitly on executables.
Fixes #10752
Closes #10840
(cherry picked from commit 13caac7ae6)
The flush of hints and batchlog is needed only for tables with
tombstone_gc_mode set to repair mode. We should skip the flush if the
tombstone_gc_mode is not repair mode.
Fixes #10004
Closes #10124
(cherry picked from commit ec59f7a079)
Checking if the type is string is subtly broken for reversed types,
and these types will not be recognized as strings, even though they are.
As a result, if somebody creates a column with DESC order and then
tries to use operator LIKE on it, it will fail because the type
would not be recognized as a string.
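The unwrap-before-check idea can be sketched like this (the class and function names are illustrative stand-ins for the C++ types, not Scylla's API): a reversed type is a wrapper, so any "is this a string type?" check has to look through it first.

```python
# Hedged sketch: a reversed type wraps an underlying type, so string
# checks must unwrap before inspecting the type.

class Type:
    def __init__(self, name, is_string=False):
        self.name = name
        self._is_string = is_string

class ReversedType(Type):
    def __init__(self, underlying):
        super().__init__(f"reversed<{underlying.name}>")
        self.underlying = underlying

def is_string(t):
    # The buggy version checked the flag on the wrapper itself, so
    # reversed<text> was not recognized as a string.
    while isinstance(t, ReversedType):
        t = t.underlying
    return t._is_string

text = Type("text", is_string=True)
print(is_string(text))                # True
print(is_string(ReversedType(text)))  # True (was False before the fix)
```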
Fixes #10183
Closes #10181
* github.com:scylladb/scylla:
test: add a case for LIKE operator on a descending order column
types: fix is_string for reversed types
(cherry picked from commit 733672fc54)
It was assumed that offstrategy compaction is always triggered by streaming/repair
where it would inherit the caller's scheduling group.
However, offstrategy is triggered by a timer via table::_off_strategy_trigger so I don't see
how the expiration of this timer will inherit anything from streaming/repair.
Also, since d309a86, offstrategy compaction
may be triggered by the api where it will run in the default scheduling group.
The bottom line is that the compaction manager needs to explicitly perform offstrategy compaction
in the maintenance scheduling group similar to `perform_sstable_scrub_validate_mode`.
Fixes #10151
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220302084821.2239706-1-bhalevy@scylladb.com>
(cherry picked from commit 0764e511bb)
The Storage field of "coredumpctl info" changed in systemd v248: it adds
"(present)" at the end of the line when the coredump file is available.
Fixes #10669
Closes #10714
(cherry picked from commit ad2344a864)
In DynamoDB one can retrieve only a subset of the attributes using the
AttributesToGet or ProjectionExpression parameters to read requests.
Neither allows an empty list of attributes - if you don't want any
attributes, you should use Select=COUNT instead.
Currently we correctly refuse an empty ProjectionExpression - and have
a test for it:
test_projection_expression.py::test_projection_expression_toplevel_syntax
However, Alternator is missing the same empty-forbidding logic for
AttributesToGet. An empty AttributesToGet is currently allowed, and
basically says "retrieve everything", which is sort of unexpected.
So this patch adds the missing logic, and the missing test (actually
two tests for the same thing - one using GetItem and the other Query).
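The added check can be sketched as follows. The helper name and request shape are hypothetical (Alternator's implementation is C++); the point is only that an empty list is rejected up front, while an absent parameter stays allowed:

```python
# Sketch of the missing validation: an empty AttributesToGet must be
# rejected, since DynamoDB requires at least one attribute there.

def validate_attributes_to_get(request):
    attrs = request.get("AttributesToGet")
    if attrs is not None and len(attrs) == 0:
        # To fetch no attributes, callers should use Select=COUNT instead.
        raise ValueError("ValidationException: AttributesToGet must not be empty")
    return attrs

assert validate_attributes_to_get({"TableName": "t"}) is None
assert validate_attributes_to_get({"AttributesToGet": ["a", "b"]}) == ["a", "b"]
try:
    validate_attributes_to_get({"AttributesToGet": []})
except ValueError as e:
    print(e)
```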
Fixes #10332
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220405113700.9768-1-nyh@scylladb.com>
(cherry picked from commit 9c1ebdceea)
* seastar 8b2c13b3...dbf79189 (1):
> Merge 'Calculate max IO lengths as lengths'
io_queue: Type alias for internal::io_direction_and_length
io_queue, fair_group: Throw instead of assert
io_queue: Keep max lengths on board
io_queue: Toss request_fq_ticket()
io_queue: Introduce make_ticket() helper
io_queue: Remove max_ticket_size
io_queue: Make make_ticket() non-brancy
io_queue: Add devid to group creation log
tests: cstress(release)
fixes: #10704
In 10dd08c9 ("messaging_service: supply and interpret rpc isolation_cookies",
4.2), we added a mechanism to perform rpc calls in remote scheduling groups
based on the connection identity (rather than the verb), so that
connection processing itself can run in the correct group (not just
verb processing), and so that one verb can run in different groups according
to need.
In 16d8cdadc ("messaging_service: introduce the tenant concept", 4.2), we
changed the way isolation cookies are sent:
```
scheduling_group
messaging_service::scheduling_group_for_verb(messaging_verb verb) const {
return _scheduling_info_for_connection_index[get_rpc_client_idx(verb)].sched_group;
@@ -665,11 +694,14 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
if (must_compress) {
opts.compressor_factory = &compressor_factory;
}
opts.tcp_nodelay = must_tcp_nodelay;
opts.reuseaddr = true;
- opts.isolation_cookie = _scheduling_info_for_connection_index[idx].isolation_cookie;
+ // We send cookies only for non-default statement tenant clients.
+ if (idx > 3) {
+ opts.isolation_cookie = _scheduling_info_for_connection_index[idx].isolation_cookie;
+ }
```
This effectively disables the mechanism for the default tenant. As a
result some verbs will be executed in whatever group the messaging
service listener was started in. This used to be the main group,
but in 554ab03 ("main: Run init_server and join_cluster inside
maintenance scheduling group", 4.5), this was changed to the maintenance
group. As a result normal read/writes now compete with maintenance
operations, raising their latency significantly.
Fix by sending the isolation cookie for all connections. With this,
a 2-node cassandra-stress load sees its 99th percentile increase by just
3ms during repair, compared to 10ms+ before.
Fixes #9505.
Closes #10673
(cherry picked from commit c83393e819)
This patch set adds two commits to allow triggering off-strategy compaction early for node operations.
*) repair: Repair table by table internally
This patch changes the way a repair job walks through tables and ranges
if multiple tables and ranges are requested by users.
Before:
```
for range in ranges
for table in tables
repair(range, table)
```
After:
```
for table in tables
for range in ranges
repair(range, table)
```
The motivation for this change is to allow off-strategy compaction to trigger
early, as soon as a table is finished. This reduces the number of
temporary sstables on disk. For example, if there are 50 tables and 256 ranges
to repair, each range will generate one sstable. Before this change, there will
be 50 * 256 sstables on disk before off-strategy compaction triggers. After this
change, once a table is finished, off-strategy compaction can compact the 256
sstables. As a result, this would reduce the number of sstables by 50X.
This is very useful for repair based node operations since multiple ranges and
tables can be requested in a single repair job.
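The peak number of temporary sstables under the two loop orders can be sketched with a back-of-the-envelope model (my own illustration, not code from the patch), assuming each repaired range produces one sstable and off-strategy compaction folds a table's sstables as soon as that table is fully repaired:

```python
# Toy model of peak temporary-sstable count for the two loop orders.

def peak_sstables(n_tables, n_ranges, table_major_order):
    if table_major_order:
        # for table in tables: for range in ranges: ...
        # off-strategy runs after each table, so at most one table's
        # worth of per-range sstables accumulates at a time.
        return n_ranges
    # for range in ranges: for table in tables: ...
    # no table finishes until the very end, so everything accumulates.
    return n_tables * n_ranges

print(peak_sstables(50, 256, table_major_order=False))  # 12800
print(peak_sstables(50, 256, table_major_order=True))   # 256
```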
Refs: #10462
*) repair: Trigger off strategy compaction after all ranges of a table is repaired
When the repair reason is not a user-requested repair, i.e. it is a node
operation (bootstrap, replace and so on), a single repair job contains all
the ranges of a table that need to be repaired.
To trigger off strategy compaction early and reduce the number of
temporary sstable files on disk, we can trigger the compaction as soon
as a table is finished.
Refs: #10462
Closes #10551
* github.com:scylladb/scylla:
repair: Trigger off strategy compaction after all ranges of a table is repaired
repair: Repair table by table internally
(cherry picked from commit e65b3ed50a)
We still consider the TTL support in Alternator to be experimental, so we
don't want to allow a user to enable TTL on a table without turning on a
"--experimental-features" flag. However, there is no reason not to allow
the DescribeTimeToLive call when this experimental flag is off - this call
would simply reply with the truth - that the TTL feature is disabled for
the table!
This is important for client code (such as the Terraform module
described in issue #10660) which uses DescribeTimeToLive for
information, even when it never intends to actually enable TTL.
The patch is trivial - we simply remove the flag check in
DescribeTimeToLive, the code works just as before.
After this patch, the following test now works on Scylla without
experimental flags turned on:
test/alternator/run test_ttl.py::test_describe_ttl_without_ttl
Refs #10660
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 8ecf1e306f)
Seems like 59adf05 has a bug: the regex pattern only handles the
cpuset pattern of the first 32 CPUs and ignores the rest.
We should extend the regex pattern to handle all CPUs.
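The failure mode can be illustrated in miniature. The patterns and the `--cpu-mask` output line below are assumptions for the sake of the demo, not the exact ones in scylla_cpuset_setup; the point is that CPU masks come as comma-separated 32-bit hex words, so a regex matching a single `0x...` group silently drops everything past the first 32 CPUs:

```python
# Illustrative sketch of the regex fix (patterns are assumptions).
import re

output = "--cpu-mask 0x00000003,0xffffffff"

# Narrow pattern: the hex character class stops at the comma, so only
# the first 32-bit word (first 32 CPUs) is captured.
narrow = re.search(r"--cpu-mask (0x[0-9a-fA-F]+)", output)
print(narrow.group(1))   # 0x00000003

# Extended pattern: capture the whole comma-separated mask.
wide = re.search(r"--cpu-mask ((?:0x[0-9a-fA-F]+,?)+)", output)
print(wide.group(1))     # 0x00000003,0xffffffff
```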
Fixes #10523
Closes #10524
(cherry picked from commit a9dfe5a8f4)
Currently, we just pass the entire output of perftune.py when getting the
CPU mask from the script, but this may cause a parse error since the script
may also print warning messages.
To avoid that, we need to extract the CPU mask from the output.
Fixes #10082
Closes #10107
(cherry picked from commit 59adf05951)
First patch removes incorrect usage of rwlock which should be restricted to minor and major compaction tasks.
Second patch revives a semaphore, which was lost in 6737c88045, as we want major to not wait on off-strategy completion before deciding whether or not it should proceed with execution. It wouldn't proceed with execution if user asked major to stop while waiting for a chance to run.
For master, we're going to rely on abortable variant of get_units() to allow major to be quickly aborted.
Fixes #10485.
Closes #10582
* github.com:scylladb/scylla:
compaction_manager: Revive custom job semaphore
compaction_manager: Remove rwlock usage in run_custom_job()
When entry loading fails and there is another request blocked on the
same page, an attempt to erase the failed entry will abort because that
would violate entry_ptr guarantees, which are supposed to keep the
entry alive.
The fix in 92727ac36c was incomplete. It
only helped for the case of a single loader. This patch makes a more
general approach by relaxing the assert.
The assert manifested like this:
scylla: ./sstables/partition_index_cache.hh:71: sstables::partition_index_cache::entry::~entry(): Assertion `!is_referenced()' failed.
Fixes #10617
Closes #10653
(cherry picked from commit f87274f66a)
In commit 6737c88045, we started using a single semaphore for
maintenance operations, which is a good change.
However, after introduction of off-strategy, major cannot proceed
until off-strategy is done reshaping all its input files.
If user requests major to abort, the command will only return
once off-strategy is done, and that can take lots of time.
In master, we'll allow pending major to be quickly aborted, but
that's not possible here as abortable variant of get_units()
is not available yet.
Here, we'll allow major to proceed in parallel to off-strategy,
so major can decide whether or not it should run in parallel.
Fixes #10485.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
The rwlock usage was introduced in 2017 commit 10eaa2339e.
Resharding was online back then and we wanted to serialize it with
major.
Rwlock usage should be restricted to major and minor, as clearly
stated in the documentation, but we're still using it in
run_custom_job().
It gains us nothing, it only prevents off-strategy and other
custom jobs from running concurrently to major.
Let's kill this as we want to allow off-strategy to not prevent
a major from happening in parallel, as the former works only
on the maintenance sstable set and won't interfere with
the latter.
Refs #10485.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Get all flush permits to serialize with any
ongoing flushes and prevent further flushes
during table::clear, in particular calling
discard_completed_segments for every table and
clearing the memtables in clear_and_add.
Fixes #10423
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
(cherry picked from commit aae532a96b)
Dtest triggers the problem by:
1) creating table with LCS
2) disabling regular compaction
3) writing a few sstables
4) running maintenance compaction, e.g. cleanup
Once the maintenance compaction completes, disengaged optional _last_compacted_keys
triggers an exception in notify_completion().
_last_compacted_keys is used by regular compaction for its round-robin file picking
policy. It stores the last compacted key for each level. Meaning it's
irrelevant for any other compaction type.
Regular compaction is responsible for initializing it when it runs for
the first time to pick files. But with it disabled, notify_completion()
will find it uninitialized, therefore resulting in bad_optional_access.
To fix this, the procedure is skipped if _last_compacted_keys is
disengaged. Regular compaction, once re-enabled, will be able to
fill _last_compacted_keys by looking at metadata of the files.
compaction_test.py::TestCompaction::test_disable_autocompaction_doesnt_
block_user_initiated_compactions[CLEANUP-LeveledCompactionStrategy]
now passes.
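The guard can be sketched in Python (a stand-in for the C++ `std::optional` check; names mirror the commit message, not the real code): if regular compaction never initialized the round-robin state, the bookkeeping is simply skipped.

```python
# Minimal sketch of the disengaged-optional guard in notify_completion().

class LeveledStrategy:
    def __init__(self):
        self.last_compacted_keys = None  # like a disengaged std::optional

    def notify_completion(self, level, last_key):
        if self.last_compacted_keys is None:
            # Maintenance compaction (e.g. cleanup) finished but regular
            # compaction never ran; nothing to update. Dereferencing here
            # is what raised bad_optional_access before the fix.
            return
        self.last_compacted_keys[level] = last_key

s = LeveledStrategy()
s.notify_completion(1, "k9")   # no crash with the guard in place
print(s.last_compacted_keys)   # None
```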
Fixes #10378.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes #10508
(cherry picked from commit 8e99d3912e)
This reverts commit 4affa801a5.
In issue #10146 a write throughput drop of ~50% was reported, after
bisect it was found that the change that caused it was adding some
code to table::disable_auto_compaction which stops ongoing
compactions and returns a future that resolves once all the compaction
tasks for a table, if any, were terminated. It turns out that this function
is used only at startup (and in REST api calls which are not used in the test)
in the distributed loader just before resharding and loading of
the sstable data. It is then re-enabled after the resharding and loading
is done.
For still unknown reason, adding the extra logic of stopping ongoing
compactions made the write throughput drop to 50%.
Strangely enough this extra logic **should** (still unvalidated) not
have any side effects since no compactions for a table are supposed to
be running prior to loading it.
This regains the performance but also undoes a change which eventually
should get in once we find the actual culprit.
Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
Closes #10559
Reopens #9313.
(cherry picked from commit 8e8dc2c930)
One user observed this assertion fail, but it's an extremely rare event.
The root cause - interlacing of processing STARTUP and OPTIONS messages -
is still there, but now it's harmless enough to leave it as is.
Fixes #10487
Closes #10503
(cherry picked from commit 603dd72f9e)
_estimated_remaining_tasks gets updated via get_next_non_expired_sstables ->
get_compaction_candidates, but otherwise if we return earlier from
get_sstables_for_compaction, it does not get updated and may go out of sync.
Refs #10418
(to be closed when the fix reaches branch-4.6)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes #10419
(cherry picked from commit 01f41630a5)
It seems that batch prepared statements always return false for
depends_on. This in turn renders the removal criteria of the
prepared statements cache always false, which results in the
queries never being evicted.
Here we change the function to return the true state, meaning
it will return true if one of the sub-queries is dependent
upon the keyspace and/or column family.
Fixes #10129
Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
(cherry picked from commit 4eb0398457)
Purpose:
Cql statements used to have two API functions, depends_on_keyspace and
depends_on_column_family. The former took as a parameter only a table
name, which makes no sense. There could be multiple tables with the same
name each in a different keyspace and it doesn't make sense to
generalize the test - i.e to ask "Does a statement depend on any table
named XXX?"
In this change we unify the two calls into one - depends_on - which takes a
keyspace name and optionally also a table name; that way every logical
dependency test that makes sense is supported by a single API call.
(cherry picked from commit bf50dbd35b)
Ref #10129
An OOM failure while peeking into a fragment, to determine if the reader
will produce any fragments, causes Scylla to abort, as flat_mutation_reader
expects the reader to be closed before being destroyed. Let's close it if
peek() fails, to handle the scenario more gracefully.
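The pattern can be sketched with a toy reader (the API here is a hypothetical Python stand-in for the C++ reader): the invariant is "closed before destroyed", so the error path of a failing peek has to close the reader before propagating the exception.

```python
# Sketch: close the reader on the peek() error path instead of letting
# it be destroyed while still open.

class Reader:
    def __init__(self):
        self.closed = False

    def peek(self):
        raise MemoryError("simulated OOM while buffering a fragment")

    def close(self):
        self.closed = True

def has_fragments(reader):
    try:
        return reader.peek() is not None
    except Exception:
        # Without this close(), the reader would be destroyed open,
        # which is what triggered the abort.
        reader.close()
        raise

r = Reader()
try:
    has_fragments(r)
except MemoryError:
    pass
print(r.closed)  # True
```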
Fixes #10027.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20220204031553.124848-1-raphaelsc@scylladb.com>
(cherry picked from commit 755cec1199)
If we are redefining the log table, we need to ensure any dropped
columns are registered in "dropped_columns" table, otherwise clients will not
be able to read data older than now.
Includes unit test.
Should probably be backported to all CDC enabled versions.
Fixes #10473
Closes #10474
(cherry picked from commit 78350a7e1b)
This series enforces a minimum size of the unprivileged section when
performing the `shrink()` operation.
When the cache is shrunk, we still drop entries first from unprivileged
section (as before this commit), however, if this section is already small
(smaller than `max_size / 2`), we will drop entries from the privileged
section.
This is necessary, as before this change the unprivileged section could
be starved. For example if the cache could store at most 50 entries and
there are 49 entries in privileged section, after adding 5 entries (that would
go to unprivileged section) 4 of them would get evicted and only the 5th one
would stay. This caused problems with BATCH statements where all
prepared statements in the batch have to stay in cache at the same time
for the batch to correctly execute.
To correctly check if the unprivileged section might get too small after
dropping an entry, `_current_size` variable, which tracked the overall size
of cache, is changed to two variables: `_unprivileged_section_size` and
`_privileged_section_size`, tracking section sizes separately.
New tests are added to check this new behavior and bookkeeping of the section
sizes. A test is added, that sets up a CQL environment with a very small
prepared statement cache, reproduces issue in #10440 and stresses the cache.
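The shrink policy can be modeled in a few lines. This is a toy model, not the real loading_cache (entry sizes are uniform here and eviction order is simplified): evict from the unprivileged section first, but once it falls below `max_size / 2`, evict from the privileged section instead.

```python
# Toy model of the two-section shrink policy described above.
from collections import deque

class TwoSectionCache:
    def __init__(self, max_size):
        self.max_size = max_size
        self.privileged = deque()
        self.unprivileged = deque()

    def shrink(self):
        while len(self.privileged) + len(self.unprivileged) > self.max_size:
            if len(self.unprivileged) >= self.max_size // 2 or not self.privileged:
                self.unprivileged.popleft()
            else:
                # Unprivileged section is already small: evict privileged
                # entries instead, so the unprivileged section isn't starved.
                self.privileged.popleft()

c = TwoSectionCache(max_size=50)
c.privileged.extend(range(49))
c.unprivileged.extend(range(5))   # e.g. the 5 statements of one BATCH
c.shrink()
# All 5 unprivileged entries survive; the privileged section shrank instead.
print(len(c.unprivileged), len(c.privileged))  # 5 45
```

Under the old policy, 4 of the 5 BATCH statements would have been evicted, which is exactly the failure described in #10440.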
Fixes #10440.
Closes #10456
* github.com:scylladb/scylla:
loading_cache_test: test prepared stmts cache
loading_cache: force minimum size of unprivileged
loading_cache: extract dropping entries to lambdas
loading_cache: separately track size of sections
loading_cache: fix typo in 'privileged'
(cherry picked from commit 5169ce40ef)
There are two issues with current implementation of remove/remove_if:
1) If it happens concurrently with get_ptr(), the latter may still
populate the cache using value obtained from before remove() was
called. remove() is used to invalidate caches, e.g. the prepared
statements cache, and the expected semantic is that values
calculated from before remove() should not be present in the cache
after invalidation.
2) As long as there is any active pointer to the cached value
(obtained by get_ptr()), the old value from before remove() will be
still accessible and returned by get_ptr(). This can make remove()
have no effect indefinitely if there is persistent use of the cache.
One of the user-perceived effects of this bug is that some prepared
statements may not get invalidated after a schema change and still use
the old schema (until next invalidation). If the schema change was
modifying UDT, this can cause statement execution failures. CQL
coordinator will try to interpret bound values using the old set of
fields. If the driver uses the new schema, the coordinator will fail
to process the value with the following exception:
User Defined Type value contained too many fields (expected 5, got 6)
The patch fixes the problem by making remove()/remove_if() erase old
entries from _loading_values immediately.
The predicate-based remove_if() variant has to also invalidate values
which are concurrently loading to be safe. The predicate cannot be
evaluated on values which are not ready. This may invalidate some
values unnecessarily, but I think it's fine.
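The invalidation semantics can be modeled with a generation counter (a rough stand-in for the real loading_cache, with invented names): `remove()` drops the ready entry and bumps a per-key generation, so a load that started before `remove()` cannot repopulate the cache afterwards.

```python
# Rough model: remove() invalidates both ready entries and in-flight loads.

class Cache:
    def __init__(self):
        self._values = {}  # key -> ready value
        self._epoch = {}   # key -> generation, bumped on remove()

    def get_ptr(self, key, loader):
        if key in self._values:
            return self._values[key]
        gen = self._epoch.get(key, 0)
        value = loader(key)               # may take a long time
        if self._epoch.get(key, 0) == gen:
            self._values[key] = value     # cache only if not invalidated
        return value

    def remove(self, key):
        self._values.pop(key, None)
        self._epoch[key] = self._epoch.get(key, 0) + 1

cache = Cache()

def loader(key):
    # Simulate a schema change racing with the load: remove() runs while
    # this value is still being computed from the old schema.
    cache.remove(key)
    return "old-schema"

cache.get_ptr("stmt", loader)
print("stmt" in cache._values)  # False: the stale value was not cached
```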
Fixes #10117
Message-Id: <20220309135902.261734-1-tgrabiec@scylladb.com>
(cherry picked from commit 8fa704972f)
timestamped_val (and two other type aliases) are nested inside loading_cache,
but indented as if they were top-level names. Adjust the indent to
avoid confusion.
Closes #10118
(cherry picked from commit d1a394fd97)
Ref #10117 - backport prerequisite
Said method has to evict all querier cache entries, belonging to the to-be-dropped table. This is already the case, but there was a window where new entries could sneak in, causing a stale reference to the table to be de-referenced later when they are evicted due to TTL. This window is now closed, the entries are evicted after the method has waited for all ongoing operations on said table to stop.
Fixes: #10450
Closes #10451
* github.com:scylladb/scylla:
replica/database: drop_column_family(): drop querier cache entries after waiting for ops
replica/database: finish coroutinizing drop_column_family()
replica/database: make remove(const column_family&) private
(cherry picked from commit 7f1e368e92)
In https://github.com/scylladb/scylla/issues/10218
we see off-strategy compaction happening on a table
during the initial phases of
`distributed_loader::populate_column_family`.
It is caused by triggering offstrategy compaction
too early, when sstables are populated from the staging
directory in a144d30162.
We need to trigger offstrategy compaction only for the base
table directory, never the staging or quarantine dirs.
Fixes #10218
Test: unit(dev)
DTest: materialized_views_test.py::TestInterruptBuildProcess
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220316152812.3344634-1-bhalevy@scylladb.com>
(cherry picked from commit a1d0f089c8)
The system.config virtual table prints each configuration variable of
type T based on the JSON printer specified in the config_type_for<T>
in db/config.cc.
For two variable types - experimental_features and tri_mode_restriction,
the specified converter was wrong: We used value_to_json<string> or
value_to_json<vector<string>> on something which was *not* a string.
Unfortunately, value_to_json silently cast the given objects into
strings, and the result was garbage: For example as noted in #10047,
for experimental_features instead of printing a list of features *names*,
e.g., "raft", we got a bizarre list of one-byte strings with each feature's
number (which isn't documented or even guaranteed to not change) as well
as carriage-return characters (!?).
The solution is a new printable_to_json<T> which works on a type T that
can be printed with operator<< - as in fact the above two types can -
and the type is converted into a string or vector of strings using this
operator<<, not a cast.
Also added a cql-pytest test for reading system.config and in particular
options of the above two types - checking that they contain sensible
strings and not "garbage" like before this patch.
Fixes #10047.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220209090421.298849-1-nyh@scylladb.com>
(cherry picked from commit fef7934a2d)
Fixes the case of make_room() invoked with last_chunk_capacity_deficit
but _size not in the last reserved chunk.
Found during code review, no user impact.
Fixes #10364.
Message-Id: <20220411224741.644113-1-tgrabiec@scylladb.com>
(cherry picked from commit 0c365818c3)
Fixes the case of make_room() invoked with last_chunk_capacity_deficit
but _size not in the last reserved chunk.
Found during code review, no known user impact.
Fixes #10363.
Message-Id: <20220411222605.641614-1-tgrabiec@scylladb.com>
(cherry picked from commit 01eeb33c6e)
Protocol v4 added WRITE_FAILURE and READ_FAILURE. When running under v3
we downgrade these exceptions to WRITE_TIMEOUT and READ_TIMEOUT (since
the client won't understand the v4 errors), but we still send the new
error codes. This causes the client to become confused.
Fix by updating the error codes.
A better fix is to move the error code from the constructor parameter
list and hard-code it in the constructor, but that is left for a follow-up
after this minimal fix.
Fixes #5610.
Closes #10362
(cherry picked from commit 987e6533d2)
If reserve() allocates more than one chunk, push_back() should not
work with the last chunk. This can result in items being pushed to the
wrong chunk, breaking internal invariants.
Also, pop_back() should not work with the last chunk. This breaks when
there is more than one chunk.
Currently, the container is only used in the sstable partition index
cache.
Manifests by crashes in sstable reader which touch sstables which have
partition index pages with more than 1638 partition entries.
Introduced in 78e5b9fd85 (4.6.0)
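The invariant can be shown with a tiny chunked vector (my own toy illustration, not the real container): once `reserve()` allocates chunks ahead of `_size`, `push_back()` must write into the chunk that holds index `_size`, not into the last reserved chunk.

```python
# Toy illustration of the push_back() invariant after reserve().

CHUNK = 4  # tiny chunk size for the demo; the real cache pages are larger

class ChunkedVector:
    def __init__(self):
        self.chunks = []
        self.size = 0

    def reserve(self, n):
        while len(self.chunks) * CHUNK < n:
            self.chunks.append([None] * CHUNK)

    def push_back(self, x):
        # Buggy version: chunk = self.chunks[-1], which is wrong once
        # reserve() allocated more than one chunk ahead of size.
        chunk = self.chunks[self.size // CHUNK]
        chunk[self.size % CHUNK] = x
        self.size += 1

v = ChunkedVector()
v.reserve(10)            # allocates 3 chunks up front
for i in range(6):
    v.push_back(i)
print(v.chunks[0], v.chunks[1])  # [0, 1, 2, 3] [4, 5, None, None]
```

With the buggy `chunks[-1]` indexing, all six elements would land in the third chunk, clobbering each other and leaving the first two chunks empty; the same reasoning applies to `pop_back()`.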
Fixes #10290
Message-Id: <20220407174023.527059-1-tgrabiec@scylladb.com>
(cherry picked from commit 41fe01ecff)
Since our Docker image moved to Ubuntu, we mistakenly copy
dist/docker/etc/sysconfig/scylla-server to /etc/sysconfig, which is not
used in Ubuntu (it should be /etc/default).
So /etc/default/scylla-server is just default configuration of
scylla-server .deb package, --log-to-stdout is 0, same as normal installation.
We don't want to keep the duplicated configuration file anyway,
so let's drop dist/docker/etc/sysconfig/scylla-server and configure
/etc/default/scylla-server in build_docker.sh.
Fixes #10270
Closes #10280
(cherry picked from commit bdefea7c82)
Previous versions of the Docker image ran scylla as root, but cb19048
accidentally modified it to the scylla user.
To keep compatibility we need to revert this to root.
Fixes #10261
Closes #10325
(cherry picked from commit f95a531407)
We changed the supervisor service name in cb19048, but this breaks
compatibility with scylla-operator.
To fix the issue we need to revert the service name to the previous one.
Fixes #10269
Closes #10323
(cherry picked from commit 41edc045d9)
When a query contains IN restriction on its partition key,
it's currently not eligible for indexing. It was however
erroneously qualified as such, which led to fetching incorrect
results. This commit fixes the issue by not allowing such queries
to undergo indexing, and comes with a regression test.
Fixes #10300
Closes #10302
(cherry picked from commit c0fd53a9d7)
Flushing the base table triggers view building
and corresponding compactions on the view tables.
Temporarily disable compaction on both the base
table and all its views before flush and snapshot
since those flushed sstables are about to be truncated
anyway right after the snapshot is taken.
This should make truncate go faster.
In the process, this series also embeds `database::truncate_views`
into `truncate` and coroutinizes both
Refs #6309
Test: unit(dev)
Closes #10203
* github.com:scylladb/scylla:
replica/database: truncate: fixup indentation
replica/database: truncate: temporarily disable compaction on table and views before flush
replica/database: truncate: coroutinize per-view logic
replica/database: open-code truncate_view in truncate
replica/database: truncate: coroutinize run_with_compaction_disabled lambda
replica/database: coroutinize truncate
compaction_manager: add disable_compaction method
(cherry picked from commit aab052c0d5)
Following up on a57c087c89,
compare_atomic_cell_for_merge should compare the ttl value in the
reverse order since, when comparing two cells that are identical
in all attributes but their ttl, we want to keep the cell with the
smaller ttl value rather than the larger one, as it was written
at a later (wall-clock) time, and so would remain longer after it
expires, until purged after gc_grace seconds.
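The tie-break can be sketched with a hypothetical cell model (tuples of `(timestamp, expiry, ttl)`, not the real atomic_cell): among cells identical except for ttl, the one with the *smaller* ttl wins, because equal expiry with a smaller ttl implies a later wall-clock write.

```python
# Sketch of the reversed ttl comparison in the merge ordering.
from functools import cmp_to_key

def compare_cells(a, b):
    # a, b are (timestamp, expiry, ttl); larger compares as "wins".
    if a[0] != b[0]:
        return -1 if a[0] < b[0] else 1
    if a[1] != b[1]:
        return -1 if a[1] < b[1] else 1
    # reversed comparison on ttl: the smaller ttl ranks higher
    if a[2] != b[2]:
        return 1 if a[2] < b[2] else -1
    return 0

# Two cells with the same write timestamp and the same expiry, but ttls
# computed at different wall-clock times (as the spark migrator does).
same_expiry = [(100, 500, 400), (100, 500, 300)]
winner = max(same_expiry, key=cmp_to_key(compare_cells))
print(winner)  # (100, 500, 300): the smaller-ttl cell, i.e. the later write
```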
Fixes #10173
Test: mutation_test.test_cell_ordering, unit(dev)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220302154328.2400717-1-bhalevy@scylladb.com>
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220306091913.106508-1-bhalevy@scylladb.com>
(cherry picked from commit a085ef74ff)
Unlike atomic_cell_or_collection::equals, compare_atomic_cell_for_merge
currently returns std::strong_ordering::equal if two cells are equal in
every way except their ttls.
The problem with that is that the cells' hashes are different and this
will cause repair to keep trying to repair discrepancies caused by the
ttl being different.
This may be triggered by e.g. the spark migrator that computes the ttl
based on the expiry time by subtracting the expiry time from the current
time to produce a respective ttl.
If the cell is migrated multiple times at different times, it will generate
cells that have the same expiry (by design) but different ttl values.
Fixes #10156
Test: mutation_test.test_cell_ordering, unit(dev)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220302154328.2400717-1-bhalevy@scylladb.com>
(cherry picked from commit a57c087c89)
No need to first check that the cells' expiry or deletion_time
differ before comparing them with `<=>`.
If they are the same the function returns std::strong_ordering::equal
anyhow and that is the same as `<=>` comparing identical values.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220302113833.2308533-1-bhalevy@scylladb.com>
(cherry picked from commit be865a29b8)
Currently any unhandled error during deferred shutdown
is rethrown in a noexcept context (in ~deferred_action),
generating a core dump.
The core dump is not helpful if the cause of the
error is "environmental", i.e. in the system, rather
than in scylla itself.
This change detects several such errors and calls
_Exit(255) to exit the process early, without leaving
a coredump behind. Otherwise, call abort() explicitly,
rather than letting terminate() be called implicitly
by the destructor exception handling code.
Fixes#9573
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220227101054.1294368-1-bhalevy@scylladb.com>
(cherry picked from commit 132c9d5933)
As observed in #10026, after schema changes it somehow happened
that a column definition that does not match any of the base table
columns was passed to expression verification code.
The function that looks up the index of a column happens to return
-1 when it doesn't find anything, so using this returned index
without checking if it's nonnegative results in accessing invalid
vector data, and a segfault or silent memory corruption.
Therefore, an explicit check is added to see if the column was actually
found. This serves two purposes:
- avoiding segfaults/memory corruption
- making it easier to investigate the root cause of #10026
Closes#10039
(cherry picked from commit 7b364fec9849e9a342af1c240e3a7185bf5401ef)
Since regular compaction may run in parallel no lock
is required per-table.
We still acquire a read lock in this patch, for backporting
purposes, in case the branch doesn't contain
6737c88045.
But it can be removed entirely in master in a follow-up patch.
This should solve some of the slowness in cleanup compaction (and
likely in upgrade sstables) seen in #10060, and
possibly #10166.
Fixes#10175
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes#10177
(cherry picked from commit 11ea2ffc3c)
cached_page::on_evicted() is invoked in the LSA allocator context, set in the
reclaimer callback installed by the cache_tracker. However,
cached_pages are allocated in the standard allocator context (note:
page content is allocated inside LSA via lsa_buffer). The LSA region
will happily deallocate these, thinking that these are large
objects which were delegated to the standard allocator. But the
_non_lsa_memory_in_use metric will underflow. When it underflows
enough, shard_segment_pool.total_memory() will become 0 and memory
reclamation will stop doing anything, leading to apparent OOM.
The fix is to switch to the standard allocator context inside
cached_page::on_evicted(). evict_range() was also given the same
treatment as a precaution, it currently is only invoked in the
standard allocator context.
The series also adds two safety checks to LSA to catch such problems earlier.
Fixes#10056
\cc @slivne @bhalevy
Closes#10130
* github.com:scylladb/scylla:
lsa: Abort when trying to free a standard allocator object not allocated through the region
lsa: Abort when _non_lsa_memory_in_use goes negative
tests: utils: cached_file: Validate occupancy after eviction
test: sstable_partition_index_cache_test: Fix alloc-dealloc mismatch
utils: cached_file: Fix alloc-dealloc mismatch during eviction
(cherry picked from commit ff2cd72766)
This reverts commit 4c05e5f966.
Moving cleanup to the maintenance group made its operation time up to
10x slower than in the previous release. It's a blocker for the 4.6 release,
so let's revert it until we figure this all out.
Probably this happens because maintenance group is fixed at a
relatively small constant, and cleanup may be incrementally
generating backlog for regular compaction, where the former is
fighting for resources against the latter.
Fixes#10060.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20220213184306.91585-1-raphaelsc@scylladb.com>
(cherry picked from commit a9427f150a)
DynamoDB allows an UpdateItem operation "REMOVE x.y" when a map x
exists in the item, but x.y doesn't - the removal silently does
nothing. Alternator incorrectly generated an error in this case,
and unfortunately we didn't have a test for this case.
So in this patch we add the missing test (which fails on Alternator
before this patch - and passes on DynamoDB) and then fix the behavior.
After this patch, "REMOVE x.y" will remain an error if "x" doesn't
exist (saying "document paths not valid for this item"), but if "x"
exists and is a map, but "x.y" doesn't, the removal will silently
do nothing and will not be an error.
Fixes#10043.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220207133652.181994-1-nyh@scylladb.com>
(cherry picked from commit 9982a28007)
Fixes#10020
Previous fix 445e1d3 tried to close one double invocation, but added
another, since it failed to ensure all potential nullings of the opt
shared_future happened before a new allocator could reset it.
This simplifies the code by making clearing the shared_future a
pre-requisite for resolving its contents (as read by waiters).
Also removes any need for try-catch etc.
Closes#10024
If the Docker startup script is passed both "--alternator-port" and
"--alternator-https-port", a combination which is supposed to be
allowed, it passes to Scylla the "--alternator-address" option twice.
This isn't necessary, and worse - not allowed.
So this patch fixes the scyllasetup.py script to only pass this
parameter once.
Fixes#10016.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220202165814.1700047-1-nyh@scylladb.com>
This series adds methods to perform offstrategy compaction, if needed, returning a future<bool>
so the caller can wait on it until compaction completes.
The returned value is true iff offstrategy compaction was needed.
The added keyspace_offstrategy_compaction calls perform_offstrategy_compaction on the specified keyspace and tables, returning the number of tables that required offstrategy compaction.
A respective unit test was added to the rest_api pytest.
This PR replaces https://github.com/scylladb/scylla/pull/9095 that suggested adding an option to `keyspace_compaction`,
since offstrategy compaction triggering logic is different enough from major compaction to merit a new API.
Test: unit (dev)
Closes#9980
* github.com:scylladb/scylla:
test: rest_api: add unit tests for keyspace_offstrategy_compaction api
api: add keyspace_offstrategy_compaction
compaction_manager: get rid of submit_offstrategy
table: add perform_offstrategy_compaction
compaction_manager: perform_offstrategy: print ks.cf in log messages
compaction_manager: allow waiting on offstrategy compaction
A recent restructuring of the startup of Alternator (and also other
protocol servers) led to incorrect error-handling behavior during
startup: If an error was detected on one of the shards of the sharded
service (in alternator/server.cc), the sharded service itself was never
stopped (in alternator/controller.cc), leading to an assertion failure
instead of the desired error message.
A common example of this problem is when the requested port for the
server was already taken (this was issue #9914).
So in this patch, exception handling is removed from server.cc - the
exception will propagate to the code in controller.cc, which will
properly stop the server (including the sharded services) before
returning.
Fixes#9914.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220130131709.1166716-1-nyh@scylladb.com>
The system.config table allows changing config parameters, but this
change doesn't survive restarts and is considered to be dangerous
(sometimes). Add an option to disable the table updates. The option
is LiveUpdate and can be set to false via CQL too (once).
fixes#9976
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20220201121114.32503-1-xemul@scylladb.com>
Currently, cloud-related code has cross-dependencies between
scylla and scylla-machine-image.
This is not a good design, and a single change can break both
packages.
To resolve the issue, we need to move all cloud related code to
scylla-machine-image, and remove them from scylla repository.
Change list:
- move cloud part of scylla_util.py to scylla-machine-image
- move cloud part of scylla_io_setup to scylla-machine-image
- move scylla_ec2_check to scylla-machine-image
- move cloud part of scylla_bootparam_setup to scylla-machine-image
Closes#9957
When forwarding a reconfiguration request from follower to a leader in
`modify_config`, there is no reason to wait for the follower's commit
index to be updated. The only useful information is that the leader
committed the configuration change - so `modify_config` should return as
soon as we know that.
There is a reason *not* to wait for the follower's commit index to be
updated: if the configuration change removes the follower, the follower
will never learn about it, so a local waiter will never be resolved.
`execute_modify_config` - the part of `modify_config` executed on the
leader - is thus modified to finish when the configuration change is
fully complete (including the dummy entry appended at the end), and
`modify_config` - which does the forwarding - no longer creates a local
waiter, but returns as soon as the RPC call to the leader confirms that
the entry was committed on the leader.
We still return an `entry_id` from `execute_modify_config` but that's
just an artifact of the implementation.
Fixes#9981.
A regression test was also added in randomized_nemesis_test.
* kbr/modify-config-finishes-v1:
test: raft: randomized_nemesis_test: regression test for #9981
raft: server: don't create local waiter in `modify_config`
The call to `raft::server::add_entry` in `announce_with_raft` may fail
e.g. due to a leader change happening when we try to commit the entry.
In cases like this it makes sense to retry the command so we don't
prematurely report an error to the client.
This may result in double application of the command. Fortunately, the schema
change command is idempotent thanks to the group 0 state ID mechanism
(originally used to prevent conflicting concurrent changes from happening).
Indeed, once a command passes the state ID check, it changes the group 0
history last state ID, causing all later applications of that same
command to fail the check. Similarly, once a command fails the state ID
check, it means that the last state ID is different than the one
observed when the command was being constructed, so all further
applications of the command will also fail the check (it is not possible
for the last state ID to change from X to Y then back to X).
Note that this reasoning only works for commands with `prev_state_id`
engaged, such as the ones which we're using in
`migration_manager::announce_with_raft`. It would not work with
"unconditional commands" where `prev_state_id` is `nullopt` - for those
commands no state ID check is performed. It could still be safe to retry
those commands if they are idempotent for a different reason.
(Note: actually, our schema commands are already idempotent even without
the state ID check, because they simply apply a set of mutations, and
applying the same mutations twice is the same as applying them once.)
Message-Id: <20220131152926.18087-1-kbraun@scylladb.com>
Currently, --swap-size is not able to specify an exact file size because
the option takes its parameter only in GB.
To fix the limitation, let's add --swap-size-bytes to specify the swap
size in bytes.
We need this to implement preallocate swapfile while building IaaS
image.
see scylladb/scylla-machine-image#285
Closes#9971
Refs #9896
Found by @eliransin. The call to new_segment was wrapped in with_timeout.
This means that if the primary caller timed out, we would leave new_segment
calls running, but potentially issue new ones for the next caller.
This could lead to the reserve segment queue being read simultaneously, and
that is not what we want.
Change to always use the shared_future wait, all callers, and clear it
only on result (exception or segment)
Closes#10001
DynamoDB protocol specifies that when getting items in a batch
failed only partially, unprocessed keys can be returned so that
the user can perform a retry.
Alternator used to fail the whole request if any of the reads failed,
but right now it instead produces the list of unprocessed keys
and returns them to the user, as long as at least 1 read was
successful.
This series comes with a test based on Scylla's error injection mechanism, and thus is only useful in modes which come with error injection compiled in. In release mode, expect to see the following message:
SKIPPED (Error injection not enabled in Scylla - try compiling in dev/debug/sanitize mode)
Fixes#9984
Closes#9986
* github.com:scylladb/scylla:
test: add total failure case for GetBatchItem
test: add error injection case for GetBatchItem
test: add a context manager for error injection to alternator
alternator: add error injection to BatchGetItem
alternator: fill UnprocessedKeys for failed batch reads
The test verifies that if all reads from a batch operation
failed, the result is an error, and not a success response
with UnprocessedKeys parameter set to all keys.
With the new context manager it's now easier to request an error
to be injected via REST API. Note that error injection is only
enabled in certain build modes (dev, debug, sanitize)
and the test case will be skipped if it's not possible to use
this mechanism.
Schema changes on top of Raft do not allow concurrent changes.
If two changes are attempted concurrently, one of them gets
`group0_concurrent_modification` exception.
Catch the exception in CQL DDL statement execution function and retry.
In addition, improve the description of CQL DDL statements
in group 0 history table.
Add a test which checks that group 0 history grows iff a schema change does
not throw `group0_concurrent_modification`. Also check that the retry
mechanism works as expected.
* kbr/ddl-retry-v1:
test: unit test for group 0 concurrent change protection and CQL DDL retries
cql3: statements: schema_altering_statement: automatically retry in presence of concurrent changes
Raft randomized nemesis test was improved by adding some more
chaos: randomizing the network delay, server configuration,
ticking speed of servers.
This allowed to catch a serious bug, which is fixed in the first patch.
The patchset also fixes bugs in the test itself and adds quality of life
improvements such as better diagnostics when inconsistency is detected.
* kbr/nemesis-random-v1:
test: raft: randomized_nemesis_test: print state of each state machine when detecting inconsistency
test: raft: randomized_nemesis_test: print details when detecting inconsistency
test: raft: randomized_nemesis_test: print snapshot details when taking/loading snapshots in `impure_state_machine`
test: raft: randomized_nemesis_test: keep server id in impure_state_machine
test: raft: randomized_nemesis_test: frequent snapshotting configuration
test: raft: randomized_nemesis_test: tick servers at different speeds in generator test
test: raft: randomized_nemesis_test: simplify ticker
test: raft: randomized_nemesis_test: randomize network delay
test: raft: randomized_nemesis_test: fix use-after-free in `environment::crash()`
test: raft: randomized_nemesis_test: fix use-after-free in two-way rpc functions
test: raft: randomized_nemesis_test: rpc: don't propagate `gate_closed_exception` outside
test: raft: randomized_nemesis_test: fix obsolete comment
raft: fsm: print configuration entries appearing in the log
raft: `operator<<(ostream&, ...)` implementation for `server_address` and `configuration`
raft: server: abort snapshot applications before waiting for rpc abort
raft: server: logging fix
raft: fsm: don't advance commit index beyond matched entries
Refs #9919
In a6202ae, throw_commitlog_add_error was added to ensure we had more
info on errors generated while writing to the commit log.
However, several call sites catch timed_out_error explicitly, not
checking for nested etc.
97bb1be and 868b572 tried to deal with it, by using check routines.
It turns out there are call sites left, and while these should be
changed, it is safer and quicker for now to just ensure that
iff we have a timed_out_error, we throw yet another timed_out_error.
Closes#10002
When error injection is enabled at compile time, it's now possible
to inject an error into BatchGetItem in order to produce a partial
read, i.e. when only part of the items were retrieved successfully.
DynamoDB protocol specifies that when getting items in a batch
failed only partially, unprocessed keys can be returned so that
the user can perform a retry.
Alternator used to fail the whole request if any of the reads failed,
but right now it instead produces the list of unprocessed keys
and returns them to the user, as long as at least 1 read was
successful.
NOTE: tested manually by compiling Scylla with error injection,
which fails every nth request. It's rather hard to figure out
an automatic test case for this scenario.
Fixes#9984
Fast forwarding is delegated to the underlying reader and assumes that
it's supported. The only corner case requiring special handling that has
shown up in the tests is producing partition start mutation in the
forwarding case if there are no other fragments.
The compacting state keeps track of the uncompacted partition start, but doesn't
emit it by default. If end of stream is reached without producing a
mutation fragment, partition start is not emitted. This is invalid
behaviour in the forwarding case, so I've added a public method to
compacting state to force marking partition as non-empty. I don't like
this solution, as it feels like breaking an abstraction, but I didn't
come across a better idea.
Tests: unit(dev, debug, release)
Message-Id: <20220128131021.93743-1-mikolaj.sieluzycki@scylladb.com>
Improve the comment that explains why we needed to use an explicitly
shared random sequence instead of the usual "random". We now understand
that we need this workaround to undo what the pytest-randomly plugin does.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220130155557.1181345-1-nyh@scylladb.com>
Some of the tests in test/cql-pytest share the same table but use
different keys to ensure they don't collide. Before this patch we used a
random key, which was usually fine, but we recently noticed that the
pytest-randomly plugin may cause different tests to run through the *same*
sequence of random numbers and ruin our intent that different tests use
different keys.
So instead of using a *random* key, let's use a *unique* key. We can
achieve this uniqueness trivially - using a counter variable - because
anyway the uniqueness is only needed inside a single temporary table -
which is different in every run.
Another benefit is that it will now be clearer that the tests are
deterministic and not random - the intent of a random_string() key
was never to randomly walk the entire key space (random_string()
anyway had a pretty narrow idea of what a random string looks like) -
it was just to get a unique key.
Refs #9988 (fixes it for cql-pytest, but not for test/alternator)
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Perform offstrategy compaction via the REST API with
a new `keyspace_offstrategy_compaction` option.
This is useful for performing offstrategy compaction
post repair, after repairing all token ranges.
Otherwise, offstrategy compaction will only be
auto-triggered after a 5 minutes idle timeout.
Like major compaction, the api call returns the offstrategy
compaction task future, so it's waited on.
The `long` result counts the number of tables that required
offstrategy compaction.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Now that the table layer is using perform_offstrategy,
submit_offstrategy is no longer in use and can be deleted.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Expose an async method to perform offstrategy compaction, if needed.
Returns a future<bool> that is resolved when offstrategy_compaction completes.
The future value is true iff offstrategy compaction was required.
To be used in a following patch by the storage_service api.
Call it from `trigger_offstrategy_compaction` that triggers
offstrategy compaction in the background and warn about ignored
failures.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Return a future from perform_offstrategy, resolved
when the offstrategy compaction completes so that callers
can wait on it.
submit_offstrategy still submits the offstrategy compaction
in the background.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
If memory reclamation is triggered inside _cache.emplace(), the _cache
btree can get corrupted. Reclaimers erase from it, and emplace()
assumes that the tree is not modified during its execution. It first
locates the target node and then does memory allocation.
Fix by running emplace() under allocating section, which disables
memory reclamation.
The bug manifests with assert failures, e.g:
./utils/bptree.hh:1699: void bplus::node<unsigned long, cached_file::cached_page, cached_file::page_idx_less_comparator, 12, bplus::key_search::linear, bplus::with_debug::no>::refill(Less) [Key = unsigned long, T = cached_file::cached_page, Less = cached_file::page_idx_less_comparator, NodeSize = 12, Search = bplus::key_search::linear, Debug = bplus::with_debug::no]: Assertion `p._kids[i].n == this' failed.
Fixes#9915
Message-Id: <20220130175639.15258-1-tgrabiec@scylladb.com>
Currently this is done only in
storage_service::get_mutable_token_metadata_ptr
but it needs to be done here as well for code paths
calling mutate_token_metadata directly.
Currently, it is only called from network_topology_strategy_test.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220130152157.2596086-1-bhalevy@scylladb.com>
It was observed (perhaps it depends on the Python implementation)
that an identical seed was used for multiple test cases,
which violated the assumption that generated values are in fact
unique. Using a global generator instead makes sure that it was
only seeded once.
Tests: unit(dev) # alternator tests used to fail for me locally
before this patch was applied
Message-Id: <315d372b4363f449d04b57f7a7d701dcb9a6160a.1643365856.git.sarna@scylladb.com>
On start the storage_service sets up initial tokens. Some dangling
variables, checks and code duplication had accumulated over time.
* xemul/br-storage-service-bootstrap-leftovers:
dht: Use db::config to generate initial tokens
database, dht: Move get_initial_tokens()
storage_service: Factor out random/config tokens generation
storage_service: No extra get_replace_address checks
storage_service: Remove write-only local variable
This series moves the static thread_local repair_meta_map instances
into the repair_service shards.
Refs #9809
Test: unit(release) (including scylla-gdb)
Dtest: repair_additional_test.py::TestRepairAdditional::{test_repair_disjoint_row_2nodes,test_repair_joint_row_3nodes_2_diff_shard_count} replace_address_test.py::TestReplaceAddress::test_serve_writes_during_bootstrap[rbo_enabled](release)
* git@github.com:bhalevy/scylla.git deglobalize-repair_meta_map-v1
repair_service: deglobalize get_next_repair_meta_id
repair_service: deglobalize repair_meta_map
repair_service: pass reference to service to row_level_repair_gossip_helper
repair_meta: define repair_meta_ptr
repair_meta: move static repair_meta map functions out of line
repair_meta: make get_set_diff a free function
repair: repair_meta: no need to keep sharded<netw::messaging_service>
repair: repair_meta: derive subordinate services from repair_service
repair: pass repair_service to repair_meta
* seastar 5524f229b...0d250d15a (6):
> core: memory: Avoid current_backtrace() on alloc failure when logging suppressed
Fixes#9982
> Merge "Enhance io-tester and its rate-limited job" from Pavel E
> queue: pop: assert that the queue is not empty
> io_queue: properly declare io_queue_for_tests
> reactor: Fix off-by-end-of-line misprint in legacy configuration
> fair_queue: Fix move constructor
When forwarding a reconfiguration request from follower to a leader in
`modify_config`, there is no reason to wait for the follower's commit
index to be updated. The only useful information is that the leader
committed the configuration change - so `modify_config` should return as
soon as we know that.
There is a reason *not* to wait for the follower's commit index to be
updated: if the configuration change removes the follower, the follower
will never learn about it, so a local waiter will never be resolved.
`execute_modify_config` - the part of `modify_config` executed on the
leader - is thus modified to finish when the configuration change is
fully complete (including the dummy entry appended at the end), and
`modify_config` - which does the forwarding - no longer creates a local
waiter, but returns as soon as the RPC call to the leader confirms that
the entry was committed on the leader.
We still return an `entry_id` from `execute_modify_config` but that's
just an artifact of the implementation.
Fixes#9981.
The replica::database is passed into the helper just to get the
config from. Better to use config directly without messing with
the database.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The helper in question has nothing to do with replica/database and
is only used by dht to convert config option to a set of tokens.
It sounds like the helper deserves living where it's needed.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There's a place in normal node start that parses the initial_token
option or generates num_tokens random tokens. This code is used almost
unchanged since being ported from its java version. Later there appeared
the dht::get_bootstrap_token() with the same internal logic.
This patch generalizes these two places. Logging messages are unified
too (dtest seem not to check those).
The change improves a corner case. The normal node startup code doesn't
check whether initial_token is empty and num_tokens is 0, generating an
empty bootstrap_tokens set. It fails later with an obscure 'remove_endpoint
should be used instead' message.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The get_replace_address() returns optional<inet_address>, but
in many cases it's used under if (is_replacing()) branch which,
in turn, returns bool(get_replace_address()) and this is only
executed if the returned optional is engaged.
Extra checks can be removed making the code tiny bit shorter.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Check that group 0 history grows iff a schema change does not throw
`group0_concurrent_modification`. Check that the CQL DDL statement retry
mechanism works as expected.
Schema changes on top of Raft do not allow concurrent changes.
If two changes are attempted concurrently, one of them gets
`group0_concurrent_modification` exception.
Catch the exception in CQL DDL statement execution function and retry.
In addition, the description of CQL DDL statements in group 0 history
table was improved.
Rather than using a static unit32_t next_id,
move the next_id variable into repair_service shard 0
and manage it there.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Note that we can't pass the repair_service container()
from its ctor since it's not populated until all shards start.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Keep repair_meta in repair_meta_map as shared_ptr<repair_meta>
rather than lw_shared_ptr<repair_meta> so it can be defined
in the header file and use only forward-declared
class repair_meta.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Define the static {get,insert,remove}_repair_meta functions out
of the repair_meta class definition, on the way of moving them,
along with the repair_meta_map itself, to repair_service.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
All repair_meta needs is the local instance.
If need be, it's a peering sharded service, so the container()
can be used.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Use repair_service as the authoritative source for
the database, messaging_service, system_distributed_keyspace,
and view_update_generator, similar to repair_info.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
When performing a change through group 0 (which right now means schema
changes), clear entries from group 0 history table which are older
than one week.
This is done by including an appropriate range tombstone in the group 0
history table mutation.
* kbr/g0-history-gc-v2:
idl: group0_state_machine: fix license blurb
test: unit test for clearing old entries in group0 history
service: migration_manager: clear old entries from group 0 history when announcing
Previously all servers were ticked at the same moment, every 10
network/timer ticks.
Now we tick each server with probability 1/10 on each network/timer
tick. Thus, on average, every server is ticked once per 10 ticks.
But now we're able to obtain more interesting behaviors.
E.g. we can now observe servers which are stalling for as long as 10 ticks
and servers which temporarily speed up to tick once per each network tick.
Instead of taking a set of functions with different periods, take a
single function that is called on every tick. The periodicity can be
implemented easily on the user side.
The lambda attached to `_crash_fiber` was a coroutine. The coroutine
would use `this` captured by the lambda after the `co_await`, where the
lambda object (hence its captures) was already destroyed.
No idea why it worked before and sanitizers did not complain in debug
mode.
Two-way RPC functions such as `send_snapshot` had a guard object which
was captured in a lambda passed to `with_gate`. The guard object, on
destruction, accessed the `rpc` object. Unfortunately, the guard object
could outlive the `rpc` object. That's because the lambda, and hence the
guard object, was destroyed after `with_gate` finished (it lived in the
frame of the caller of `with_gate`, i.e. `send_snapshot` and others),
so it could be destroyed after `rpc` (the gate prevents `rpc` from being
destroyed).
Make sure that the guard object is destroyed before `with_gate` finishes
by creating it inside the lambda body instead of capturing it in the lambda.
The `raft::rpc` interface functions are called by `raft::server_impl`
and the exceptions may be propagated outside the server, e.g. through
the `add_entry` API.
Translate the internal `gate_closed_exception` to an external
`raft::stopped_error`.
Useful for debugging.
Had to make `configuration` constructor explicit. Otherwise the
`operator<<` implementation for `configuration` would implicitly convert
the `server_address` to `configuration` when trying to output it, causing
infinite recursion.
Removed implicit uses of the constructor.
The implementation of `rpc` may wait for all snapshot applications to
finish before it can finish aborting. This is what the
randomized_nemesis_test implementation did. This caused rpc abort to
hang in some scenarios.
In this commit, the order of abort calls is modified a bit. Instead of
waiting for rpc abort to finish and then aborting existing snapshot
applications, we call `rpc::abort()` and keep the future, then abort
snapshot applications, then wait on the future. Calling `rpc::abort()`
first is supposed to prevent new snapshot applications from starting;
a comment was added at the interface definition. The nemesis test
implementation had this property, and `raft_rpc` in group registry
was adjusted appropriately. Aborting the snapshot applications then
allows `rpc::abort()` to finish.
Otherwise it was possible to incorrectly mark obsolete entries from
earlier terms as committed, leading to inconsistencies between state
machine replicas.
Fixes#9965.
We found that the monitor mode of mdadm does not work on RAID0; this is
not a bug but expected behavior, according to a RHEL developer.
Therefore, we should stop enabling mdmonitor when RAID0 is specified.
Fixes#9540
----
This reverts 0d8f932 and introduces the correct fix.
Closes#9970
* github.com:scylladb/scylla:
scylla_raid_setup: use mdmonitor only when RAID level > 0
Revert "scylla_raid_setup: workaround for mdmonitor.service issue on CentOS8"
This reverts commit 0d8f932f0b,
because RHEL developer explains this is not a bug, it's expected behavior.
(mdadm --monitor does not start when RAID level is 0)
see: https://bugzilla.redhat.com/show_bug.cgi?id=2031936
So we should stop downgrading the mdadm package, and modify our script to
not enable mdmonitor.service on RAID0 but use it only for RAID5.
Raft does not need to persist the commit index since a restarted node will
either learn it from an append message from a leader or (if the entire cluster
is restarted and hence there is no leader) a new leader will figure it out
after contacting a quorum. But some users may want to be able to bring
their local state machine to a state as up-to-date as it was before restart
as soon as possible without any external communication.
For them this patch introduces a new persistence API that allows saving
and restoring last seen committed index.
Message-Id: <YfFD53oS2j1My0p/@scylladb.com>
Fixes#9955
In #9348 we handled the problem of failing to delete segment files on disk, and
the need to recompute disk footprint to keep data flow consistent across intermittent
failures. However, because _reserve_segments and _recycled_segments are queues, we
have to empty them to inspect the contents. One would think it is ok for these
queues to be empty for a while, whilst we do some recalculating, including
disk listing -> continuation switching. But then one (i.e. I) misses the fact
that these queues use the pop_eventually mechanism, which does _not_ handle
a scenario where we push something into an empty queue, thus triggering the
future that resumes a waiting task, but then pop the element immediately, before
the waiting task is run. In fact, _iff_ one does this, not only will things break,
they will in fact start creating undefined behaviour, because the underlying
std::queue<T, circular_buffer> will _not_ do any bounds checks on the pop/push
operations -> we will pop an empty queue, immediately making it non-empty, but
using undefined memory (with luck null/zeroes).
Strictly speaking, seastar::queue::pop_eventually should be fixed to handle
the scenario, but nonetheless we can fix the usage here as well, by simply
copying the objects and doing the calculation "in the background" while we
potentially start popping the queue again.
Closes#9966
We perform a bunch of schema changes with different values of
`migration_manager::_group0_history_gc_duration` and check if entries
are cleared according to this setting.
When performing a change through group 0 (which right now only covers
schema changes), clear entries from group 0 history table which are older
than one week.
This is done by including an appropriate range tombstone in the group 0
history table mutation.
The compactor recently acquired the ability to consume a v2 stream. The
v2 spec requires that all streams end with a null tombstone.
`range_tombstone_assembler`, the component the compactor uses for
converting the v2 input into its v1 output enforces this with a check on
`consume_end_of_partition()`. Normally the producer of the stream the
compactor is consuming takes care of closing the active tombstone before
the stream ends. The compactor however (or its consumer) can decide to
end the consume early, e.g. to cut the current page. When this happens
the compactor must take care of closing the tombstone itself.
Furthermore it has to keep this tombstone around to re-open it on the
next page.
This patch implements this mechanism which was left out of 134601a15e.
It also adds a unit test which reproduces the problems caused by the
missing mechanism.
The compactor now tracks the last clustering position emitted. When the
page ends, this position will be used as the position of the closing
range tombstone change. This ensures the range tombstone only covers the
actually emitted range.
Fixes: #9907
Tests: unit(dev), dtest(paging_test.py, paging_additional_test.py)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20220114053215.481860-1-bdenes@scylladb.com>
Abort signals stopped_error on all awaited entries, but if an entry is
added after this it will be destroyed without signaling and will cause
a waiter to get broken_promise.
Fixes#9688
Message-Id: <Ye6xJjTDooKSuZ87@scylladb.com>
We introduce a new table, `system.group0_history`.
This table will contain a history of all group 0 changes applied through
Raft. With each change is an associated unique ID, which also identifies
the state of all group 0 tables (including schema tables) after this
change is applied, assuming that all such changes are serialized through
Raft (they will be eventually).
Group 0 commands, in addition to mutations which modify group 0 tables,
contain a "previous state ID" and a "new state ID".
The group 0 state machine will only modify state during command
application if the provided "previous state ID" is equal to the
last state ID present in the history table. Otherwise, the command will
be a no-op.
To ensure linearizability of group 0 changes, the performer of the
change must first read the last state ID, only then read the state
and send a command for the state machine. If a concurrent change
races with this command and manages to modify the state, we will detect
that the last state ID does not match during `apply`; all calls to
`apply` are serialized, and `apply` adds the new entry to the history
table at the end, after modifying the group 0 state.
The details of this mechanism are abstracted away with `group0_guard`.
To perform a group 0 change, one needs to call `announce`, which
requires a `group0_guard` to be passed in. The only way to obtain a
`group0_guard` is by calling `start_group0_operation`, which underneath
performs a read barrier on group 0, obtains the last state ID from the
history table, and constructs a new state ID that the change will append
to the history table. The read barrier ensures that all previously
completed changes are visible to this operation. The caller can then
perform any necessary validation, construct mutations which modify group 0
state, and finally call `announce`.
The guard also provides a timestamp which is used by the caller
to construct the mutations. The timestamp is obtained from the new state ID.
We ensure that it is greater than the timestamp of the last state ID.
Thus, if the change is successful, the applied mutations will have greater
timestamps than the previously applied mutations.
We also add two locks. The more important one, used to ensure
correctness, is `read_apply_mutex`. It is held when modifying group 0
state (in `apply` and `transfer_snapshot`) and when reading it (it's
taken when obtaining a `group0_guard` and released before a command is
sent in `announce`). Its goal is to ensure that we don't read partial
state, which could happen without it because group 0 state consists of
many parts and `apply` (or `transfer_snapshot`) potentially modifies all
of them. Note: this doesn't give us 100% protection; if we crash in the
middle of `apply` (or `transfer_snapshot`), then after restart we may
read partial state. To remove this possibility we need to ensure that
commands which were being applied before restart but not finished are
re-applied after restart, before anyone can read the state. I left a
TODO in `apply`.
The second lock, `operation_mutex`, is used to improve liveness. It is
taken when obtaining a `group0_guard` and released after a command is
applied (compare to `read_apply_mutex` which is released before a
command is sent). It is not taken inside `apply` or `transfer_snapshot`.
This lock ensures that multiple fibers running on the same node do not
attempt to modify group0 concurrently - this would cause some of them to
fail (due to the concurrent modification protection described above).
This is mostly important during first boot of the first node, when
services start for the first time and try to create their internal
tables. This lock serializes these attempts, ensuring that all of them
succeed.
* kbr/schema-state-ids-v4:
service: migration_manager: `announce`: take a description parameter
service: raft: check and update state IDs during group 0 operations
service: raft: group0_state_machine: introduce `group0_command`
service: migration_manager: allow using MIGRATION_REQUEST verb to fetch group 0 history table
service: migration_manager: convert migration request handler to coroutine
db: system_keyspace: introduce `system.group0_history` table
treewide: require `group0_guard` when performing schema changes
service: migration_manager: introduce `group0_guard`
service: raft: pass `storage_proxy&` to `group0_state_machine`
service: raft: raft_state_machine: pass `snapshot_descriptor` to `transfer_snapshot`
service: raft: rename `schema_raft_state_machine` to `group0_state_machine`
service: migration_manager: rename `schema_read_barrier` to `start_group0_operation`
service: migration_manager: `announce`: split raft and non-raft paths to separate functions
treewide: pass mutation timestamp from call sites into `migration_manager::prepare_*` functions
service: migration_manager: put notifier call inside `async`
service: migration_manager: remove some unused and disabled code
db: system_distributed_keyspace: use current time when creating mutations in `start()`
redis: keyspace_utils: `create_keyspace_if_not_exists_impl`: call `announce` twice only
In 4aa9e86924 ("Merge 'alternator: move uses of replica module to
data_dictionary' from Avi Kivity"), we changed alternator to use
data_dictionary instead of replica::database. However,
data_dictionary::database objects are different from replica::database
objects in that they don't have a stable address and need to be
captured by value (they are pointer-like). One capture in
describe_stream() was capturing a data_dictionary::database
by reference and so caused a use-after-free when the previous
continuation was deallocated.
Fix by capturing by value.
Fixes#9952.
Closes#9954
The description parameter is used for the group 0 history mutation.
The default is empty, in which case the mutation will leave
the description column as `null`.
I filled the parameter in some easy places as an example and left the
rest for a follow-up.
This is how it looks now in a fresh cluster with a single statement
performed by the user:
cqlsh> select * from system.group0_history ;
key | state_id | description
---------+--------------------------------------+------------------------------------------------------
history | 9ec29cac-7547-11ec-cfd6-77bb9e31c952 | CQL DDL statement
history | 9beb2526-7547-11ec-7b3e-3b198c757ef2 | null
history | 9be937b6-7547-11ec-3b19-97e88bd1ca6f | null
history | 9be784ca-7547-11ec-f297-f40f0073038e | null
history | 9be52e14-7547-11ec-f7c5-af15a1a2de8c | null
history | 9be335dc-7547-11ec-0b6d-f9798d005fb0 | null
history | 9be160c2-7547-11ec-e0ea-29f4272345de | null
history | 9bdf300e-7547-11ec-3d3f-e577a2e31ffd | null
history | 9bdd2ea8-7547-11ec-c25d-8e297b77380e | null
history | 9bdb925a-7547-11ec-d754-aa2cc394a22c | null
history | 9bd8d830-7547-11ec-1550-5fd155e6cd86 | null
history | 9bd36666-7547-11ec-230c-8702bc785cb9 | Add new columns to system_distributed.service_levels
history | 9bd0a156-7547-11ec-a834-85eac94fd3b8 | Create system_distributed(_everywhere) tables
history | 9bcfef18-7547-11ec-76d9-c23dfa1b3e6a | Create system_distributed_everywhere keyspace
history | 9bcec89a-7547-11ec-e1b4-34e0010b4183 | Create system_distributed keyspace
Objects of this type will be serialized and sent as commands to the
group 0 state machine. They contain a set of mutations which modify
group 0 tables (at this point: schema tables and group 0 history table),
the 'previous state ID' which is the last state ID present in the
history table when the operation described by this command has started,
and the 'new state ID' which will be appended to the history table if
this change is successful (successful = the previous state ID is still
equal to the last state ID in the history table at the moment of
application). It also contains the address of the node which constructed
this command.
The state ID mechanism will be described in more detail in a later
commit.
The MIGRATION_REQUEST verb is currently used to pull the contents of
schema tables (in the form of mutations) when nodes synchronize schemas.
We will (ab)use the verb to fetch additional data, such as the contents
of the group 0 history table, for purposes of group 0 snapshot transfer.
We extend `schema_pull_options` with a flag specifying that the puller
requests the additional data associated with group 0 snapshots. This
flag is `false` by default, so existing schema pulls will do what they
did before. If the flag is `true`, the migration request handler will
include the contents of group 0 history table.
Note that if a request is sent with the flag set to `true`, it means
the entire cluster has enabled the Raft feature, which in turn means
that the handler knows about the flag.
This table will contain a history of all group 0 changes applied through
Raft. With each change is an associated unique ID, which also identifies
the state of all group 0 tables (including schema tables) after this
change is applied, assuming that all such changes are serialized through
Raft (they will be eventually).
We will use these state IDs to check if a given change is still
valid at the moment it is applied (in `group0_state_machine::apply`),
i.e. that there wasn't a concurrent change that happened between
creating this change and applying it (which may invalidate it).
`announce` now takes a `group0_guard` by value. `group0_guard` can only
be obtained through `migration_manager::start_group0_operation` and
moved; it cannot be constructed outside `migration_manager`.
The guard will be a method of ensuring linearizability for group 0
operations.
This object will be used to "guard" group 0 operations. Obtaining it
will be necessary to perform a group 0 change (such as modifying the
schema), which will be enforced by the type system.
The initial implementation is a stub and only provides a timestamp which
will be used by callers to create mutations for group 0 changes. The
next commit will change all call sites to use the guard as intended.
The final implementation, coming later, will ensure linearizability of
group 0 operations.
Currently it takes just the snapshot ID. Extend it by taking the whole
snapshot descriptor.
In the following commits I use this to perform additional logging.
1. Generalize the name so it mentions group 0, which schema will be a
strict subset of.
2. Remove the fact that it performs a "read barrier" from the name. The
function will be used in general to ensure linearizability of group0
operations - both reads and writes. "Read barrier" is Raft-specific
terminology, so it can be thought of as an implementation detail.
The functions which prepare schema change mutations (such as
`prepare_new_column_family_announcement`) would use internally
generated timestamps for these mutations. When schema changes are
managed by group 0 we want to ensure that timestamps of mutations
applied through Raft are monotonic. We will generate these timestamps at
call sites and pass them into the `prepare_` functions. This commit
prepares the APIs.
When creating or updating internal distributed tables in
`system_distributed_keyspace::start()`, hardcoded timestamps were used.
There were two reasons for this:
- to protect against issue #2129, where nodes would start without
synchronizing schema with the existing cluster, creating the tables
again, which would override any manual user changes to these tables.
The solution was to use small timestamps (like api::min_timestamp) - the
user-created schema mutations would always 'win' (because when they were
created, they used current time).
- to eliminate unnecessary schema sync. If two nodes created these
tables concurrently with different timestamps, the schemas would
formally be different and would need to merge. This could happen
during upgrades when we upgraded from a version which doesn't have
these tables or doesn't have some columns.
The #2129 workaround is no longer necessary: when nodes start they always
have to sync schema with existing nodes; we also don't allow
bootstrapping nodes in parallel.
The second problem would happen during parallel bootstrap, which we
don't allow, or during parallel upgrade. The procedure we recommend is
rolling upgrade - where nodes are upgraded one by one. In this case only
one node is going to create/update the tables; following upgraded nodes
will sync schema first and notice they don't need to do anything. So if
procedures are followed correctly, the workaround is not needed. If
someone doesn't follow the procedures and upgrades nodes in parallel,
these additional schema synchronizations are not a big cost, so the
workaround doesn't give us much in this case either.
When schema changes are performed by Raft group 0, certain constraints
are placed on the timestamps used for mutations. For this we'll need to
be able to use timestamps which are generated based on current time.
The code would previously `announce` schema mutations once per each keyspace and
once per each table. This can be reduced to two calls of `announce`:
once to create all keyspaces, and once to create all tables.
This should be further reduced to a single `announce` in the future.
Left a FIXME.
Motivation: after migrating to Raft, each `announce` will require a
`read_barrier` to achieve linearizability of schema operations. This
introduces latency, as it requires contacting a leader which then must
contact a quorum. The fewer announce calls, the better. Also, if all
sub-operations are reduced to a single `announce`, we get atomicity -
either all of these sub-operations succeed or none do.
Without this fix, generate_random_content could generate 0 entries
and the expected exception would never be injected.
With it, we generate at least 1 entry and the test passes
with the offending random-seed:
```
random-seed=1898914316
Generated 1 dir entries
Aborting lister after 1 dir entries
test/boost/lister_test.cc(96): info: check 'exception "expected_exception" raised as expected' has passed
```
Fixes#9953
Test: lister_test.test_lister_abort --random-seed=1898914316(dev)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220123122921.14017-1-bhalevy@scylladb.com>
Do not create redis tables unconditionally on boot, since this requires
issuing a raft barrier and cannot be done without a quorum.
Message-Id: <YefV0CqEueRL7G00@scylladb.com>
Take into account that get_reshaping_job selects only
buckets that have more than min_threshold sstables in them.
Therefore, with 256 disjoint sstables in different windows,
allow the first or last windows to not be selected by get_reshaping_job,
which will return at least disjoint_sstable_count - min_threshold + 1
sstables, and not more than disjoint_sstable_count.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220123090044.38449-2-bhalevy@scylladb.com>
* seastar 5025cd44ea...5524f229bb (3):
> Merge "Simplify io-queue configuration" from Pavel E
> fix sstring.find(): make find("") compatible with std::string
> test: file_utils: test_non_existing_TMPDIR: no need to setenv
Contains patch from Pavel Emelyanov <xemul@scylladb.com>:
scylla-gdb: Remove _shares_capacity from fair-group debug
This field is about to be removed in newer seastar, so it
shouldn't be checked in scylla-gdb
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20220121115643.6966-1-xemul@scylladb.com>
Base tables that use compact storage may have a special artificial
column that has an empty type.
c010cefc4d fixed the main CDC path to
handle such columns correctly and to not include them in the CDC Log
schema.
This patch makes sure that generation of preimage ignores such empty
column as well.
Fixes#9876
Closes#9910
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
If the permit was admitted, _base_resources was already accounted in
_resource and therefore has to be deducted from it, otherwise the permit
will think it leaked some resources on destruction.
Test:
dtest(repair_additional_test.py.test_repair_one_missing_row_diff_shard_count)
Refs: #9751
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20220119132550.532073-1-bdenes@scylladb.com>
This pull request fixes two preexisting issues related to snapshot_ctl::true_snapshots_size
https://github.com/scylladb/scylla/issues/9897
https://github.com/scylladb/scylla/issues/9898
And adds a couple of unit tests to test the snapshot_ctl functionality.
Test: unit(dev), database_test.{test_snapshot_ctl_details,test_snapshot_ctl_true_snapshots_size}(debug)
Closes#9899
* github.com:scylladb/scylla:
table: get_snapshot_details: count allocated_size
snapshot_ctl: cleanup true_snapshots_size
snapshot_ctl: true_snapshots_size: do not map_reduce across all shards
Alternator is a coordinator-side service and so should not access
the replica module. In this series all but one of uses of the replica
module are replaced with data_dictionary.
One case remains - accessing the replication map which is not
available (and should not be available) via the data dictionary.
The data_dictionary module is expanded with missing accessors.
Closes#9945
* github.com:scylladb/scylla:
alternator: switch to data_dictionary for table listing purposes
data_dictionary: add get_tables()
data_dictionary: introduce keyspace::is_internal()
As a coordinator-side service, alternator shouldn't touch the
replica module, so it is migrated here to data_dictionary.
One use case still remains that uses replica::keyspace - accessing
the replication map. This really isn't a replica-side thing, but it's
also not logically part of the data dictionary, so it's left using
replica::keyspace (using the data_dictionary::database::real_database()
escape hatch). Figuring out how to expose the replication map to
coordinator-side services is left for later.
Unlike replica::database::get_column_families(), which it replaces,
it returns a vector of tables rather than a map. Map-like access
is provided by get_table(), so it's redundant to build a new
map container to expose the same functionality.
snapshot_ctl uses map_reduce over all database shards,
each counting the size of the snapshots directory,
which is shared, not per-shard.
So the total live size returned by it is multiplied by the number of shards.
Add a unit test to test that.
Fixes#9897
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Fixes#9922
The storage proxy uses is_timeout_exception to choose between different code paths.
a6202ae079 broke this (due to bit rot and
intermixing) by wrapping exceptions for informational purposes.
This adds a check of nested exception types in exception handling, as well as a
test for the routine itself.
Closes#9932
* github.com:scylladb/scylla:
database/storage_proxy: Use "is_timeout_exception" instead of catch match
utils::is_timeout_exception: Ensure we handle nested exception types
Instead of the replica module's is_internal_keyspace(), provide
it as part of data_dictionary. By making it a member of the keyspace
class, it is also more future proof in that it doesn't depend on
a static list of names.
Instead of lengthy blurbs, switch to single-line, machine-readable
standardized (https://spdx.dev) license identifiers. The Linux kernel
switched long ago, so there is strong precedent.
Three cases are handled: AGPL-only, Apache-only, and dual licensed.
For the latter case, I chose (AGPL-3.0-or-later and Apache-2.0),
reasoning that our changes are extensive enough to apply our license.
The changes were applied mechanically with a script, except for
licenses/README.md.
Closes#9937
Currently, when TWCS reshape finds a bucket containing more than 32
files, it will blindly resize that bucket to 32.
That's very bad because it doesn't take into consideration that
compaction efficiency depends on relative sizes of files being
compacted together, meaning that a huge file can be compacted with
a tiny one, producing lots of write amplification.
To solve this problem, STCS reshape logic will now be reused in
each time bucket. So only similar-sized files are compacted together
and the time bucket will be considered reshaped once its size tiers
are properly compacted, according to the reshape mode.
Fixes#9938.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20220117205000.121614-1-raphaelsc@scylladb.com>
If seal_snapshot fails we currently do not signal
the manifest_write semaphore and shards waiting for
it will be blocked forever.
Also, call manifest_write.wait in a `finally` clause
rather than in a `then` clause, even though
the `my_work` future never fails at the moment,
to make this future-proof.
Fixes#9936
Test: database_test(dev)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220117181733.3706764-1-bhalevy@scylladb.com>
"
Said method should take care of checking that parsing stopped in a valid
state. This patch-set expands the existing but very lacking
implementation by improving the existing error message and adding an
additional check for prematurely exiting the parser in the middle of
parsing an index entry, something we've seen recently in #9446.
To help in debugging such issues, some additional information is added
to the trace messages.
The series also fixes a bug in the error handling code of the partition
index cache.
Refs: #9446
Tests: unit(dev)
"
* 'index-reader-better-verify-end-state/v2.1' of https://github.com/denesb/scylla:
sstables/index_reader: process_state(): add additional information to trace logging
sstables/index_reader: verify_end_state(): add check for premature EOS
sstables/index_reader: convert exception in verify_end_state() to malformed sstable exception
sstables/index_reader: add const sstable& to index_consume_entry_context
sstables/index_reader: remove unused members from index_consume_entry_context
bytes_on_disk is intended to reflect the bytes allocated for the
sstable files on disk.
Accumulating the files logical size, as done today, causes a
discrepancy between information retrieved over the
storage_service/sstables_info api, like nodetool status or nodetool
cfstats and command line tools like df -H /var/lib/scylla.
Fixes#9941
Test: unit(dev)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220118070208.3963076-1-bhalevy@scylladb.com>
Errors during parsing are usually reported via malformed sstable
exception to signify their gravity of potentially being caused by
corrupt sstables. This patch converts the exception thrown in
`index_consume_entry_context::verify_end_state()`.
While at it the error message is improved as well. It currently suggests
that parsing was ended prematurely because data ran out, while in fact
the condition under which this error is thrown is the opposite: parsing
ended but there is unconsumed data left. The current state is also added
to the error message.
"
Currently commands are regular switches. This has several disadvantages:
* CLI programs nowadays use the command-based UX, so our tools are
awkward to use to anybody used to that;
* They don't stand out from regular options;
* They are parsed at the same time as regular options, so all options
have to be dumped to a single description;
This series migrates the tools to the command based CLI. E.g. instead of
scylla sstable --validate --merge /path/to/sst1 /path/to/sst2
we now have:
scylla sstable validate --merge /path/to/sst1 /path/to/sst2
Which makes it much clearer that "validate" is the command and "merge"
is an option. And it just looks better.
Internally the command is parsed and popped from argv manually just as
we do with the tool name in scylla main(). This means we know the
command before even building the
boost::program_options::options_description representation and thus
before creating the seastar::app_template instance. Consequently we can
tailor the options registered and the --help content (the application
description) to the command run.
So now "scylla sstable --help" prints only a general description of the
tool and a list of the supported operations. Invoking "scylla sstable
{operation} --help" will print a detailed description of the operation
along with its specific options. This greatly improves the documentation
and the usability of the tool.
"
Refs #9882
* 'tools-command-oriented-cli/v1' of https://github.com/denesb/scylla:
tools/scylla-sstable: update general description
tools/scylla-sstable: proper operation-specific --help
tools/scylla-sstable: proper operation-specific options
tools/scylla-sstable: s/dump/dump-data/
tools/utils: remove now unused get_selected_operation() overload
tools: take operations (commands) as positional arguments
tools/utils: add positional-argument based overload of get_selected_operation()
tools: remove obsolete FIXMEs
If the compared mutations have binary keys, `colordiff` will declare the
file as binary and will refuse to compare them, beyond a very unhelpful
"binary files differ" summary. Add "-a" to the command line to force a
treating all files as text.
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20220117131347.106585-1-bdenes@scylladb.com>
Due to an older boost version, there is an ambiguity in
name resolution between
boost::placeholders and std::placeholders.
Message-Id: <20220117094837.653145-2-kostja@scylladb.com>
The create_keyspace_if_not_exists_impl() gets the global instance of
storage_proxy, but its only caller (the controller) already has it
and can pass it via an argument.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20220117104226.22833-1-xemul@scylladb.com>
Currently, we first delete all existing token mappings
for the endpoint from _token_to_endpoint_map and then
we add all updated token mappings for it and set should_sort_tokens
if the token is newly inserted, but since we removed all
existing mappings for the endpoint unconditionally, we
will sort the tokens even if the token existed and
its ownership did not change.
This is worthwhile since there are scenarios where
no token ownership changes. Searching and
erasing tokens from the tokens unordered_set runs
at constant time on average so doing it for n tokens
is O(n), while sorting the tokens is O(n*log(n)).
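A minimal Python sketch of the optimization described above (the function name and the single-map representation are illustrative assumptions, not Scylla's actual data structures):

```python
def update_normal_tokens(token_to_endpoint, endpoint, new_tokens):
    """Update the endpoint's token set; return True only if the sorted
    token list needs to be rebuilt (i.e. ownership actually changed)."""
    should_sort = False
    old = {t for t, e in token_to_endpoint.items() if e == endpoint}
    for t in old - set(new_tokens):
        del token_to_endpoint[t]      # O(1) average per erase
        should_sort = True
    for t in new_tokens:
        if token_to_endpoint.get(t) != endpoint:
            should_sort = True        # only changed ownership triggers a sort
        token_to_endpoint[t] = endpoint
    return should_sort
```

With this, re-announcing an unchanged token set skips the O(n*log(n)) sort entirely.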
Test: unit(dev)
DTest: replace_address_test.py::TestReplaceAddress::test_serve_writes_during_bootstrap(dev,debug)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220117101242.122512-2-bhalevy@scylladb.com>
It's currently used only by unit tests
and it is dangerous to use on a populated token_metadata
as update_normal_tokens assumes that the set of tokens
owned by the given endpoint is complete, i.e. previous
tokens owned by the endpoint are no longer owned by it,
but the single-token update_normal_token interface
appears cumulative (and has no documentation whatsoever).
It is better to remove this interface and calculate a
complete map of endpoint->tokens from the tests.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20220117101242.122512-1-bhalevy@scylladb.com>
Add the missing partition-key validation in INSERT JSON statements.
Scylla, following the lead of Cassandra, forbids an empty-string partition
key (please note that this is not the same as a null partition key, and
that null clustering keys *are* allowed).
Trying to INSERT, UPDATE or DELETE a partition with an empty string as
the partition key fails with a "Key may not be empty". However, we had a
loophole - you could insert such empty-string partition keys using an
"INSERT ... JSON" statement.
The problem was that the partition key validation was done in one place -
`modification_statement::build_partition_keys()`. The INSERT, UPDATE and
DELETE statements all inherited this same method and got the correct
validation. But the INSERT JSON statement - insert_prepared_json_statement
overrode the build_partition_keys() method and this override forgot to call
the validation function. So in this patch we add the missing validation.
Note that the validation function checks for more than just empty strings -
there is also a length limit for partition keys.
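A rough Python sketch of the validation that was missing from the INSERT JSON path (the length limit and the error strings here are illustrative assumptions, not Scylla's exact values):

```python
def validate_partition_key(key_bytes, max_len=65535):
    # An empty-string key is forbidden; note this is distinct from a
    # null key, and null clustering keys *are* allowed.
    if len(key_bytes) == 0:
        raise ValueError("Key may not be empty")
    # The validation also enforces a length limit on partition keys.
    if len(key_bytes) > max_len:
        raise ValueError(
            f"Key length of {len(key_bytes)} is longer than maximum of {max_len}")
```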
This patch also adds a cql-pytest reproducer for this bug. Before this
patch, the test passed on Cassandra but failed on Scylla.
Reported by @FortTell
Fixes #9853.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220116085216.21774-1-nyh@scylladb.com>
Fixes #9922
storage_proxy uses is_timeout_exception to traverse different code paths.
a6202ae079 broke this (because of bit rot and
intermixing) by wrapping the exception for informational purposes.
This adds a check of nested types in exception handling, as well as a test
for the routine itself.
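A hedged Python analogue of the fix (the actual code is C++; unwrapping via `__cause__` stands in for inspecting the nested/wrapped exception):

```python
def is_timeout_exception(exc):
    # Walk the chain of wrapped exceptions so that a timeout wrapped
    # for informational purposes is still classified as a timeout.
    while exc is not None:
        if isinstance(exc, TimeoutError):
            return True
        exc = exc.__cause__
    return False
```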
build_progress_virtual_reader is a virtual reader that trims off
the last clustering key column from an underlying base table. It
is here converted to flat_mutation_reader_v2.
Because range_tombstone_change uses position_in_partition, not
clustering_key_prefix, we need a new adjust_ckey() overload.
Note the transformation is likely incorrect. When trimming the
last clustering key column, an inclusive bound should
change to exclusive. However, the original code did not do this,
so we don't fix it here. It's immaterial anyway since the base
table doesn't include range tombstones.
Test: unit (dev) (which has a test for this reader)
Closes #9913
Some of the CQL tests translated from Cassandra into the test/cql-pytest
framework used the flush() function to force a flush to sstables -
presumably because this exercised yet another code path, or because it
reproduced bugs that Cassandra once had that were only visible when
reading from sstables - not from memtables.
Until now, this flush() function was stubbed and did nothing.
But we do have in test/cql-pytest a flush() implementation in
nodetool.py - which uses the REST API if possible and if not (e.g., when
running against Cassandra) uses the external "nodetool" command.
So in this patch flush() starts to use nodetool.flush() instead of
doing nothing.
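In sketch form (the callables are illustrative stand-ins; the real nodetool.py decides based on whether the REST API is reachable):

```python
def flush(rest_flush, external_nodetool_flush):
    # Prefer the REST API; when it is unavailable (e.g. running against
    # Cassandra), fall back to the external "nodetool" command.
    try:
        return rest_flush()
    except ConnectionError:
        return external_nodetool_flush()
```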
The tests continue to pass as before after this patch, and there is no
noticeable slowdown (the flush does take time, but the time spent in the
few flushes done is negligible in these tests).
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220117073112.83994-1-nyh@scylladb.com>
"
As a prerequisite the mutation fragment stream validator is converted to
v2 as well (but it still supports v1). We get one step closer to
eliminating conversions altogether from compaction.cc.
Tests: unit(dev)
"
* 'scrub-v2/v1' of https://github.com/denesb/scylla:
mutation_writer: remove v1 version segregate_by_partition()
compaction/compaction: remove v1 version of validate and scrub reader factory methods
tools/scylla-sstable: migrate to v2
test/boost/sstable_compaction_test: migrate validation tests to v2
test/boost/sstable_compaction_test: migrate scrub tests to v2
test/lib/simple_schema: add v2 of make_row() and make_static_row()
compaction: use v2 version of mutation_writer::segregate_by_partition()
mutation_writer: add v2 version of segregate_by_partition()
compaction: migrate scrub and validate to v2
mutation_fragment_stream_validator: migrate validator to v2
"
Repair obtains a permit for each repair-meta instance it creates. This
permit is supposed to track all resources consumed by that repair as
well as ensure concurrency limit is respected. However when the
non-local reader path is used (shard config of master != shard config of
follower), a second permit will be obtained -- for the shard reader of
the multishard reader. This creates a situation where the repair-meta's
permit can block the shard permit, creating a deadlock situation.
This patch solves this by dropping the count resource on the
repair-meta's permit when a non-local reader path is executed -- that is
a multishard reader is created.
Fixes: #9751
"
* 'repair-double-permit-block/v4' of https://github.com/denesb/scylla:
repair: make sure there is one permit per repair with count res
reader_permit: add release_base_resource()
Now that issues #7586 and #9487 were fixed, reverse queries - even in
long partitions - work well, we can drop the claim in
alternator/docs/compatibility.md that reverse queries are buggy for
large partitions.
We can also remove the "xfail" mark from the test that checks this
feature, as it now passes.
Refs #7586
Refs #9487
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #9831
The series that moved storage_proxy verbs to the IDL added new features to
the IDL compiler, but lacked documentation. This patch documents
the features.
dirty_memory_manager monitors memory and triggers memtable flushing if
there is too much pressure. If bad_alloc happens during the flush, it
may break the loop and flushes won't be triggered automatically, leading
to blocked writes as memory won't be automatically released.
The solution is to add exception handling to the loop, so that the inner
part always returns a non-exceptional future (meaning the loop will
break only on node shutdown).
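The shape of the fix, sketched in Python (the real loop is a Seastar future chain; `steps` and `log` are illustrative stand-ins):

```python
def flush_loop(steps, log):
    # Each iteration's body must complete non-exceptionally, so a
    # bad_alloc-style failure in one flush cannot break the loop.
    for step in steps:
        try:
            step()
        except Exception as e:
            log(f"flush failed: {e}")  # keep looping; flushing is retried later
```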
try/catch is used around on_internal_error instead of
on_internal_error_noexcept, as the latter doesn't have a version that
accepts an exception pointer. To get the exception message from
std::exception_ptr a rethrow is needed anyway, so this was a simpler
approach.
Fixes: #4174
Message-Id: <20220114082452.89189-1-mikolaj.sieluzycki@scylladb.com>
"
SSTables created by repair will potentially not conform to the
compaction strategy layout goal. If node shuts down before off-strategy
has a chance to reshape those files, node will be forced to reshape
them on restart. That causes unexpected downtime. Turns out we can skip
reshape of those files on boot, and allow them to be reshaped after
node becomes online, as if the node never went down. Those files will
go through same procedure as files created by repair-based ops. They
will be placed in maintenance set, and be reshaped iteratively until
ready for integration into the main set.
"
Fixes #9895.
tests: UNIT(dev).
* 'postpone_reshape_on_repair_originated_files' of https://github.com/raphaelsc/scylla:
distributed_loader: postpone reshape of repair-originated sstables
sstables: Introduce filter for sstable_directory::reshape
table: add fast path when offstrategy is not needed
sstables: add constant for repair origin
"
The main motivation for the set is to expel query_processor.proxy().local_db()
calls from cql3/statements code. The only places that still use q.p. like this
are those calling client_state::has_..._access() checkers. Those checks can
go with the data_dictionary which is already available on the query processor.
This is the continuation of the 9643f84d ("Eliminate direct storage_proxy usage
from cql3 statements") patch set.
As a side effect the validation/ code, that's called from has_..._access checks,
is also converted to use data_dictionary.
tests: unit(dev, debug)
"
* 'br-cql3-dictionary' of https://github.com/xemul/scylla:
validation: Make validate_column_family use data_dictionary::database
client_state: Make has_access use data_dictionary::database
client_state: Make has_schema_access use data_dictionary::database
client_state: Make has_column_family_access use data_dictionary::database
client_state: Make has_keyspace_access use data_dictionary::database
Off-strategy compaction works by iteratively reshaping the maintenance set
until it's ready for integration into the main set. As repair-based ops
produce disjoint sstables only, off-strategy compaction can complete
the reshape in a single round.
But if reshape ends up requiring more than one round, space requirement
for off-strategy to succeed can be high. That's because we're only
deleting input SSTables on completion. SSTables from maintenance set
can be only deleted on completion as we can only merge maintenance
set into main one once we're done reshaping[1]. But a SSTable that was
created by a reshape and later used as an input in another reshape can
be deleted immediately as its existence is not needed anywhere.
[1] We don't update maintenance set after each reshape round, because that
would mess with its disjointness. We also don't iteratively merge
maintenance set into main set, as the data produced by a single round
is potentially not ready for integration into main set.
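A Python sketch of the eager-deletion rule described above (the helper callables are parameters purely for illustration):

```python
def reshape_until_ready(maintenance_set, one_round, needs_more, delete):
    # Original maintenance-set sstables must survive until the whole
    # reshape completes; sstables produced by an earlier round and
    # consumed by a later one can be deleted immediately.
    inputs = list(maintenance_set)
    intermediates = set()
    while needs_more(inputs):
        outputs = one_round(inputs)
        for sst in inputs:
            if sst in intermediates:
                delete(sst)          # safe: produced by an earlier round
        intermediates = set(outputs)
        inputs = outputs
    return inputs
```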
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20220111202950.111456-1-raphaelsc@scylladb.com>
"
The series moves raft verbs to the IDL and also fix some verbs to be one
way like they were intended to be.
"
* 'gleb/raft-idl' of github.com:scylladb/scylla-dev:
raft service: make one way raft messages truly one way
raft: move raft verbs to the IDL
raft: split idl to rpc and storage
idl-compiler: always produce const variant of serializers
raft: simplify raft idl definitions
And instantly convert the validate_keyspace() as it's not called
from anywhere but the validate_column_family().
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This db argument only needs to be pushed into the
cdc::is_log_for_some_table() helper. All callers already have
the d._d.::database at hand and convert it with .real_database()
at call time, so this patch effectively generalizes those calls to
.real_database().
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
It's now called with d._d.::database converted to .real_database()
right in the argument passing, so this change can be treated as
the generalization of that .real_database() call.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Straightforward replacement. Internals of the has_column_family_access()
temporarily get .real_database(), but it will be changed soon.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Straightforward replacement. Internals of the has_keyspace_access()
temporarily get .real_database(), but it will be changed soon.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Just a facade using converters behind the scenes. The actual segregator
is not worth migrating to v2 while mutation and the flushing readers
don't have v2 versions. Still, migrating all users to a v2 API allows
the conversion to happen at a single point where more work is necessary,
instead of scattered around all the users.
We leave the v1 version in place to aid incremental migration to the v2
one.
Add support for validating v2 streams while still keeping the v1
support. Since the underlying logic is largely independent of the format
version, this is simple to do and will allow incremental migration of
users.
As requested from field engineering, add a way to disable
the optimized TWCS query algorithm (use regular query path)
just in case a bug or a performance regression shows up in
production.
To disable the optimized query path, add
'enable_optimized_twcs_queries': 'false' to compaction strategy options,
e.g.
```
alter table ks.t with compaction =
{'class': 'TimeWindowCompactionStrategy',
'enable_optimized_twcs_queries': 'false'};
```
Setting the `enable_optimized_twcs_queries` key to anything other than
`'false'` (note: a boolean `false` expands to a string `'false'`) or
skipping it (re)enables the optimized query path.
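The flag semantics can be summarized in a small sketch (the function name is illustrative):

```python
def twcs_optimized_queries_enabled(compaction_options):
    # Only the exact string 'false' disables the optimization; a missing
    # key or any other value leaves it enabled.
    return compaction_options.get('enable_optimized_twcs_queries') != 'false'
```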
Note: the flag can be set in a cluster in the middle of upgrade. Nodes
which do not understand it simply ignore it, but they do store it in
their schema tables (they store the entire `compaction` map). After
these nodes are upgraded, they will understand the flag and act
accordingly.
Note: in the situation above, some nodes may use the optimized path and
some may use the regular path. This may happen also in a fully upgraded
cluster when compaction options are changed concurrently to reads;
there is a short period of time where the schema change propagates and
some nodes have got the flag but some haven't.
This should not be a problem since the optimization does not change the
returned read results (unless there is a bug).
Generally, the flag is not intended for normal use, but for field
engineers to disable it in case of a serious problem.
Ref #6418.
Closes #9900
If the number of streams exceeds the number of token ranges
it indicates that some spurious streams from decommissioned
nodes are present.
In such a situation - simply regenerate.
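A minimal sketch of the check (names illustrative):

```python
def needs_stream_regeneration(streams, token_ranges):
    # More streams than token ranges implies spurious entries from
    # decommissioned nodes are present.
    return len(streams) > len(token_ranges)
```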
Fixes #9772
Closes #9780
Merged patch series from Gleb Natapov:
The series moves tests to use new schema announcement API and removes
the old one.
Gleb Natapov (7):
test: convert database_test to new schema announcement api
test use new schema announcement api in cql_test_env.cc
test: move cql_query_test.cc to new schema announcement api
test: move memtable_test.cc to new schema announcement api
test: move schema_change_test.cc to new schema announcement api
migration_manager: drop unused announce_ functions
migration_manager: assert that raft ops are done on shard 0
service/migration_manager.hh | 5 ---
service/migration_manager.cc | 52 ++++++++------------------------
test/boost/cql_query_test.cc | 3 +-
test/boost/database_test.cc | 5 +--
test/boost/memtable_test.cc | 2 +-
test/boost/schema_change_test.cc | 18 ++++++-----
test/lib/cql_test_env.cc | 2 +-
7 files changed, 31 insertions(+), 56 deletions(-)
Just in case the new algorithm turns out to be buggy, or give a
performance regression, add a flag to fall-back to the old algorithm for
use in the field.
Closes #9908
* github.com:scylladb/scylla:
db: config: add a flag to disable new reversed reads algorithm
replica: table: remove obsolete comment about reversed reads
This series greatly reduces the gossiper's dependence on `seastar::async` (though not completely yet).
`i_endpoint_state_change_subscriber` callbacks are converted to return futures (again, to get rid of `seastar::async` dependency), all users are adjusted appropriately (e.g. `storage_service`, `cdc::generation_service`, `streaming::stream_manager`, `view_update_backlog_broker` and `migration_manager`).
This includes futurizing and coroutinizing the whole function call chain up to the `i_endpoint_state_change_subscriber` callback functions.
To aid the conversion process, a non-`seastar::async` dependent variant of `utils::atomic_vector::for_each` is introduced (`for_each_futurized`). A different name is used to clearly distinguish converted and non-converted code, so that the last step (remove `seastar::async()` wrappers around callback-calling code in gossiper) is easier. This is left for a follow-up series, though.
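The naming split can be illustrated with a toy Python analogue (a coroutine stands in for a future-returning callback; the class and method names mirror the description above but are illustrative):

```python
import asyncio

class AtomicVector:
    def __init__(self, items):
        self._items = list(items)

    def thread_for_each(self, fn):
        # Old blocking variant (ran inside seastar::async in the original).
        for x in self._items:
            fn(x)

    async def for_each_futurized(self, fn):
        # New variant: awaits each future-returning callback in turn.
        for x in self._items:
            await fn(x)
```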
Tests: unit(dev)
Closes #9844
* github.com:scylladb/scylla:
service: storage_service: coroutinize `set_gossip_tokens`
service: storage_service: coroutinize `leave_ring`
service: storage_service: coroutinize `handle_state_left`
service: storage_service: coroutinize `handle_state_leaving`
service: storage_service: coroutinize `handle_state_removing`
service: storage_service: coroutinize `do_drain`
service: storage_service: coroutinize `shutdown_protocol_servers`
service: storage_service: coroutinize `excise`
service: storage_service: coroutinize `remove_endpoint`
service: storage_service: coroutinize `handle_state_replacing`
service: storage_service: coroutinize `handle_state_normal`
service: storage_service: coroutinize `update_peer_info`
service: storage_service: coroutinize `do_update_system_peers_table`
service: storage_service: coroutinize `update_table`
service: storage_service: coroutinize `handle_state_bootstrap`
service: storage_service: futurize `notify_*` functions
service: storage_service: coroutinize `handle_state_replacing_update_pending_ranges`
repair: row_level_repair_gossip_helper: coroutinize `remove_row_level_repair`
locator: reconnectable_snitch_helper: coroutinize `reconnect`
gms: i_endpoint_state_change_subscriber: make callbacks to return futures
utils: atomic_vector: introduce future-returning `for_each` function
utils: atomic_vector: rename `for_each` to `thread_for_each`
gms: gossiper: coroutinize `start_gossiping`
gms: gossiper: coroutinize `force_remove_endpoint`
gms: gossiper: coroutinize `do_status_check`
gms: gossiper: coroutinize `remove_endpoint`
Currently the const variant is produced only if a type and its const usage
are in the same idl file, but a type can be defined in one file and used
as const in another.
replica::database is (as its name indicates) a replica-side service, while thrift
is coordinator-side. Convert thrift's use of replica::database for data dictionary
lookups to the data_dictionary module. Since data_dictionary was missing a
get_keyspaces() operation, add that.
Thrift still uses replica::database to get the schema version. That should be
provided by migration_manager, but changing that is left for later.
Closes #9888
* github.com:scylladb/scylla:
thrift: switch from replica module to data_dictionary module
thrift: simplify execute_schema_command() calling convention
data_dictionary: add get_keyspaces() method
The BatchGetItem request can return a very large response - according to
DynamoDB documentation up to 16 MB, but presently in Alternator, we allow
even more (see #5944).
The problem is that the existing code prepares the entire response as
a large contiguous string, resulting in oversized allocation warnings -
and potentially allocation failures. So in this patch we estimate the size
of the BatchGetItem response, and if it is "big enough" (currently over
100 KB), we return it with the recently added streaming output support.
This streaming output doesn't avoid the extra memory copies unfortunately,
but it does avoid a *contiguous* allocation which is the goal of this
patch.
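The size-based switch can be sketched as follows (the ~100 KB threshold is from the description above; names are illustrative):

```python
STREAMING_THRESHOLD = 100 * 1024  # ~100 KB

def choose_response_mode(estimated_response_size):
    # Big responses go through the streaming path to avoid one huge
    # contiguous allocation; small ones keep the simple string path.
    if estimated_response_size > STREAMING_THRESHOLD:
        return 'streamed'
    return 'contiguous'
```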
After this patch, one oversized allocation warning is gone from the test:
test/alternator/run test_batch.py::test_batch_get_item_large
(a second oversized allocation is still present, but comes from the
unrelated BatchWriteItem issue #8183).
Fixes #8522
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220111170541.637176-1-nyh@scylladb.com>
Just in case the new algorithm turns out to be buggy, or give a
performance regression, add a flag to fall-back to the old algorithm for
use in the field.
Thrift is a coordinator-side service and should not touch the replica
module. Switch it to data_dictionary.
The switch is straightforward with two exceptions:
- client_state still receives replica::database parameters. After
this change it will be easier to adapt client_state too.
- calls to replica::database::get_version() remain. They should be
rerouted to migration_manager instead, as that deals with schema
management.
execute_schema_command is always called with the same first two
parameters, which are always derived from the thrift_handler
instance that contains its caller. Simplify it by making it a member
function.
This simplifies migration to data_dictionary in the next patch.
Mirroring replica::database::get_keyspaces(), for Thrift's use.
We return a vector instead of a hash map. Random access is already
available via database::find_keyspace(). The name is available
via the keyspace metadata, and in fact Thrift ignores the map
name and uses the metadata name. Using a simpler type reduces
include dependencies for this heavily used module.
The function is plumbed to replica::database::get_keyspaces() so
it returns the same data.
SSTables created by repair will potentially not conform to the compaction strategy
layout goal. If node shuts down before off-strategy has a chance to
reshape those files, node will be forced to reshape them on restart. That
causes unexpected downtime. Turns out we can skip reshape of those files
on boot, and allow them to be reshaped after node becomes online, as if
the node never went down. Those files will go through same procedure as
files created by repair-based ops. They will be placed in maintenance set,
and be reshaped iteratively until ready for integration into the main set.
Fixes #9895.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
The patch series moves the rest of the internal DDL users to do schema
changes over raft (if enabled). After that series, only tests are left
using the old API.
* 'gleb/raft-schema-rest-v6' of github.com:scylladb/scylla-dev: (33 commits)
migration_manager: drop no longer used functions
system_distributed_keyspace: move schema creation code to use raft
auth: move table creation code to use raft
auth: move keyspace creation code to use raft
table_helper: move schema creation code to use raft
cql3: make query_processor inherit from peering_sharded_service
table_helper: make setup_table() static
table_helper: co-routinize setup_keyspace()
redis: move schema creation code to go through raft
thrift: move system_update_column_family() to raft
thrift: authenticate a statement before verifying in system_update_column_family()
thrift: co-routinize system_update_column_family()
thrift: move system_update_keyspace() to raft
thrift: authenticate a statement before verifying in system_update_keyspace()
thrift: co-routinize system_update_keyspace()
thrift: move system_drop_keyspace() to raft
thrift: authenticate a statement before verifying in system_drop_keyspace()
thrift: co-routinize system_drop_keyspace()
thrift: move system_add_keyspace() to raft
thrift: co-routinize system_add_keyspace()
...
This will be useful to allow sstable_directory user to filter out
sstables that should not be reshaped. The default filter
includes everything.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This was needed to fix issue #2129 which only manifested itself with
auto_bootstrap set to false. The option is ignored now and we always
wait for schema to sync during boot.
Fixes #9408
While it is rare, some customer issues have shown that we can run into cases where commit log apply (writing mutations to it) fails badly. In the known cases, due to oversized mutations. While these should have been caught earlier in the call chain really, it would probably help both end users and us (trying to figure out how they got so big and how they got so far) if we added info to the errors thrown (and printed), such as ks, cf, and mutation content.
Somewhat controversial, this makes the apply with CL decision path coroutinized, mainly to be able to do the error handling for the more informative wrapper exception easier/less ugly. Could perhaps do with futurize_invoke + then_wrapper also. But future is coroutines...
This is as stated somewhat problematic, it adds an allocation to perf_simple_query::write path (because of crap clang cr frame folding?). However, tasks/op remain constant and actual tps (though unstable) remain more or less the same (on my crappy measurements).
Counter path is unaffected, as coroutine frame alloc replaces with(...)
dtest for the wrapped exception on separate pr.
Closes #9412
* github.com:scylladb/scylla:
database: Add error message with mutation info on commit log apply failure
database: coroutinize do_apply and apply_with_commitlog
Fixes #9408
While it is rare, some customer issues have shown that we can run into cases
where commit log apply (writing mutations to it) fails badly. In the known
cases, due to oversized mutations. While these should have been caught earlier
in the call chain really, it would probably help both end users and us (trying
to figure out how they got so big and how they got so far) if we added info
to the errors thrown (and printed), such as ks, cf, and mutation content.
Somewhat controversial. Making the apply with CL decision path
coroutinized, mainly to be able to in next patch make error handling
more informative (because we will have exceptions that are immediate
and/or futurized).
This is as stated somewhat problematic, it adds an allocation to
perf_simple_query::write path (because of crap clang cr frame folding?).
However, tasks/op remain constant and actual tps (though unstable)
remain more or less the same (on my crappy measurements).
Counter path is unaffected, as coroutine frame alloc replaces with(...)
alloc, and all is same and dandy.
I am hoping that the simpler error + verbose code will compensate for
the extra alloc.
Refs: #9555
When running the "Kraken" dynamodb streams test to provoke the issue observed by QA, I noticed on my setup mainly two things: Large allocation stalls (+ warnings) and timeouts on read semaphores in DB.
This tries to address the first issue, partly by making query_result_view serialization using chunked vector instead of linear one, and by introducing a streaming option for json return objects, avoiding linearizing to string before wire.
Note that the latter has some overhead issues of its own, mainly data copying, since we essentially will be triple buffering (local, wrapped http stream, and final output stream). Still, normal string output will typically do a lot of realloc which is potential extra copies as well, so...
This is not really performance tested, but with these tweaks I no longer get large alloc stalls at least, so that is a plus. :-)
Closes #9713
* github.com:scylladb/scylla:
alternator::executor: Use streamed result for scan etc if large result
alternator::streams: Use streamed result in get_records if large result
executor/server: Add routine to make stream object return
rjson: Add print to stream of rjson::value
query_idl: Make qr_partition::rows/query_result::partitions chunked
Allows direct stream of object to seastar::stream. While not 100%
efficient, it has the advantage of avoiding large allocations
(long string) for huge result messages.
"
With this series the mutation compactor can now consume a v2 stream. On
the output side it still uses v1, so it can now act as an online
v2->v1 converter. This allows us to push out v2->v1 conversion to as far
as the compactor, usually the next to last component in a read pipeline,
just before the final consumer. For reads this is as far as we can go,
as the intra-node ABI and hence the result-sets built are v1. For
compaction we could go further and eliminate conversion altogether, but
this requires some further work on both the compactor and the sstable
writer and so it is left to be done later.
To summarize, this patchset enables a v2 input for the compactor and it
updates compaction and single partition reads to use it.
"
* 'mutation-compactor-consume-v2/v1' of https://github.com/denesb/scylla:
table: add make_reader_v2()
querier: convert querier_cache and {data,mutation}_querier to v2
compaction: upgrade compaction::make_interposer_consumer() to v2
mutation_reader: remove unnecessary stable_flattened_mutations_consumer
compaction/compaction_strategy: convert make_interposer_consumer() to v2
mutation_writer: migrate timestamp_based_splitting_writer to v2
mutation_writer: migrate shard_based_splitting_writer to v2
mutation_writer: add v2 clone of feed_writer and bucket_writer
flat_mutation_reader_v2: add reader_consumer_v2 typedef
mutation_reader: add v2 clone of queue_reader
compact_mutation: make start_new_page() independent of mutation_fragment version
compact_mutation: add support for consuming a v2 stream
compact_mutation: extract range tombstone consumption into own method
range_tombstone_assembler: add get_range_tombstone_change()
range_tombstone_assembler: add get_current_tombstone()
seastar::later() was recently deprecated and replaced with two
alternatives: a cheap seastar::yield() and an expensive (but more
powerful) seastar::check_for_io_immediately(), that corresponds to
the original later().
This patch replaces all later() calls with the weaker yield(). In
all cases except one, it's unambiguously correct. In one case
(test/perf scheduling_latency_measurer::stop()) it's less clear,
since check_for_io_immediately() will additionally force a poll and
so will cause more work to be done (but no additional tasks to be
executed). However, I think that any measurement that relies on
measuring the work of the last tick is inaccurate anyway (you need
thousands of ticks to get any amount of confidence in the
measurement), so in the end it doesn't matter which we pick.
Tests: unit (dev)
Closes #9904
* 'gleb/sp-idl-v1' of github.com:scylladb/scylla-dev:
storage_proxy: move all verbs to the IDL
idl-compiler: allow const references in send() parameter list
idl-compiler: support smart pointers in verb's return value
idl-compiler: support multiple return value and optional in a return value
idl-compiler: handle :: at the beginning of a type
idl-compiler: sending one way message without timeout does not require ret value specialization as well
storage_proxy: convert more address vectors to inet_address_vector_replica_set
Split off of #9835.
The series removes extraneous includes of lister.hh from header files
and adds a unit test for lister::scan_dir to test throwing an exception
from the walker function passed to `scan_dir`.
Test: unit(dev)
Closes#9885
* github.com:scylladb/scylla:
test: add lister_list
lister: add more overloads of fs::path operator/ for std::string and string_view
resource_manager: remove unnecessary include of lister.hh from header file
sstables: sstable_directory: remove unnecessary include of lister.hh from header file
configure.py uses the deprecated Python function tempfile.mktemp().
Because this function is labeled a "security risk" it is also a magnet
for automated security scanners... So let's replace it with the
recommended tempfile.mkstemp() and avoid future complaints.
The actual security implications of this mktemp() call are negligible to
non-existent: First, it affects only the build process (configure.py), not
the build product itself. Second, the worst that an attacker (who
would need to run on the build machine!) can do is cause a compilation
test in configure.py to fail because it can't write to its output file.
Reported by @srikanthprathi
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220111121924.615173-1-nyh@scylladb.com>
Test the lister class.
In particular the ability to abort the lister
when the walker function throws an exception.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
But define namespace fs = std::filesystem in the header
since many use sites already depend on it
and it's a convention throughout scylla's code.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
"
The first patch introduces evictable_reader_v2, and the second one
further simplifies it. We clone instead of converting because there
is at least one downstream (by way of multishard_combining_reader) use
that is not itself straightforward to convert at the moment
(multishard_mutation_query), and because evictable_reader instances
cannot be {up,down}graded (since users also access the underlying
buffers). This also means that shard_reader, reader_lifecycle_policy
and multishard_combining_reader have to be cloned.
"
* tag 'clone-evictable-reader-to-v2/v3' of https://github.com/cmm/scylla:
convert make_multishard_streaming_reader() to flat_mutation_reader_v2
convert table::make_streaming_reader() to flat_mutation_reader_v2
convert make_flat_multi_range_reader() to flat_mutation_reader_v2
view_update_generator: remove unneeded call to downgrade_to_v1()
introduce multishard_combining_reader_v2
introduce shard_reader_v2
introduce the reader_lifecycle_policy_v2 abstract base
evictable_reader_v2: further code simplifications
introduce evictable_reader_v2 & friends
"
The fix is to keep a shared pointer to the proxy on the query_pager.
tests: unit(dev)
"
* 'br-keep-proxy-on-pager-2' of https://github.com/xemul/scylla:
pager: Use local proxy pointer
pager: Keep shared pointer to proxy onboard
After the rewrite of the test/scylla-gdb framework, the test for "scylla fiber"
was disabled - and this patch brings it back.
For the "scylla fiber" operation to do something interesting (and not just
print an error message and seem to succeed...) it needs a real task pointer.
The old code interrupted Scylla in a breakpoint and used get_local_tasks(),
but in the new test framework we attach to Scylla while it's idle, so
there are no ready tasks. So in this patch we use the find_vptrs()
function to find a continuation from http_server::do_accept_one() - it
has an interesting fiber of 5 continuations.
After this patch all 33 tests in test/scylla-gdb/test_misc.py pass.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220110211813.581807-1-nyh@scylladb.com>
distributed_loader is replica-side thing, so it belongs in the
replica module ("distributed" refers to its ability to load
sstables in their correct shards). So move it to the replica
module.
The change exposes a dependency on the construction
order of static variables (which isn't defined), so we remove
the dependency in the first two patches.
Closes#9891
* github.com:scylladb/scylla:
replica: move distributed_loader into replica module
tracing: make sure keyspace and table names are available to static constructors
auth: make sure keyspace and table names are available to static constructors
This series gets rid of the global repair_tracker
and thread-local node_ops_metrics instances.
It does so by first making the repair_tracker sharded,
with an instance per repair_service shard,
then exposing repair_service::repair_tracker
and keeping a reference to the repair_service in repair_info.
Finally, the node_ops_metrics instances are moved from
thread-local global variables to class repair_service.
The motivation for this series is twofold:
1. There is a global effort to get rid of global services
and instantiate all services on the stack of main() or cql_test_env.
2. As part of https://github.com/scylladb/scylla/issues/9809,
we would like to eventually use a generic job tracker for both repair
and compaction, so this would be one of the preliminary steps to get there.
Refs #9809
Test: unit(release) (including scylla-gdb)
Dtest: repair_additional_test.py::TestRepairAdditional::{test_repair_disjoint_row_2nodes,test_repair_joint_row_3nodes_2_diff_shard_count} replace_address_test.py::TestReplaceAddress::test_serve_writes_during_bootstrap[rbo_enabled]
(Still seeing https://github.com/scylladb/scylla/issues/9785 but nothing worse)
* github.com:bhalevy/scylla.git deglobalize-repair-tracker-v4
repair: repair_tracker: get rid of _the_tracker
repair: repair_service: move free abort_repair_node_ops function to repair_service
repair_service: deglobalize node_ops_metrics
repair: node_ops_metrics: fixup indentation
repair: node_ops_metrics: declare in header file
repair: repair_info: add check_in_shutdown method
repair: use repair_info to get to the repair tracker
repair: move tracker-dependent free functions to repair_service
repair: tracker: mark get function const
repair_service: add repair_tracker getter
repair: make repair_tracker sharded
repair: repair_tracker: get rid of unused abort_all_abort_source
repair: repair_tracker: get rid of unused shutdown abort source
Mechanical changes and a resulting downgrade in one caller (which is
itself converted later).
Signed-off-by: Michael Livshin <michael.livshin@scylladb.com>
Almost all mechanical: not passing a `reader` parameter around when we
know it's the `_reader` member, folding a short one-use method into
its caller.
Signed-off-by: Michael Livshin <michael.livshin@scylladb.com>
Cloning instead of converting because there is at least one
downstream (via multishard_combining_reader) use that is not
straightforward to convert (multishard_mutation_query).
The clone is mostly mechanical and much simpler than the original,
because it does not have to deal with range tombstones when deciding
if it is safe to pause the wrapped reader, and also does not have to
trim any range tombstones.
Signed-off-by: Michael Livshin <michael.livshin@scylladb.com>
To emphasize that the function requires `seastar::thread`
context to function properly.
Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
run_with_compaction_disabled() is used to temporarily disable compaction
for a table T. Not only regular compaction, but all types.
Turns out it's stopping all types but it's only preventing new regular
compactions from starting. So major for example can start even with
compaction temporarily disabled. This is fixed by not allowing
compaction of any type if disabled. This wasn't possible before as
scrub incorrectly ran entirely with compaction disabled, so it wouldn't
be able to start, but now it only disables compaction while retrieving
its candidate list.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20220107154942.59800-1-raphaelsc@scylladb.com>
Reshape isn't mandatory for correctness, unlike resharding.
So we can allow boot to continue even in the face of reshape
failure. Without this, boot will fail right away due to
unhandled exception. This is intended to make population
more resilient as any exception, even "benign" ones,
may cause boot to fail. It's better to allow boot to
continue from where it left off, as if there's an exception
like io error, or OOM, population will be unable to
complete anyway.
This patch was written based on the observation that dangling
errors in the interposer consumer used by compaction could cause
a different exception to be triggered, like broken_promise,
when the user asked reshape to stop. This can no longer happen
now, but better safe than sorry.
So regular compaction can now pick up the backlog once the node is
online.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20220107130539.14899-1-raphaelsc@scylladb.com>
distributed_loader is replica-side thing, so it belongs in the
replica module ("distributed" refers to its ability to load
sstables in their correct shards). So move it to the replica
module.
Static constructors (specifically for the `system_keyspaces` global variable)
need their dependencies to be already constructed when their own
construction begins. Because tracing uses seastar::sstring, which is not
constexpr, we must change it to std::string_view (which is). Change
the type and perform the required adjustments. The definition is moved
to the header file for simplicity.
Currently a send function's parameters and the rpc handler's parameters
both have to be values, but sometimes we want the send function to receive
a const reference to a value to avoid copying, while the handler obviously
still needs to get it by value. Support that by introducing one more type
attribute, [[ref]]. If present, the code generator makes the send function's
argument look like 'const type&' while the handler's argument remains
'type'.
A verb's handler may return a 'foreign_ptr<smart_ptr<type>>' value which
is received on the client side as a naked 'type'. The current verb
generator code can only support a symmetric handler/send helper where the
return type of the handler matches the return type of the send function.
Fix that by adding two new attributes that can annotate a return type:
unique_ptr and lw_shared_ptr. If the unique_ptr attribute is present, the
return type of the handler will be 'foreign_ptr<unique_ptr<type>>' and the
return type of the send function will be just 'type'.
RPC verbs can be extended to return more than one value, with new values
returned as rpc::optional. When adding a return value to a verb,
its return type becomes rpc::tuple<type1, type2, type3>. In addition,
new return values may be marked as rpc::optional for backwards
compatibility. The patch allows parsing a return expression of the form:
-> type1, type2 [[version 1.1.0]]
which will be translated into:
rpc::tuple<type1, rpc::optional<type2>>
Do not depend on the_repair_tracker().
With that, the_repair_tracker() is no longer used
and should be deleted.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Embed the node_ops_metrics instance in
a sharded repair_service member.
Test: curl -silent http://127.0.0.1:9180/metrics | grep node_ops | grep -v "^#"
on a freshly started scylla instance.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
For de-globalizing its thread-local instance
by placing a node_ops_metrics member in repair_service.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
These functions are called from the api layer.
Continue to hide the repair tracker from the caller
but use the repair_service already available
at the api layer to invoke the respective high-level
methods without requiring `the_repair_tracker()`.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
And rename the global repair_tracker getter to
`the_repair_tracker` as the first step to get rid of it.
repair_service methods now use the repair_service::repair_tracker
method.
The global getter was renamed to `the_repair_tracker()`
temporarily while eliminating it in this series
to help distinguish it from repair_service::repair_tracker().
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Rather than keeping all shards' semaphore
and repair_info:s on the tracker's single-shard instance,
instantiate it on all shards, tracking the local
repair jobs on its local shard.
For now, until it's deglobalized, turn
_the_tracker into static thread_local pointer.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
There are a few places that need the storage proxy and use a
global method to achieve it. Since the previous patch, the pager
has a local non-null pointer to it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Pagers are created by alternator and by the select statement, both of
which have the proxy reference at hand. Next, the pager's unique_ptr
is captured by the lambda of its fetch_page() continuation, and thus
it survives the fetch_page execution and then gets destroyed.
* seastar 655078dfdb...28fe4214e5 (2):
> program_options: avoid including boost/program_options.hpp when possible
> smp: split smp_options out of smp.hh
sstables/sstables.cc uses seastar::metrics but was missing an include of
<seastar/core/metrics.hh>. It probably received this include through
some other random included Seastar header (e.g., smp.hh).
Now that we're reducing the unnecessary inclusions in Seastar (an ongoing
effort of Seastar patches), it is no longer included implicitly, and we
need to include it explicitly in sstables.cc.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220109162823.511781-1-nyh@scylladb.com>
As already noted in commit eac6fb8, many of the scylla-gdb tests fail on
aarch64 for various reasons. The solution used in that commit was to
have test/scylla-gdb/run pretend to succeed - without testing anything -
when not running on x86_64. This workaround was accidentally lost when
scylla-gdb/run was recently rewritten.
This patch brings this workaround back, but in a slightly different form -
Instead of the run script not doing anything, the tests do get called, but
the "gdb" fixture in test/scylla-gdb/conftest.py causes each individual
test to be skipped.
The benefit of this approach is that it can easily be improved in the
future to only skip (or xfail) specific tests which are known to fail on
aarch64, instead of all of them - as half of the tests do pass on aarch64.
Fixes#9892.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220109152630.506088-1-nyh@scylladb.com>
This is a consolidation of #9714 and #9709 PRs by @elcallio that were reviewed by @asias
The last comment on those was that they should be consolidated in order not to create a security degradation for
ec2 setups.
For some cases it is impossible to determine the dc or rack association for nodes on outgoing connections.
One example is when some IPs are hidden behind a NAT layer.
In some cases this creates problems where one side of the connection is aware of the rack/dc association
while the other isn't.
The solution here is a two stage one:
1. First add a gossip reverse lookup that will help us determine the rack/dc association for a broader (hopefully all) range
of setups and NAT situations.
2. When this fails - be more strict about downgrading the connection, to ensure that both sides will at least
downgrade the connection instead of one side failing to start when it cannot determine the rack/dc association.
Fixes#9653
/cc @elcallio @asias
Closes#9822
* github.com:scylladb/scylla:
messaging_service: Add reverse mapping of private ip -> public endpoint
production_snitch_base: Do reverse lookup of endpoint for info
messaging_service: Make dc/rack encryption check for connection more strict
init.hh relies on boost::program_options but forgot to include the
header file <boost/program_options.hpp> for it. Today, this doesn't
matter, because Seastar unnecessarily includes <boost/program_options.hpp>
from unrelated header files (such as smp.hh) - so it ends up not being
missing.
But we plan to clean up Seastar from those unnecessary includes, and
then including what we need in init.hh will become important.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220109123152.492466-1-nyh@scylladb.com>
Static constructors (specifically for the `system_keyspaces` global variable)
need their dependencies to be already constructed when their own
construction begins. Enforce that for auth keyspace and table names
using the constinit keyword.
Repair obtains a permit for each repair-meta instance it creates. This
permit is supposed to track all resources consumed by that repair as
well as ensure concurrency limit is respected. However when the
non-local reader path is used (shard config of master != shard config of
follower), a second permit will be obtained -- for the shard reader of
the multishard reader. This creates a situation where the repair-meta's
permit can block the shard permit, resulting in a deadlock.
This patch solves this by dropping the count resource on the
repair-meta's permit when a non-local reader path is executed -- that is
a multishard reader is created.
In fact the existing `make_reader()` is renamed to `make_reader_v2()`,
dropping the `downgrade_to_v1()` from the returned reader. To ease
incremental migration we add a `make_reader()` implementation which
downgrades this reader back to v1.
`table::as_mutation_source()` is also updated to use the v2 reader
factory method.
The shard_mutation_querier is left using a v1 reader in its API as the
multishard query code is not ready yet. When saving this reader it is
upgraded to v2 and on lookup it is downgraded to v1. This should cancel
out thanks to upgrade/downgrade unwrapping.
Said wrapper was conceived because `compact_mutation` used to be unmovable
and readers wanted movable consumers. But `compact_mutation` has been
movable for years now, as all its unmovable bits were moved into an
`lw_shared_ptr<>` member. So drop this unnecessary wrapper and its
unnecessary usages.
Since we have multiple writers using this that we don't want to migrate
all at once, we create a v2 version of said classes so we can migrate
them incrementally.
As this reader is used in a wide variety of places, it would be a
nightmare to upgrade all such sites in one go. So create a v2 clone and
migrate users incrementally.
Consuming either a v1 or v2 stream is supported now, but compacted
fragments are still emitted in the v1 format, thus the compactor acts as an
online downgrader when consuming a v2 stream. This allows pushing the
downgrade to v1 on the input side all the way into the compactor. This
means that reads, for example, can now use an all-v2 reader pipeline, with
the still-mandatory downgrade to v1 happening at the last possible place:
just before creating the result-set. Mandatory because our intra-node
ABI is still v1.
There are consumers that are ready for v2 in principle (e.g. compaction),
but they have to wait a little bit more.
Move the ::database, ::keyspace, and ::table classes to a new replica
namespace and replica/ directory. This designates objects that only
have meaning on a replica and should not be used on a coordinator
(but note that not all replica-only classes should be in this module,
for example compaction and sstables are lower-level objects that
deserve their own modules).
The module is imperfect - some additional classes like distributed_loader
should also be moved, but there is only one way to untie Gordian knots.
Closes#9872
* github.com:scylladb/scylla:
replica: move ::database, ::keyspace, and ::table to replica namespace
database: Move database, keyspace, table classes to replica/ directory
We now have detailed per-operation descriptions, so remove
operation-specific parts of the general one and instead add more details
on the common options and arguments.
Add a detailed description to each of the operations. This description
replaces the general one when the operation specific help is displayed
(scylla sstable {operation} --help). The existing short description of
the operations is demoted to a summary and is made even shorter. This
will serve as the headline on the operation specific help page, as well
as the summary on the operation listing.
This allows the specifics of each operation to be detailed in length
instead of the terse summary that was available before.
Operation-specific options are a mess currently. Some of them are in the
general options, all individual operations having to check for their
presence and warn if unsupported ones are set. These options were
general only when scylla-sstable had a single operation (dump). They
(most of them) became specific as soon as a second one was added.
Other specific options are in the awkward to use (both on the CLI
and in code) operation-specific option map.
This patch cleans this mess up. Each operation declares the option it
supports and these are only added to the command line when the specific
operation is chosen. General options now only contain options that are
truly universal.
As a result, scylla-sstable now has operation-specific --help content.
Operation-specific options are only printed when the operation is
selected:
scylla sstable --help
will only print generic options, while:
scylla sstable dump-data --help
will also print options specific to said operation. The description is
the same still, but this will be fixed in the next patch too.
Move replica-oriented classes to the replica namespace. The main
classes moved are ::database, ::keyspace, and ::table, but a few
ancillary classes are also moved. There are certainly classes that
should be moved but aren't (like distributed_loader) but we have
to start somewhere.
References are adjusted treewide. In many cases, it is obvious that
a call site should not access the replica (but the data_dictionary
instead), but that is left for separate work.
scylla-gdb.py is adjusted to look for both the new and old names.
We now have dump-{component} for all sstable components, so rename dump
to dump-data to follow the established naming scheme and to clear any
possible confusion about what it dumps.
As opposed to the current one, which expects the operation to be given
with the --operation syntax, this new overload expects it as the first
positional argument. If found and valid, it is extracted from the
arglist and returned. Otherwise exit() is invoked to simplify error
handling.
Tables waiting for a chance to run reshape wouldn't trigger stop
exception, as the exception was only being triggered for ongoing
compactions. Given that stop reshape API must abort all ongoing
tasks and all pending ones, let's change run_custom_job() to
trigger the exception if it found that the pending task was
asked to stop.
Tests:
dtest: compaction_additional_test.py::TestCompactionAdditional::test_stop_reshape_with_multiple_keyspaces
unit: dev
Fixes#9836.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211223002157.215571-1-raphaelsc@scylladb.com>
The database, keyspace, and table classes represent the replica-only
part of the objects after which they are named. Reading from a table
doesn't give you the full data, just the replica's view, and it is not
consistent since reconciliation is applied on the coordinator.
As a first step in acknowledging this, move the related files to
a replica/ subdirectory.
This patch is an almost complete rewrite of the test/scylla-gdb
framework for testing Scylla's gdb commands.
The goals of this rewrite are described in issue #9864. In short, the
goals are:
1. Use pytest to define individual test cases instead one long Python
script. This will make it easier to add more tests, to run only
individual tests (e.g., test/scylla-gdb/run somefile.py::sometest),
to understand which test failed when it fails - and a lot of other
pytest conveniences.
2. Instead of an ad-hoc shell script to run Scylla, gdb, and the test,
use the same Python code which is used in other test suites (alternator,
cql-pytest, redis, and more). The resulting handling of the temporary
resources (processes, directories, IP address) is more robust, and
interrupting test/scylla-gdb/run will correctly kill its child
processes (both Scylla and gdb).
All existing gdb tests (except one - more on this below...) were
easily rewritten in the new framework.
The biggest change in this patch is who starts what. Before this patch,
"run" starts gdb, which in turn starts Scylla, stops it on a breakpoint,
and then runs various tests. After this patch, "run" starts Scylla on
its own (like it does in test/cql-pytest/run, et al.), and then gdb runs
pytest - and in a pytest fixture attaches to the running Scylla process.
The biggest benefit of this approach is that "run" is aware of both gdb
and Scylla, and can kill both abruptly with SIGKILL to end the test.
But there's also a downside to this change: One of the tests (of "scylla
fiber") needs access to some task object. Before this patch, Scylla was
stopped on a breakpoint, and a task was available at that point. After
this patch, we attach gdb to an idle Scylla, and the test cannot find
any task to use. So the test_fiber() test fails for now.
One way we could perhaps fix it is to add a breakpoint and "continue"
Scylla a bit more after attaching to it. However, I could not find the
right breakpoint - and we may also need to send a request to Scylla to
get it to reach that breakpoint. I'm still looking for a better way
to have access to some "task" object we can test on.
Fixes#9864.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220102221534.1096659-1-nyh@scylladb.com>
This set contains follow-up fixes to folding tools into the scylla
executable:
* Improve the app description of scylla w.r.t. tools
* Add a new --list-tools option
* Error out when the first argument is unrecognized
Tests: unit(dev)
Botond Dénes (3):
main: rephrase app description
main: add move tool listing to --list-tools
main: improve handling of non-matching argv[1]
main.cc | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)
Be silent when argv[1] starts with "-"; it is probably an option to
scylla (and "server" is missing from the command line).
Print an error and stop when argv[1] doesn't start with "-", since the
user presumably meant to start either the server or a tool and mis-typed
it. Instead of trying to guess what they meant, stop with a clear error
message.
And make it the central place listing available tools (to minimize the
places to update when adding a new one). The description is edited to
point to this command instead of listing the tools itself.
Remove "compatible with Apache Cassandra", scylla is much more than that
already.
Rephrase the part describing the included tools such that it is clear
that the scylla server is the main thing and the tools are the "extra"
additions. Also use the term "tool" instead of the term "app".
The error-handling code removes the cache entry but this leads to an
assertion because the entry is still referenced by the entry pointer
instance which is returned on the normal path. To avoid this clear the
pointer on the error path and make sure there are no additional
references kept to it.
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20220105140859.586234-2-bdenes@scylladb.com>
When a template is instantiated in a header file which is included by many
source files, the compiler needs to compile it again and again.
ClangBuildAnalyzer helps find the worst cases of this happening, and one
of the worst happens to be
seastar::throw_with_backtrace<marshal_exception, sstring>
This specific template function takes (according to ClangBuildAnalyzer)
362 milliseconds to instantiate, and this is done 312 (!) times, because
it reaches virtually every Scylla source file via either types.hh or
compound.hh which use this idiom.
Unfortunately, C++ as it exists today does not have a mechanism to
avoid compiling a specific template instantiation if this was already
done in some other source file. But we can do this manually using
the C++11 feature of "extern template":
1. For a specific template instance, in this case
seastar::throw_with_backtrace<marshal_exception, sstring>,
all source files except one specify it as "extern template".
This means that the code for it will NOT be built in this source
file, and the compiler assumes the linker will eventually supply it.
2. At the same time, one source file instantiates this template
instance once regularly, without "extern".
The numbers from ClangBuildAnalyzer suggest that this patch should
reduce total build time by 1% (in dev build mode), but this is hard to
measure in practice because the very long build time (210 CPU minutes on
my laptop) usually fluctuates by more than 1% in consecutive runs.
However, we've seen in the past that a good estimate of build time is
the total produced object size (du -bc build/dev/**/*.o). This patch
indeed reduces this total object size (in dev build mode) by exactly 1%.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220105171453.308821-1-nyh@scylladb.com>
The header file <seastar/net/ip.hh> is a large collection of unrelated stuff, and according to ClangBuildAnalyzer, takes 2 seconds to compile for every source file that included it - and unfortunately virtually all Scylla source files included it - through either "types.hh" or "gms/inet_address.hh". That's 2*300 CPU seconds wasted.
In this two-patch series we completely eliminate the inclusion of <seastar/net/ip.hh> from Scylla. We still need the ipv4_address, ipv6_address types (e.g., gms/inet_address.hh uses it to hold a node's IP address) so those were split (in a Seastar patch that is already in) from ip.hh into separate small header files that we can include.
This patch reduces the entire build time (of build/dev/scylla) by 4% - reducing almost 10 sCPU minutes (!) from the build.
Closes#9875
* github.com:scylladb/scylla:
build performance: do not include <seastar/net/ip.hh>
build performance: speed up inclusion of <gms/inet_address.hh>
In a previous patch, we noticed that the header file <gms/inet_address.hh>,
which is included, directly or indirectly, by most source files,
includes <seastar/net/ip.hh> which is very slow to compile, and
replaced it by the much faster-to-include <seastar/net/ipv[46]_address.hh>.
However, we also included <seastar/net/ip.hh> in types.hh - and that
too is included by almost every file, so the actual saving from the
above patch was minimal. So in this patch we replace this include too.
After this patch Scylla does not include <seastar/net/ip.hh> at all.
According to ClangBuildAnalyzer, this reduces the average time to include
types.hh (multiply this by 312 times!) from 4 seconds to 1.8 seconds,
and reduces total build time (dev mode) by about 3%.
Some of the source files were now missing some include directives, that
were previously included in ip.hh - so we need to add those explicitly.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The test case assumed an int32 partition key, but
scylla_bench_large_part_ds1 has an int64 partition key. This resulted in
no results being returned by the reader.
Fixed by introducing a partition key factory at the data source level.
Message-Id: <20220105150550.67951-1-tgrabiec@scylladb.com>
dbuild's README contained some vague and very partial hints on how to use
ccache with dbuild. Replace them with more concrete instructions.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211229180433.781906-1-nyh@scylladb.com>
This configures seastar to act more appropriately for a tool app, i.e.
not act as if it owns the place, taking over all system resources.
These tools are often run on a developer machine, or even next to a
running scylla instance, so we want them to be as unintrusive as
possible.
Also use the new tool mode in the existing tools.
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20211220143104.132327-1-bdenes@scylladb.com>
Sort follower nodes by proximity so that in the step where the
master node gets missing rows from repair follower nodes, the master
node has a chance to get the missing rows from a near node first (e.g.,
a local dc node), avoiding getting rows from a far node.
For example:
dc1: n1, n2
dc2: n3, n4
dc3: n5, n6
Run repair on n1, with this patch, n1 will get data from n2 which is in the same dc first.
[shard 0] repair - Repair 1 out of 1 ranges, id=[id=1, uuid=8b0040bd-5aa5-42e1-bb9f-58c5e7052aec],
shard=0, keyspace=ks, table={cf}, range=(-6734413101754081925, -6539883972247625343],
peers={127.0.39.5, 127.0.39.6, 127.0.39.2, 127.0.39.4, 127.0.39.3},
live_peers={127.0.39.5, 127.0.39.6, 127.0.39.2, 127.0.39.4, 127.0.39.3}
[shard 0] repair - Before sort = {127.0.39.5, 127.0.39.6, 127.0.39.2, 127.0.39.4, 127.0.39.3}
[shard 0] repair - After sort = {127.0.39.2, 127.0.39.5, 127.0.39.6, 127.0.39.4, 127.0.39.3}
[shard 0] repair - Started Row Level Repair (Master): local=127.0.39.1,
peers={127.0.39.2, 127.0.39.5, 127.0.39.6, 127.0.39.4, 127.0.39.3}
Closes#9769
We already have multiple tests for the unimplemented "Projection" feature
of GSI and LSI (see issue #5036). This patch adds seven more test cases,
focusing on various types of error conditions (e.g., trying to project
the same attribute twice), esoteric corner cases (it's fine to list a key in
NonKeyAttributes!), and corner cases that I expect we will have in our
implementation (e.g., a projected attribute may either be a real Scylla
column or just an element in a map column).
All new tests pass on DynamoDB and fail on Alternator (due to #5036), so
marked with "xfail".
Refs #5036.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211228193748.688060-1-nyh@scylladb.com>
"
Like flat_mutation_reader_from_fragments, this reader is also heavily
used by tests to compose a specific workload for readers above it. So
instead of converting it, we add a v2 variant and leave the v1 variant
in place.
The v2 variant was written from scratch to have built-in support for
reading in reverse. It is built on `mutation::consume()` to avoid
duplicating the logic of consuming the contents of the mutation. To
avoid stalls, `mutation::consume()` gets support for pausing and
resuming consuming a mutation.
Tests: unit(dev)
"
* 'flat_mutation_reader_from_mutations_v2/v2' of https://github.com/denesb/scylla:
flat_mutation_reader: convert make_flat_mutation_reader_from_mutation() v2
flat_mutation_reader: extract mutation slicing into a function
mutation: consume(): make it pausable/resumable
mutation: consume(): restructure clustering iterator initialization
test/boost/mutation_test: add rebuild test for mutation::consume()
"
Completes coroutinization of rewrite_sstables().
tests: UNIT(debug)
"
* 'rewrite_sstable_coroutinization' of https://github.com/raphaelsc/scylla:
compaction_manager: coroutinize main loop in sstable rewrite procedure
compaction_manager: coroutinize exception handling in sstable rewrite procedure
compaction_manager: mark task::finish_compaction() as noexcept
compaction_manager: make maybe_stop_on_error() more flexible
When you run "configure.py", the result is not only the creation of
./build.ninja - it also creates build/<mode>/seastar/build.ninja
and build/<mode>/abseil/build.ninja. After a "rm -r build" (or "ninja
clean"), "ninja" will no longer work because those files are missing
when Scylla's ninja tries to run ninja in those internal projects.
So we need to add a dependency, e.g., that running ninja in Seastar
requires build/<mode>/seastar/build.ninja to exist, and also say
that the rule that (re)runs "configure.py" generates those files.
After this patch,
configure.py --with-some-parameters --of-your-choice
rm -r build
ninja
works - "ninja" will re-run configure.py with the same parameters
when it needs Seastar's or Abseil's build.ninja.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211230133702.869177-1-nyh@scylladb.com>
This patch adds a new cql-pytest test file - test_ttl.py - with
currently just a couple of tests for the "with default_time_to_live"
feature. One is a basic test, and the second reproduces issue #9842 -
that "using ttl 0" should override the default time to live, but
doesn't.
The test for #9842, test_default_ttl_0_override, fails on Scylla and
passes on Cassandra, and is marked "xfail".
Refs #9842.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211227091502.553577-1-nyh@scylladb.com>
We can stop only specific compaction types.
Reshard should be excluded since it mustn't be stopped.
Other compaction types like "VALIDATION" or "INDEX_BUILD"
are syntactically valid but unsupported by scylla, so we better
return an error rather than appear to support them.
Test: unit(dev)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211222133449.2177746-1-bhalevy@scylladb.com>
Since this reader is also heavily used by tests to compose a specific
workload for readers above it, we just add a v2 variant, instead of
changing the existing v1 one.
The v2 variant was written from scratch to have built-in support for
reading in reverse. It is built on `mutation::consume()` to avoid
duplicating the logic of consuming the contents of the mutation.
A v2 native unit test is also added.
To avoid stalls or overconsumption for consumers which have a limit on
how much they want to consume in one go, mutation::consume() is made
pausable/resumable. This happens via a cookie which is now returned as
part of the result, and which can be passed to a later
consume call to resume the previous one.
Instead of having a branch per each value of `consume_in_reverse`, have
just two ifs with two branches each for clustering rows and range
tombstones respectively, to facilitate further patching.
The header file <gms/inet_address.hh> is included, directly or
indirectly, from 291 source files in Scylla. It is hard to reduce this
number because Scylla relies heavily on IP addresses as keys to
different things. So it is important that this header file be fast to
include. Unfortunately it wasn't... ClangBuildAnalyzer measurements
showed that each inclusion of this header file added a whopping 2 seconds
(in dev build mode) to the build. A total of 600 CPU seconds - 10 CPU
minutes - were spent just on this header file. It was actually worse
because the build also spent additional time on template instantiation
(more on this below).
So in this patch we:
1. Remove some unnecessary stuff from gms/inet_address.hh, and avoid
including it in one place that doesn't need it. This is just
cosmetic, and doesn't significantly speed up the build.
2. Move the to_sstring() implementation from the .hh to the .cc. This saves
a lot of time on template instantiations - previously every source
file instantiated this to_sstring(), which was slow (that "format"
thing is slow).
3. Do not include <seastar/net/ip.hh> which is a huge file including
half the world. All we need from it is the type "ipv4_address",
so instead include just the new <seastar/net/ipv4_address.hh>.
This change brings most of the performance improvement.
Some source files forgot to include various Seastar header files
because the includes-everything ip.hh did it for them, so we need to
add these missing includes in this patch.
After this patch, ClangBuildAnalyzer reports that the cost of
inclusion of <gms/inet_address.hh> is down from 2 seconds to 0.326
seconds. Additionally, the format<inet_address> template instantiation
(291 times, about half a second each) is also gone.
All in all, this patch should reduce around 10 CPU minutes from the build.
Refs #1
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
It's hard to integrate maybe_stop_on_error() with coroutines as it
accepts a resolved future, not an exception pointer. Let's adjust
its interface, making it more flexible to work with.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
database.hh is expensive to include, and turns out it's no longer
needed. Also stop including other unused ones.
build time of sstables.o reduces by ~3% (cleared all caches and set
cpu frequency to a fixed value before building sstables.o from
scratch)
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20220104175908.98833-1-raphaelsc@scylladb.com>
Currently, the repair parallelism is calculated from the amount of
memory allocated to repair and the memory usage per repair instance.
However, the memory usage per repair instance does not take into
account the max possible memory usage caused by repair followers.
As a result, when repairing a table with a higher replication factor,
e.g., 3 DCs with 3 replicas each, the repair master node could use 9X
the repair buffer size in the worst case. This could cause OOM when the
system is under pressure.
This patch introduces a semaphore to cap the max memory usage.
Each repair instance takes the max possible memory usage budget before
it starts. This ensures repair would never use more than the memory
allocated to repair.
Fixes #9817
Closes #9818
The gc_grace_seconds mechanism is a very fragile and broken design
inherited from Cassandra. Deleted data can be resurrected if a
cluster-wide repair is not performed within gc_grace_seconds. This
design pushes the job of keeping the database consistent onto the user.
In practice, it is very hard to guarantee repair is performed within
gc_grace_seconds all the time. For example, the repair workload has the
lowest priority in the system and can be slowed down by higher priority
workloads, so there is no guarantee when a repair can finish. A
gc_grace_seconds value that used to work might not work after the data
volume grows in a cluster. Users might want to avoid running repair
during a specific period where latency is the top priority for their
business.
To solve this problem, an automatic mechanism to protect data
resurrection is proposed and implemented. The main idea is to remove the
tombstone only after the range that covers the tombstone is repaired.
In this patch, a new table option tombstone_gc is added. The option is
used to configure tombstone gc mode. For example:
1) GC a tombstone after gc_grace_seconds
cqlsh> ALTER TABLE ks.cf WITH tombstone_gc = {'mode':'timeout'} ;
This is the default mode. If no tombstone_gc option is specified by the
user, the old gc_grace_seconds based gc will be used.
2) Never GC a tombstone
cqlsh> ALTER TABLE ks.cf WITH tombstone_gc = {'mode':'disabled'};
3) GC a tombstone immediately
cqlsh> ALTER TABLE ks.cf WITH tombstone_gc = {'mode':'immediate'};
4) GC a tombstone after repair
cqlsh> ALTER TABLE ks.cf WITH tombstone_gc = {'mode':'repair'};
In addition to the 'mode' option, another option 'propagation_delay_in_seconds'
is added. It defines the max time a write could possibly be delayed before it
eventually arrives at a node.
A new gossip feature TOMBSTONE_GC_OPTIONS is added. The new tombstone_gc
option can only be used after the whole cluster supports the new
feature. A mixed cluster works with no problem.
Tests: compaction_test.py, ninja test
Fixes#3560
[avi: resolve conflicts vs data_dictionary]
"
A big problem with scylla tool executables is that they include the
entire scylla codebase and thus they are just as big as the scylla
executable itself, making them impractical to deploy on production
machines. We could try to combat this by selectively including only the
actually needed dependencies but even ignoring the huge churn of
sorting out our dependency hell (which we should do at some point anyway),
some tools may genuinely depend on most of the scylla codebase.
A better solution is to host the tool executables in the scylla
executable itself, switching between the actual main function to run
in some way. The tools themselves don't contain a lot of code, so
this won't cause any considerable bloat in the size of the scylla
executable itself.
This series does exactly this, folds all the tool executables into the
scylla one, with main() switching between the actual main it will
delegate to based on an argv[1] command line argument. If this is a known
tool name, the respective tool's main will be invoked.
If it is "server", missing or unrecognized, the scylla main is invoked.
Originally this series used argv[0] as the means to switch between the
mains to run. This approach was abandoned in favor of the approach mentioned above
for the following reasons:
* No launcher script, hard link, soft link or similar games are needed to
launch a specific tool.
* No packaging needed, all tools are automatically deployed.
* Explicit tool selection, no surprises after renaming scylla to
something else.
* Tools are discoverable via scylla's description.
* Follows the trend set by modern command line multi-command or multi-app
programs, like git.
Fixes: #7801
Tests: unit(dev)
"
* 'tools-in-scylla-exec-v5' of https://github.com/denesb/scylla:
main,tools,configure.py: fold tools into scylla exec
tools: prepare for inclusion in scylla's main
main: add skeleton switching code on argv[1]
main: extract scylla specific code into scylla_main()
In the next patches we will refactor mutation::consume(). Before doing
that add another test, which rebuilds the consumed mutation, comparing
it with the original.
The split of <seastar/net/ip.hh> will be useful for reducing the build
time (ip.hh is huge and we don't need to include most of it)
Refs #1
* seastar 8d15e8e6...655078df (13):
> net: split <seastar/net/ip.hh>
> Merge "Rate-limited IO capacity management" from Pavel E
> util: closeable/stoppable: Introduce cancel()
> loop: Improve concepts to match requirements
> Merge "scoped_critical_alloc_section make conditional and volatile" from Benny
> Added variadic version of when_any
> websocket: define CryptoPP::byte for older cryptopp
> tests: fix build (when libfmt >= 8) by adding fmt::runtime()
> foreign_ptr: destroy_on: fixup indentation
> foreign_ptr: expose async destroy method
> when_all: when_all_state::wait_all move scoped_critical_alloc_section to captures
> json: json_return_type: provide copy constructor and assignment operator
> json: json_element: mark functions noexcept
Fixes#9798
If an exception in allocate_segment_ex is a (sub)type of std::system_error,
commit_error_handler might _not_ throw (doh), in which case the error
handling code would forget the current exception and return an unusable
segment.
Now only used as an exception pointer replacer.
Closes#9870
test/rest_api has a "--ssl" option to use encrypted CQL. It's not clear
to me why this is useful (it doesn't actually test encryption of the
REST API!), but as long as we have such an option, it should work.
And it didn't work because of a typo - we set a "check_cql" variable to the
right function, but then forgot to use it and used run.check_cql instead
(which is just for unencrypted cql).
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220102123202.1052930-1-nyh@scylladb.com>
When 'scylla fiber' calls _walk, the latter can validly return a None
pointer (see 74ffafc8a7 scylla-gdb.py: scylla fiber: add actual return
to early return). This None is not handled by the caller but is unpacked
as if it were a valid tuple.
fixes: #9860
tests: scylla-gdb(release, failure not reproduced though)
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20211231094311.2495-1-xemul@scylladb.com>
The capacity accounting was changed, scylla-gdb.py should know
the new layout. On error -- fall back to current state.
tests: scylla-gdb(release, current and patched seastar)
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20211231073427.32453-1-xemul@scylladb.com>
The patch adds the `SUPPORTS_RAFT_CLUSTER_MANAGEMENT`
and `USES_RAFT_CLUSTER_MANAGEMENT` gossiper features.
These features provide a way to organize the automatic
switch to raft-based cluster management.
The scheme is as follows:
1. Every new node declares support for raft-based cluster ops.
2. At the moment, no nodes in the cluster can actually use
raft for cluster management, until the `SUPPORTS*` feature is enabled
(i.e. understood by every node in the cluster).
3. After the first `SUPPORTS*` feature is enabled, the nodes
can declare support for the second, `USES*` feature, which
means that the node can actually switch to use raft-based cluster
ops.
The scheme ensures that even if some nodes are down while
transitioning to the new bootstrap mechanism, they can easily
switch to the new procedure without risking disruption of the
cluster.
The features are not actually wired to anything yet,
providing a framework for the integration with `raft_group0`
code, which is the subject of a follow-up series.
Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
Message-Id: <20211220081318.274315-1-pa.solodovnikov@scylladb.com>
When there is no active segment, alloc_buf() calls new_buf_active() to
allocate a new one. new_buf_active() allocates memory
(e.g. a new segment) so may cause memory reclamation, which may cause
segment compaction, which may call alloc_buf() and re-enter
new_buf_active(). The first call to new_buf_active() would then
override _buf_active and cause the segment allocated during segment
compaction to be leaked.
This then causes an abort when objects from the leaked segment are freed
because the segment is expected to be present in _closed_segments, but
isn't. boost::intrusive::list::erase() will fail on assertion that the
object being erased is linked.
Introduced in b5ca0eb2a2.
Fixes #9821
Fixes #9192
Fixes #9825
Fixes #9544
Fixes #9508
Refs #9573
Message-Id: <20211229201443.119812-1-tgrabiec@scylladb.com>
Add a description of how the SEPARATE_PAGE_SIZE_AND_SAFETY_LIMIT cluster
feature is used and note that only coordinators check it. The decision
made by a coordinator is immutable for the whole request and can be
checked by looking at the page_size field. If it's set to 0 or unset,
we're handling the struct in the old way. Otherwise, the new way is used.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Closes#9855
This change makes row cache support reverse reads natively so that reversing wrappers are not needed when reading from cache and thus the read can be executed efficiently, with similar cost as the forward-order read.
After this change, the database serves reverse reads from cache by default. Before, it bypassed cache by default, since 703aed3277.
Refs: #1413
Tests:
- unit [dev]
- manual query with build/dev/scylla and cache tracing on
Closes#9454
* github.com:scylladb/scylla:
tests: row_cache: Extend test_concurrent_reads_and_eviction to run reverse queries
row_cache: partition_snapshot_row_cursor: Print more details about the current version vector
row_cache: Improve trace-level logging
config: Use cache for reversed reads by default
config: Adjust reversed_reads_auto_bypass_cache description
row_cache: Support reverse reads natively
mvcc: partition_snapshot: Support slicing range tombstones in reverse
test: flat_mutation_reader_assertions: Consume expected range tombstones before end_of_partition
row_cache: Log produced range tombstones
test: Make produces_range_tombstone() report ck_ranges
tests: lib: random_mutation_generator: Extract make_random_range_tombstone()
partition_snapshot_row_cursor: Support reverse iteration
utils: immutable-collection: Make movable
intrusive_btree: Make default-initialized iterator cast to false
The default for get_unlimited_query_max_result_size() is 100MB (adjustable through config), whereas query::result_memory_limiter::maximum_result_size is 1MB (hard coded, should be enough for everybody)
This limit is then used by the replica to decide when to break pages and, in the case of reversed clustering order reads, when to fail the read once the accumulated data crosses the threshold. The latter behavior stems from the fact that reversed reads had to accumulate all the data (read in forward order) before they could reverse it and return the result. Reverse reads thus need a higher limit so that they have a higher chance of succeeding.
Most readers now support reading in reverse natively, and only the reversing wrappers (make_reversing_reader()) inserted on top of ka/la sstable readers need to accumulate all the data. In other cases, we can break pages sooner. This should lead to better stability (less memory usage) and performance (lower page build latency, higher read concurrency due to a smaller memory footprint).
Tests: unit(dev)
Closes#9815
* github.com:scylladb/scylla:
storage_proxy: Send page_size in the read_command
gms: add SEPARATE_PAGE_SIZE_AND_SAFETY_LIMIT feature
result_memory_accounter: use new max_result_size::get_page_size in check_local_limit
max_result_size: Add page_size field
Some code assumes that lowres_clock::duration is milliseconds, but public
documentation never claimed that. Harden the code for a change in the
definition by removing the assumptions.
Closes#9850
* github.com:scylladb/scylla:
loading_cache: fix mixup of std::chrono::milliseconds and lowres_clock::duration
service: storage_proxy: fix lowres_clock::duration assumption
service: misc_services: fix lowres_clock::duration assumption
gossip: fix lowres_clock::duration assumption
calculate_delay() implicitly converts a lowres_clock::duration to
std::chrono::microseconds. This fails if lowres_clock::duration has
higher resolution than microseconds.
Fix by using an explicit conversion, which always works.
recalculate_hitrates() is defined to return future<lowres_clock::duration>
but actually returns future<std::chrono::milliseconds>. This fails
if the two types are not the same.
Fix by returning the declared type.
The variable diff is declared as std::chrono::milliseconds
but later used to store the difference between two
lowres_clock::time_point samples. This works now because the two
types are the same, but fails if lowres_clock::duration changes.
Remove the assumption by using lowres_clock::duration.
When the whole cluster is already supporting
separate_page_size_and_safety_limit,
start sending page_size in read_command. This new value will be used
for determining the page size instead of hard_limit.
Fixes #9487
Fixes #7586
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
This new feature will be used to determine whether the whole cluster
is ready to use additional page_size field in max_result_size.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
This means when page_size is sent together with read_command it will be
used for paged queries instead of the hard_limit.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
With this new field comes a new member function called get_page_size.
This new function will be used by the result_memory_accounter to decide
when to cut a page.
The behaviour of get_page_size depends on whether the page_size field is
set, distinguished by the page size being equal to 0 or not. When
page_size is equal to 0, it's not set and hard_limit will be
returned from get_page_size. Otherwise, get_page_size will return the
page_size field.
When read_command is received from an old node, page_size will be equal
to 0 and hard_limit will be used to determine the page size. This is
consistent with the behaviour on the old nodes.
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
As of now, 'systemd_unit.available' works correctly only when the
provided unit is present.
It raises an Exception instead of returning a boolean
when the provided systemd unit is absent.
So, make it return a boolean in both cases.
Fixes https://github.com/scylladb/scylla/issues/9848
Closes #9849
Move saving features to `system.local#supported_features`
to the point after passing all remote feature checks in
the gossiper, right before joining the ring.
This makes the `system.local#supported_features` column store the
advertised feature set. Leave a comment in the definition of
the `system.local` schema to reflect that.
Since the column value is not actually used anywhere for now,
it shouldn't affect any tests or alter the existing behavior.
Later, we can optimize the gossip communication between nodes
in the cluster, removing the feature check altogether
in some cases (since the column value should now be monotonic).
* manmanson/save_adv_features_v2:
db: save supported features after passing gossip feature check
db: add `save_local_supported_features` function
Commit dcc73c5d4e introduced a semaphore
for excluding concurrent recalculations - _reserve_recalculation_guard.
Unfortunately, the two places in the code which tried to take this
guard just called get_units() - which returns a future<units>, not
units - and never waited for this future to become available.
So this patch adds the missing "co_await" needed to wait for the
units to become available.
Fixes#9770.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211214122612.1462436-1-nyh@scylladb.com>
On newer versions of systemd-coredump, coredumps are handled in
systemd-coredump@.service, which may cause a timeout while running the
systemd unit, like this:
systemd[1]: systemd-coredump@xxxx.service: Service reached runtime time limit. Stopping.
To prevent that, we need to override TimeoutStartSec=infinity.
Fixes #9837
Closes #9841
On CentOS8, mdmonitor.service does not work correctly when using
mdadm-4.1-15.el8.x86_64 and later versions.
Until we find a solution, let's pin the package version to an older one
which does not cause the issue (4.1-14.el8.x86_64).
Fixes #9540
Closes #9782
Along the way, our flat structure for docs was changed
to categorize the documents, but service_levels.md was forward-ported
later and missed the created directory structure, so it was created
as a sole document in the top directory. Move it to where the other
similar docs live.
Message-Id: <68079d9dd511574ee32fce15fec541ca75fca1e2.1640248754.git.sarna@scylladb.com>
"
The token metadata and features should be kept on the query_processor itself,
so finally the "storage" API would look like this:
6 .query()
5 .get_max_result_size()
2 .mutate_with_triggers()
2 .cas()
1 .truncate_blocking()
The get_max_result_size() is probably also worth moving away from storage,
it seems to have nothing to do with it.
tests: unit(dev)
"
* 'br-query-processor-in-cql-statements' of https://github.com/xemul/scylla:
cql3: Generalize bounce-to-shard result creation
cql3: Get data dictionary directly from query_processor
create_keyspace_statement: Do not use proxy.shared_from_this()
cas_request: Make read_command() accept query_processor
select_statement: Replace all proxy-s with query_processor
create_|alter_table_statement: Make check_restricted_table_properties() accept query_processor
create_|alter_keyspace_statement: Make check_restricted_replication_strategy() accept query_processor
role_management_statement: Make validate_cluster_support() accept query_processor
drop_index_statement: Make lookup_indexed_table() accept query_processor
batch_|modification_statement: Make get_mutations accept query_processor
modification_statement: Replace most of proxy-s with query_processor
batch_statement: Replace most of proxy-s with query_processor
cql3: Make create_arg_types()/prepare_type() accept query_processor
cql3: Make .validate_while_executing() accept query_processor
cql3: Make execution stages carry query_processor over
cql3: Make .validate() and .check_access() accept query_processor
The current implementation of the Alternator expiration (TTL) feature
has each node scan for expired partitions in its own primary ranges.
This means that while a node is down, items in its primary ranges will
not get expired.
But we note that it doesn't have to be this way: If only a single node is
down, and RF=3, the items that node owns are still readable with QUORUM -
so these items can still be safely read and checked for expiration - and
also deleted.
This patch implements a fairly simple solution: When a node completes
scanning its own primary ranges, it also checks whether any of its *secondary*
ranges (ranges where it is the *second* replica) has its primary owner
down. For such ranges, this node will scan them as well. This secondary
scan stops if the remote node comes back up, but in that case it may
happen that both nodes will work on the same range at the same time.
The risks in that are minimal, though, and amount to wasted work and
duplicate deletion records in CDC. In the future we could avoid this by
using LWT to claim ownership on a range being scanned.
We have a new dtest (see a separate patch), alternator_ttl_tests.py::
TestAlternatorTTL::test_expiration_with_down_node, which reproduces this
and verifies this fix. The test starts a 5-node cluster, with 1000 items
with random tokens which are due to be expired immediately. The test
expects to see all items expiring ASAP, but when one of the five nodes
is brought down, this doesn't happen: Some of the items are not expired,
until this patch is used.
Fixes#9787
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211222131933.406148-1-nyh@scylladb.com>
Move saving features to `system.local#supported_features`
to the point after passing all remote feature checks in
the gossiper, right before joining the ring.
This makes the `system.local#supported_features` column store the
advertised feature set. Leave a comment in the definition of
the `system.local` schema to reflect that.
Since the column value is not actually used anywhere for now,
it shouldn't affect any tests or alter the existing behavior.
Later, we can optimize the gossip communication between nodes
in the cluster, removing the feature check altogether
in some cases (since the column value should now be monotonic).
Tests: unit(dev)
Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
The main intention is actually to free the qp.proxy() from the
need to provide the get_stats() method.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
After previous patches there's a whole bunch of places that do
qp.proxy().data_dictionary()
while the data_dictionary is present on the query processor itself
and there's a public method to get one. So use it everywhere.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
prepare_schema_mutations is not a sleeping method, so there's no
point in getting a call-local shared pointer to the proxy. A plain
reference is more than enough.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This is the largest user of the proxy argument. Fix them all and
their callers (they all sit in the same .cc file).
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This completes the batch_ and modification_statement rework.
Also touch the private batch_statement::read_command while at it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There are some internal methods that use proxy argument. Replace
most of them with query_processor, next patch will fix the rest --
those that interact with batch statement.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There are some proxy arguments left in the batch_statement internals.
Fix most of them to be query_processors. Few remainders will come
later as they rely on other statements to be fixed.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The schema_altering_statement declares this pure virtual method. This
patch changes its first argument from proxy to query processor and
fixes the resulting compiler errors.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The batch_, modification_ and select_ statements get the proxy from the
query processor just to push it through the execution stage. Simplify
that by pushing the query processor itself.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This is mostly a sed script that replaces methods' first argument
plus fixes of compiler-generated errors.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Currently this parse function reads only 100KB worth
of members in each iteration.
Since the default max_chunk_capacity is 128KB,
100KB underutilizes the chunk capacity, and it could
be safely increased to the max to reduce the number of
allocations and corresponding calls to read_exactly
for large arrays.
Expose utils::chunked_vector::max_chunk_capacity
so that the caller wouldn't have to guess this number
and use it in parse().
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211222103126.1819289-2-bhalevy@scylladb.com>
Otherwise, it may trip an assertion when the underlying
file is closed, as seen in e.g.:
https://jenkins.scylladb.com/view/master/job/scylla-master/job/next/4318/artifact/testlog/x86_64_release/sstable_3_x_test.test_read_rows_only_index.4174.log
```
test/boost/sstable_3_x_test.cc(0): Entering test case "test_read_rows_only_index"
sstable_3_x_test: ./seastar/src/core/fstream.cc:205: virtual seastar::file_data_source_impl::~file_data_source_impl(): Assertion `_reads_in_progress == 0' failed.
Aborting on shard 0.
Backtrace:
0x22557e8
0x2286842
0x7f2799e99a1f
/lib64/libc.so.6+0x3d2a1
/lib64/libc.so.6+0x268a3
/lib64/libc.so.6+0x26788
/lib64/libc.so.6+0x35a15
0x222c53d
0x222c548
0xb929cc
0xc0b23b
0xa84bbf
0x24d0111
```
Decoded:
```
__GI___assert_fail at :?
~file_data_source_impl at ./build/release/seastar/./seastar/src/core/fstream.cc:205
~file_data_source_impl at ./build/release/seastar/./seastar/src/core/fstream.cc:202
std::default_delete<seastar::data_source_impl>::operator()(seastar::data_source_impl*) const at /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/unique_ptr.h:85
(inlined by) ~unique_ptr at /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/unique_ptr.h:361
(inlined by) ~data_source at ././seastar/include/seastar/core/iostream.hh:55
(inlined by) ~input_stream at ././seastar/include/seastar/core/iostream.hh:254
(inlined by) ~continuous_data_consumer at ././sstables/consumer.hh:484
(inlined by) ~index_consume_entry_context at ././sstables/index_reader.hh:116
(inlined by) std::default_delete<sstables::index_consume_entry_context<sstables::index_consumer> >::operator()(sstables::index_consume_entry_context<sstables::index_consumer>*) const at /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/unique_ptr.h:85
(inlined by) ~unique_ptr at /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/unique_ptr.h:361
(inlined by) ~index_bound at ././sstables/index_reader.hh:395
(inlined by) ~index_reader at ././sstables/index_reader.hh:435
std::default_delete<sstables::index_reader>::operator()(sstables::index_reader*) const at /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/unique_ptr.h:85
(inlined by) ~unique_ptr at /usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/unique_ptr.h:361
(inlined by) ~index_reader_assertions at ././test/lib/index_reader_assertions.hh:31
(inlined by) operator() at ./test/boost/sstable_3_x_test.cc:4630
```
Test: unit(dev), sstable_3_x_test.test_read_rows_only_index(release X 10000)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211222132858.2155227-1-bhalevy@scylladb.com>
Today, when resharding is interrupted, shutdown will not be clean
because stopped exception interrupts the shutdown process.
Let's handle stopped exception properly, to allow shutdown process
to run to completion.
Refs #9759
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211221175717.62293-1-raphaelsc@scylladb.com>
Catches inconsistencies in LSA state.
Currently:
- discrepancy between segment set in _closed_segments and shard's
segment descriptors
- cross-shard segment references in _closed_segments
- discrepancy in _closed_occupancy stats and what's in segment
descriptors
- segments not present in _closed_segments but present in
segment descriptors
Refs https://github.com/scylladb/scylla/issues/9544
Closes#9834
* github.com:scylladb/scylla:
gdb: Introduce "scylla lsa-check"
gdb: Make get_base_class_offset() also see indirect base classes
The migration manager got local storage proxy reference recently, but one
method still uses the global call. Fix it.
tests: unit(dev)
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20211221120034.21824-1-xemul@scylladb.com>
"
The second patch in this series is a mechanical conversion of
reader_concurrency_semaphore to flat_mutation_reader_v2, and caller
updates.
The first patch is needed to pass the test suite, since without it a
real reader version conversion would happen on every entry to and exit
from reader_concurrency_semaphore, which is stressful (for example:
mutation_reader_test.test_multishard_streaming_reader reaches 8191
conversions for a couple of readers, which somehow causes it to catch
SIGSEGV in diverse and seemingly-random places).
Note that in a real workload it is unreasonable to expect readers being
parked in a reader_concurrency_semaphore to be pristine, so
short-circuiting their version conversions will be impossible and this
workaround will not really help.
"
* tag 'rcs-v2-v4' of https://github.com/cmm/scylla:
reader_concurrency_semaphore: convert to flat_mutation_reader_v2
short-circuit flat mutation reader upgrades and downgrades
Catches inconsistencies in LSA state.
Currently:
- discrepancy between segment set in _closed_segments and shard's
segment descriptors
- cross-shard segment references in _closed_segments
- discrepancy in _closed_occupancy stats and what's in segment
descriptors
- segments not present in _closed_segments but present in
segment descriptors
When asked to upgrade a reader that itself is a downgrade, try to
return the original v2 reader instead, and likewise when downgrading
upgraded v1 readers.
This is desirable because version transformations can result from,
say, entering/leaving a reader concurrency semaphore, and the amount
of such transformations is practically unbounded.
Such short-circuiting is only done if it is safe, that is: the
transforming reader's buffer is empty and its internal range tombstone
tracking state is discardable.
Signed-off-by: Michael Livshin <michael.livshin@scylladb.com>
Make sure that major will compact data in all sstables and memtable,
as tombstones sitting in memtable could shadow data in sstables.
For example, a tombstone in memtable deleting a large partition could
be missed in major, so space wouldn't be saved as expected.
Additionally, write amplification is reduced as data in memtable
won't have to travel through tiers once flushed.
Fixes#9514.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211217160055.96693-2-raphaelsc@scylladb.com>
The infrastructure is now in place. Remove the proxy main of the tools,
and add appropriate `else if` statements to the executable switch in
main.cc. Also remove the tool applications from the `apps` list and add
their respective sources as dependencies to the main scylla executable.
With this, we now have all tool executables living inside the scylla
main one.
Rename actual main to `${tool_name}_main` and have a proxy main call it.
In the next patch we will get rid of these proxy mains and the tool
mains will be invoked from scylla's main, if the `argv[0]` matches their
name.
The main functions are included in a new `tools/entry_point.hh` header.
To prepare for the scylla executable hosting more than one app, we
switch between them using argv[1]. This is consistent with how most
modern multi-app/multi-command programs work, one prominent example
being git.
For now only one app is present: scylla itself, called "server". If
argv[1] is missing or unrecognized, this is what is used as the default
for backward-compatibility.
The scylla app also gets a description, which explains that scylla hosts
multiple apps and lists all the available ones.
"
Convert sstable_set and table::make_sstable_reader() to v2. With this
all readers below cache use the v2 format.
Tests: unit(dev)
"
* 'table-make-sstable-reader-v2/v1' of https://github.com/denesb/scylla:
table: upgrade make_sstable_reader() to v2
sstables/sstable_set: create_single_key_sstable_reader() upgrade to v2
sstables/sstable_set: remove unused and undefined make_reader() member
main() now contains only generic setup and teardown code and it
delegates to scylla_main().
In the next patches we want to wire in tool executables into the scylla
one. This will require selecting the main to run at runtime.
scylla_main() will be just one of those (the default).
"
TWCS performs STCS on a window as long as it's the most recent one.
From there on, TWCS will compact all files in the past window into
a single file. With some moderate write load, it could happen that
there's still some compaction activity in that past window, meaning
that per-window major may miss some files being currently compacted.
As a result, a past window may contain more than 1 file after all
compaction activity is done on its behalf, which may increase read
amplification. To avoid that, TWCS will now make sure that per-window
major is serialized, to make sure no files are missed.
Fixes#9553.
tests: unit(dev).
"
* 'fix_twcs_per_window_major_v3' of https://github.com/raphaelsc/scylla:
TWCS: Make sure major on past window is done on all its sstables
TWCS: remove needless param for STCS options
TWCS: kill unused param in newest_bucket()
compaction: Implement strategy control and wire it
compaction: Add interface to control strategy behavior.
"
Users are adjusted by sprinkling `upgrade_to_v2()` and
`downgrade_to_v1()` where necessary (or removing any of these where
possible). No attempt was made to optimize and reduce the amount of
v1<->v2 conversions. This is left for follow-up patches to keep this set
small.
The combined reader is composed of 3 layers:
1. fragment producer - pop fragments from readers, return them in batches
(each fragment in a batch having the same type and pos).
2. fragment merger - merge fragment batches into single fragments
3. reader implementation glue-code
Converting layers (1) and (3) was mostly mechanical. The logic of
merging range tombstone changes is implemented at layer (2), so the two
different producer (layer 1) implementations we have share this logic.
Tests: unit(dev)
"
* 'combined-reader-v2/v4' of https://github.com/denesb/scylla:
test/boost/mutation_reader_test: add test_combined_reader_range_tombstone_change_merging
mutation_reader: convert make_clustering_combined_reader() to v2
mutation_reader: convert position_reader_queue to v2
mutation_reader: convert make_combined_reader() overloads to v2
mutation_reader: combined_reader: convert reader_selector to v2
mutation_reader: convert combined reader to v2
mutation_reader: combined_reader: attach stream_id to mutation_fragments
flat_mutation_reader_v2: add v2 version of empty reader
test/boost/mutation_reader_test: clustering_combined_reader_mutation_source_test: fix end bound calculation
This is a utility function for writing the supported
feature set to the `system.local` table.
Will be used to move the corresponding part from
`system_keyspace::setup_version` to the gossiper
after passing remote feature check, effectively making
`system.local#supported_features` store the advertised
features (which already passed the feature check).
Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
The meat of the change is on the fragment merger level, which is now
also responsible for merging range tombstone changes. The fragment
producers are just mechanically converted to v2 by appending `_v2` to
the appropriate type names.
The beauty of this approach is that range tombstone merging happens in a
single place, shared by all fragment producers (there are two of them).
Selectors and factory functions are left as v1 for now, they will be
converted incrementally by the next patches.
Refs #9709
Refs #9653
If we don't find immediate info about an endpoint, check if
we're being asked about a "private" ip for the endpoint.
If so, give info for this.
Fixes#9653
When doing an outgoing connection, in an internode_encryption=dc/rack situation
we should not use endpoint/local broadcast solely to determine if we can
downgrade a connection.
If gossip/message_service determines that we will connect to a different
address than the "official" endpoint address, we should use this to determine
association of target node, and similarly, if we bind outgoing connection
to interface != bc we need to use this to decide local one.
Note: This will effectively _disable_ internode_encryption=dc/rack on ec2 etc
until such time that gossip can give accurate info on dc/rack for "internal"
ip addresses of nodes.
Consider
1) n1, n2, n3, n4, n5
2) n2 and n3 are both down
3) start n6 to replace n2
4) start n7 to replace n3
We want to replace the dead nodes n2 and n3 to fix the cluster to have 5
running nodes.
Replace operation in step 3 will fail because n3 is down.
We would see errors like below:
replace[25edeec0-57d4-11ec-be6b-7085c2409b2d]: Nodes={127.0.0.3} needed
for replace operation are down. It is highly recommended to fix the down
nodes and try again.
In the above example, currently, there is no way to replace any of the
dead nodes.
Users can either fix one of the dead nodes and run replace or run
removenode operation to remove one of the dead nodes then run replace
and run bootstrap to add another node.
Fixing dead nodes is always the best solution but it might not be
possible. Running removenode operation is not better than running
replace operation (with best effort by ignoring the other dead node) in
terms of data consistency. In addition, users have to run bootstrap
operation to add back the removed node. So, allowing replacing in such
case is a clear win.
This patch adds the --ignore-dead-nodes-for-replace option to allow
running the replace operation in best-effort mode. Please note, use this option
only if the dead nodes are completely broken and down, and there is no
way to fix the node and bring it back. This also means the user has to
make sure the ignored dead nodes specified are really down to avoid any
data consistency issue.
Fixes#9757
Closes#9758
Instead of calling get_local_storage_proxy in paxos_state, get it from the
caller (who is, in fact, storage_proxy or one of its components).
Some of the callers, although they are storage_proxy components, don't
have a storage_proxy reference handy and so they ignominiously call
get_local_storage_proxy() themselves. This will be adjusted later.
The other callers who are, in fact, storage_proxy, have to take special
care not to cross a shard boundary. When they do, smp::submit_to()
is converted to sharded::invoke_on() in order to get the correct local instance.
Test: unit (dev)
Closes#9824
Allow stopping compaction by type on a given keyspace and list of tables.
Also add api unit test suite that tests the existing `stop_compaction` api
and the new `stop_keyspace_compaction` api.
Fixes#9700
Closes#9746
* github.com:scylladb/scylla:
api: storage_service: validate_keyspace: improve exception error message
api: compaction_manager: add stop_keyspace_compaction
api: storage_service: expose validate_keyspace and parse_tables
api: compaction_manager: stop_compaction: fix type description
compaction_manager: stop_compaction: expose optional table*
test: api: add basic compaction_manager test
Now the format is the same as for the "heap" version vector. Contains
positions and continuity flags. Helps in debugging.
Before:
{cursor: position={position: clustered,ckp{...},-1}, cont=0, rev=1, current=[0], heap=[
], latest_iterator=[{position: clustered,ckp{...},-1}]}
After:
{cursor: position={position: clustered,ckp{...},-1}, cont=0, rev=1, current=[{v=0, pos={position: clustered,ckp{...},-1}, cont=false}], heap=[
], latest_iterator=[{position: clustered,ckp{...},-1}]}
Print MVCC snapshot to help distinguish reads which use different snapshots.
Also, print the whole cursor, not just its position. This helps in
determining which MVCC version the iterator comes from.
Some implementation notes below.
When iterating in reverse, _last_row is after the current entry
(_next_row) in table schema order, not before like in the forward
mode.
Since there is no dummy row before all entries, reverse iteration must
be now prepared for the fact that advancing _next_row may land not
pointing at any row. The partition_snapshot_row_cursor maintains
continuity() correctly in this case, and positions the cursor before
all rows, so most of the code works unchanged. The only exception is
in move_to_next_entry(), which now cannot assume that failure to
advance to an entry means it can end a read.
maybe_drop_last_entry() is not implemented in reverse mode, which may
expose reverse-only workload to the problem of accumulating dummy
entries.
ensure_population_lower_bound() was not updating _last_row after
inserting the entry in the latest version. This was not a problem for
forward reads because they do not modify the row in the partition
snapshot represented by _last_row. They only need the row to be there
in the latest version after the call. It's different for reversed
reads, which change the continuity of the entry represented by
_last_row, hence _last_row needs to have the iterator updated to point
to the entry from the latest version, otherwise we'd set the
continuity of the previous version entry which would corrupt the
continuity.
There may be unconsumed but expected fragments in the stream at the
time of the call to produces_partition_end().
Call check_rts() sooner to avoid failures.
Replace get_storage_proxy() and get_local_storage_proxy() with
constructor-provided references. Some unneeded cases were removed.
Test: unit (dev)
Closes#9816
* github.com:scylladb/scylla:
migration_manager: replace uses of get_storage_proxy and get_local_storage_proxy with constructor-provided reference
migration_manager: don't keep storage_proxy alive during schema_check verb
mm: don't capture storage proxy shared_ptr during background schema merge
mm: remove stats on schema version get
The thrift layer started partially having admission control
after commit ef1de114f0,
but code inspection suggests that it might cause use-after-free
in a few cases, when a permit is obtained more than once per
handling - due to the fact that some functions tail-called other
functions, which also obtain a permit.
These extraneous permits are not taken anymore.
Tests: "please trust me" + cassandra-stress in thrift mode
Message-Id: <ac5d711288b22c5fed566937722cceeabc234e16.1639394937.git.sarna@scylladb.com>
The schema_check verb doesn't leak tasks, so when the verb is
unregistered it will be drained. So protection for storage_proxy lifetime
can be removed.
The definitions_update() verb captures a shared_ptr to storage_proxy
to keep it alive while the background task executes.
This was introduced in (2016!):
commit 1429213b4c
Author: Pekka Enberg <penberg@scylladb.com>
Date: Mon Mar 14 17:57:08 2016 +0200
main: Defer migration manager RPC verb registration after commitlog replay
Defer registering migration manager RPC verbs after commitlog has
been replayed so that our own schema is fully loaded before other
nodes start querying it or sending schema updates.
Message-Id: <1457971028-7325-1-git-send-email-penberg@scylladb.com>
when moving this code from storage_proxy.cc.
Later, better protection with a gate was added:
commit 14de126ff8
Author: Pavel Emelyanov <xemul@scylladb.com>
Date: Mon Mar 16 18:03:48 2020 +0300
migration_manager: Run background schema merge in gate
The call for merge_schema_from in some cases is run in the
background and thus is not aborted/waited on shutdown. This
may result in use-after-free, one instance of which is
merge_schema_from
-> read_schema_for_keyspace
-> db::system_keyspace::query
-> storage_proxy::query
-> query_partition_key_range_concurrent
in the latter function the proxy._token_metadata is accessed,
while the respective object can be already free (unlike the
storage_proxy itself that's still leaked on shutdown).
Related bug: #5903, #5999 (cannot reproduce though)
Tests: unit(dev), manual start-stop
dtest(consistency.TestConsistency, dev)
dtest(schema_management, dev)
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Reviewed-by: Pekka Enberg <penberg@scylladb.com>
Message-Id: <20200316150348.31118-1-xemul@scylladb.com>
Since now the task execution is protected by the gate and
therefore migration_manager lifetime (which is contained within
that of storage_proxy, as it is constructed afterwards), capturing
the shared_ptr is not needed, and we therefore remove it, as
it uses the deprecated global storage_proxy accessors.
The fragment producer component of the combined reader returns a batch
of fragments on each call to operator()(). These fragments are merged
into a single one by the fragment merger. This patch adds a stream id to
each fragment in the batch which identifies the stream (reader) it
originates from. This will be used in the next patches to associate
range-tombstone-changes originating from the same stream with each other.
Currently the test assumes that fragments represent weakly monotonic
upper bounds and therefore unconditionally overwrites the upper-bound on
receiving each fragment. Range tombstones however violate this as a
range tombstone with a smaller position (lower bound) may have a higher
upper bound than some or all fragments that follow it in the stream.
This causes test failures after converting the combined reader to
v2, but not before, no idea why.
This patch helps to speed up node boot up for test setups like dtest.
Nadav reported
```
With Asias's two patches on Scylla, and my patch to enable it in dtest:
Boot time of 5 nodes is now down to 9 seconds!
Remember we started this exercise with 214 seconds? :-)
```
Closes#9808
* github.com:scylladb/scylla:
storage_service: Recheck tokens before throw in storage_service::bootstrap
gossip: Do not wait for gossip to settle if skip_wait_for_gossip_to_settle is zero
Appending an empty range adjacent to an existing range tombstone would
not deoverlap (by dropping the empty range tombstone), resulting in a
different (non-canonical) result depending on the order of appending.
Suppose that range tombstone [a, b] covers range tombstone [x, x), and
[a, x) and [x, b) are range tombstones which correspond to [a, b] split
around position x.
Appending [a, x) then [x, b) then [x, x) would give [a, b)
Appending [a, x) then [x, x) then [x, b) would give [a, x), [x, x), [x, b)
The fix is to drop empty range tombstones in range_tombstone_list so
that the result is canonical.
Fixes#9661
Closes#9764
* github.com:scylladb/scylla:
range_tombstone_list: Deoverlap adjacent empty ranges
range_tombstone_list: Convert to work in terms of position_in_partition
The full user-defined structure of the database (keyspaces,
tables, user-defined types, and similar metadata, often known
as the schema in other databases) is needed by much of the
front-end code. But in Scylla it is deeply intertwined with the
replica data management code - ::database, ::keyspace, and
::table. Not only does the front-end not need data access, it
cannot get correct data via these objects since they represent
just one replica out of many.
This dual-role is a frequent cause of recompilations. It was solved
to some degree by forward declarations, but there is still a lot
of incidental dependencies.
To solve this, we introduce a data_dictionary module (and
namespace) to exclusively deal with greater schema metadata.
It is an interface, with a backing implementation by the existing code,
so it doesn't add a new source of truth. The plan is to allow mock
implementations for testing as well.
Test: unit (dev, release, debug).
Closes#9783
* github.com:scylladb/scylla:
cql3, related: switch to data_dictionary
test: cql_test_env: provide access to data_dictionary
storage_proxy: provide access to data_dictionary
database: implement data_dictionary interface
data_dictionary: add database/keyspace/table objects
data_dictionary: move keyspace_metadata to data_dictionary
data_dictionary: move user_types_metadata to new module data_dictionary
"
Start converting small functions in gossiper code
from using `seastar::thread` context to coroutines.
For now, the changes are quite trivial.
Later, larger code fragments will be converted
to eliminate uses of `seastar::async` function calls.
Moving the code to coroutines makes the code a bit
more readable and also makes it immediately evident
that a given function is async just by looking at the
signature (for example, for void-returning functions,
a coroutine will return `future<>` instead of `void`
in case of a seastar::thread-using function).
Tests: unit(dev)
"
* 'coro_gossip_v1' of https://github.com/ManManson/scylla:
gms: gossiper: coroutinize `maybe_enable_features`
gms: gossiper: coroutinize `wait_alive`
gms: gossiper: coroutinize `add_saved_endpoint`
gms: gossiper: coroutinize `evict_from_membership`
Stop using database (and including database.hh) for schema related
purposes and use data_dictionary instead.
data_dictionary::database::real_database() is called from several
places, for these reasons:
- calling yet-to-be-converted code
- callers with a legitimate need to access data (e.g. system_keyspace)
but with the ::database accessor removed from query_processor.
We'll need to find another way to supply system_keyspace with
data access.
- to gain access to the wasm engine for testing whether user-defined
functions compile. We'll have to find another way to
do this as well.
The change is a straightforward replacement. One case in
modification_statement had to change a capture, but everything else
was just a search-and-replace.
Some files that lost "database.hh" gained "mutation.hh", which they
previously had access to through "database.hh".
Probably storage_proxy is not the correct place to supply
data_dictionary, but it is available to practically all of
the coordinator code, so it is convenient.
Implement the new data_dictionary interface using the existing
::database, ::keyspace, and ::table classes. The implementation
is straightforward. This will allow the coordinator code to access
the full schema without depending on the gnarly bits that compose
::database, like reader_concurrency_semaphore or the backlog
controller.
Add metadata-only counterparts to ::database, ::keyspace, and ::table.
Apart from being metadata-only objects suitable for the coordinator,
the new types are also type-erased and so they can be mocked without
being linked to ::database and friends.
We use a single abstract class to mediate between data_dictionary
objects and the objects they represent (data_dictionary::impl).
This makes the data_dictionary objects very lightweight - they
only contain a pointer to the impl object (of which only one
needs to be instantiated), and a reference to the object that
is represented. This allows these objects to be easily passed
by value.
The abstraction is leaky: in one place it is outright breached
with data_dictionary::database::real_database() that returns
a ::database reference. This is used so that we can perform the
transition incrementally. Another place is that one of the
methods returns a secondary_index_manager, which in turn grants
access to the real objects. This will be addressed later, probably
by type erasing as well.
This patch only contains the interface, and no implementation. It
is somewhat messy since it mimics the years-old evolution of the
real objects, but maybe it will be easier to improve it now.
The new module will contain all schema related metadata, detached from
actual data access (provided by the database class). User types is the
first contents to be moved to the new module.
In case count_normal_token_owners check fails, sleep and retry. This
makes test setups like skip_wait_for_gossip_to_settle = 0 and
ring_delay_ms = 0 work.
Setting skip_wait_for_gossip_to_settle to 0 means do not wait for
gossip to settle at all. However, this is not respected in
gossiper::wait_for_range_setup and in gossiper::wait_for_gossip for
the initial sleeps.
Setting skip_wait_for_gossip_to_settle to zero is not allowed in a
production cluster anyway; it is mostly used by tests like dtest to
reduce the cluster boot-up time. Respect the zero value of
skip_wait_for_gossip_to_settle and avoid any sleep and wait completely.
"
These two readers are crucial for writing tests for any composable
reader so we need v2 versions of them before we can convert and test the
combined reader (for example). As these two readers are often used in
situations where the payload they deliver is specially crafted for the
test at hand, we keep their v1 versions too to avoid conversion meddling
with the tests.
Tests: unit(dev)
"
* 'forwarding-and-fragment-reader-v2/v1' of https://github.com/denesb/scylla:
flat_mutation_reader_v2: add make_flat_mutation_reader_from_fragments()
test/lib/mutation_source_test: don't force v1 reader in reverse run
mutation_source: add native_version() getter
flat_mutation_reader_v2: add make_forwardable()
position_in_partition: add after_key(position_in_partition_view)
flat_mutation_reader: make_forwardable(): fix indentation
flat_mutation_reader: make_forwardable(): coroutinize reader
The test in its current form is invalid, as database::remove
removes the table's name from its listing
as well as from the keyspace metadata, so it won't be found
after that.
That said, database::drop_column_family then proceeds
to truncate and stop the table, after calling await_pending_ops,
and the latter should indeed block on the lock taken by the test.
This change modifies the test to create some sstables in the
table's directory before starting the sstable_directory.
Then, when executing "drop table" in the background,
wait until the table is not found by db.find_column_family.
That would fail the test before this change.
See https://jenkins.scylladb.com/job/scylla-enterprise/job/next/1442/artifact/testlog/x86_64_debug/sstable_directory_test.sstable_directory_test_table_lock_works.4720.log
```
INFO 2021-12-13 14:00:17,298 [shard 0] schema_tables - Dropping ks.cf id=00487bc0-5c1d-11ec-9e3b-a44f824027ae version=b10c4994-31c7-3f5a-9591-7fedb0273c82
test/boost/sstable_directory_test.cc(453): fatal error: in "sstable_directory_test_table_lock_works": unexpected exception thrown by table_ok.get()
```
At this point, the test verifies again that the sstables are still on
disk (and no truncate happened), and only after drop completed,
the table should not exist on disk.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211214104407.2225080-1-bhalevy@scylladb.com>
When the UpdateTable operation is called for a non-existent table, the
appropriate error is ResourceNotFoundException, but before this patch
we ran into an exception, which resulted in an ugly "internal server
error".
In this patch we use the existing get_table() function which most other
operations use, and which does all the appropriate verifications and
generates the appropriate Alternator api_error instead of letting
internal Scylla exceptions escape to the user.
This patch also includes a test for UpdateTable on a non-existent table,
which used to fail before this patch and pass afterwards. We also add a
test for DeleteTable in the same scenario, and see it didn't have this
bug. As usual, both tests pass on DynamoDB, which confirms we generate
the right error codes.
Fixes#9747.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211206181605.1182431-1-nyh@scylladb.com>
The series is on top of "wire up schema raft state machine". It will
apply without it, but obviously will not work (when raft is disabled it
does nothing anyway).
This series does not provide any linearisability just yet though. It
only uses raft as a means to distribute schema mutations. To achieve
linearisability more work is needed. We need to at lease make sure
that schema mutation use monotonically increasing timestamps and,
since schema altering statement are RMW, no modification to schema
were done between schema mutation creation and application. If there
were an operation needs to be restarted.
* scylla-dev/gleb/raft-schema-v5: (59 commits)
cql3: cleanup mutation creation code in ALTER TYPE
cql3: use migration_manager::schema_read_barrier() before accessing a schema in altering statements
cql3: bounce schema altering statement to shard 0
migration_manager: add is_raft_enabled() to check if raft is enabled on a cluster
migration_manager: add schema_read_barrier() function
migration_manager: make announce() raft aware
migration_manager: co-routinize announce() function
migration_manager: pass raft_gr to the migration manager
migration_manager: drop view_ptr array from announce_column_family_update()
mm: drop unused announce_ methods
cql3: drop schema_altering_statement::announce_migration()
cql3: drop has_prepare_schema_mutations() from schema altering statement
cql3: drop announce_migration() usage from schema_altering_statement
cql3: move DROP AGGREGATE statement to prepare_schema_mutations() api
migration_manager: add prepare_aggregate_drop_announcement() function
cql3: move DROP FUNCTION statement to prepare_schema_mutations() api
migration_manager: add prepare_function_drop_announcement() function
cql3: move CREATE AGGREGATE statement to prepare_schema_mutations() api
migration_manager: add prepare_new_aggregate_announcement() function
cql3: move CREATE FUNCTION statement to prepare_schema_mutations() api
...
column_identifier serves two purposes: a value type used to denote an
identifier (which may or may not map to a table column), and `selectable`
implementation used for selecting table columns. This stands in the way
of further refactoring - the unification of the WHERE clause prepare path
(prepare_expression()) and the SELECT clause prepare path
(prepare_selectable()).
Reduce the entanglement by moving the selectable-specific parts to a new
type, selectable_column, and leaving column_identifier as a pure value type.
Closes #9729
* github.com:scylladb/scylla:
cql3: move selectable_column to selectable.cc
cql3: column_identifier: split selectable functionality off from column_identifier
The "ADD" operator in UpdateItem's AttributeUpdates supports a number of
types (numbers, sets and strings), and should result in a ValidationException
if the attribute's existing type is different from the type of the
operand - e.g., trying to ADD a number to an attribute which has a set
as a value.
So far we only had partial testing for this (we tested the case where
both operands are sets, but of different types) so this patch adds the
missing tests. The new tests pass (on both Alternator and DynamoDB) -
we don't have a bug there.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211213195023.1415248-1-nyh@scylladb.com>
Currently when scrub/validate is stopped (e.g. via the api),
scrub_validate_mode_validate_reader co_return:s without
closing the reader passed to it - causing a crash due
to internal error check, see #9766.
Throwing a compaction_stopped_exception rather than co_return:ing
an exception means it will be handled like any other exception,
including closing the reader.
Fixes #9766
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211213125528.2422745-1-bhalevy@scylladb.com>
auto_compaction has been disabled, so sstables
may have already accumulated and require compaction.
Do not wait for new sstables to be written to trigger
compaction, trigger compaction right away.
Refs #9784
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211212090632.1257829-1-bhalevy@scylladb.com>
Schema altering statements do read/modify/write on the schema. To make
sure a statement accesses the latest schema, it needs to execute a raft
read barrier before accessing the local schema copy.
Appending an empty range adjacent to an existing range tombstone would
not deoverlap (by dropping the empty range tombstone), resulting in a
different (non-canonical) result depending on the order of appending.
Suppose that [a, b) covers [x, x)
Appending [a, x) then [x, b) then [x, x) would give [a, b)
Appending [a, x) then [x, x) then [x, b) would give [a, x), [x, x), [x, b)
Fix by dropping empty range tombstones.
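A minimal Python sketch of the fix (hypothetical names; the real code operates on range tombstone lists in C++). Dropping empty half-open ranges up front makes the result canonical regardless of append order:

```python
def append_range_tombstone(ranges, start, end):
    """Append the half-open range tombstone [start, end) to `ranges`."""
    # The fix: drop empty range tombstones [x, x) outright, so the
    # result no longer depends on the order of appending.
    if start >= end:
        return ranges
    # Deoverlap: merge with the previous range when it is adjacent.
    if ranges and ranges[-1][1] == start:
        ranges[-1] = (ranges[-1][0], end)
    else:
        ranges.append((start, end))
    return ranges
```

With a=0, x=5, b=10, appending [a, x), [x, b), [x, x) and appending [a, x), [x, x), [x, b) now both yield the single canonical range [a, b).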
Once the current window is sealed, TWCS is supposed to compact all its
sstables into one. If there's an ongoing compaction, it can happen that
sstables are missed and therefore past windows will contain more than
one sstable. Additionally, the major may not happen at all under heavy
load. All these problems are fixed by serializing the major on the past
window and also postponing it if the manager refuses to run the job now.
Fixes #9553.
Reviewed-by: Benny Halevy <bhalevy@scylladb.com>
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
The STCS option can be retrieved from a class member, as newest_bucket()
is no longer a static function. Let's get rid of it.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This interface is akin to table_state, but compaction manager's
representative instead.
It will allow the compaction manager to set goals and constraints on
compaction strategies. It will start by allowing a strategy to know
if there's an ongoing compaction, which is useful for virtually all
strategies. For example, LCS may want to compact L0 in parallel
with higher levels, to avoid L0 falling behind.
This interface can be easily extended to allow manager to switch
to a reclaim mode, if running out of space, etc.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
The code in test/cql-pytest/run.py can start Scylla (or Cassandra, or
Redis, etc.) in a random IP address in 127.*.*.*. We explained in a
comment that 127.0.0.* is used by CCM so we avoid it in case someone
runs both dtest and test.py in parallel on the same machine.
But this observation was not accurate: Although the original CCM did use
only 127.0.0.*, in Scylla's CCM we added in 2017, in commit
00d3ba5562567ab83190dd4580654232f4590962, the ability to run multiple
copies of CCM in parallel; CCM now uses 127.0.*.*, not just 127.0.0.*.
So we need to correct this in the comment.
Luckily, the code doesn't need to change! We already avoided the entire
127.0.*.* for simplicity, not just 127.0.0.*.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211212151339.1361451-1-nyh@scylladb.com>
"
Today, data from different buckets (e.g. windows) cannot be compacted together because
mutation compactor happens inside each consumer, where each consumer is done on behalf
of a particular bucket. To solve this problem, the mutation compaction process is being
moved from consumer into producer, such that the interposer consumer, which is responsible
for segregation, will be fed compacted data and forward it to the owner bucket.
Fixes #9662.
tests: unit(debug).
"
* 'compact_across_buckets_v2' of github.com:raphaelsc/scylla:
tests: sstable_compaction_test: add test_twcs_compaction_across_buckets
compaction: Move mutation compaction into producer for TWCS
compaction: make enable_garbage_collected_sstable_writer() more precise
To be able to confine raft to the execution time of a statement we need to
move all schema access to the execution time as well. Since the
validation code accesses the schema, let's run it during execution.
To be able to confine raft to the execution time of a statement we need to
move all schema access to the execution time as well. Since the
validation code accesses the schema, let's run it during execution.
To be able to confine raft to the execution time of a statement we need to
move all schema access to the execution time as well. Since the
validation code accesses the schema, let's run it during execution.
Instead of announcing schema mutations, the new function will return
them. The caller is responsible for announcing them. To ease the
transition, make the API optional. Statements that do not have it will
use the old announce_migration() method.
Currently a keyspace mutation is included into schema mutation list just
before announcement. Move the inclusion to a separate function. It will
be used later when instead of announcing new schema the mutation array
will be returned.
Verify that TWCS compaction can now compact data across time windows,
like a tombstone which will cause all shadowed data to be purged
once they're all compacted together.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
If an interposer is enabled, like the timestamp-based one for TWCS, data
from different buckets (e.g. windows) cannot be compacted together because
mutation compaction happens inside each consumer, where each consumer
belongs to a different bucket.
To remove this limitation, let's move the mutation compactor from
consumer into producer, such that compacted data will be fed into
the interposer before it segregates data.
We're short-circuiting this logic if TWCS isn't in use as
compacting reader adds overhead to compaction, given that this reader
will pop fragments from combined sstable reader, compact them using
mutation_compactor and finally push them out to the underlying
reader.
without compacting reader (e.g. STCS + no interposer):
228255.92 +- 1519.53 partitions / sec (50 runs, 1 concurrent ops)
224636.13 +- 1165.05 partitions / sec (100 runs, 1 concurrent ops)
224582.38 +- 1050.71 partitions / sec (100 runs, 1 concurrent ops)
with compacting reader (e.g. TWCS + interposer):
221376.19 +- 1282.11 partitions / sec (50 runs, 1 concurrent ops)
216611.65 +- 985.44 partitions / sec (100 runs, 1 concurrent ops)
215975.51 +- 930.79 partitions / sec (100 runs, 1 concurrent ops)
So the cost of compacting data across buckets is ~3.5%, which happens
only with interposer enabled and GC writer disabled.
Fixes #9662.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
generate_view_paging_state_from_base_query_results() has an unused
proxy argument that's carried over quite a long stack for nothing.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20211210175203.26197-1-xemul@scylladb.com>
We only want to enable the GC writer if incremental compaction is
required. Let's make it more precise by checking that the size limit
for sstables isn't disabled, so the GC writer will only be enabled for
compaction strategies that really need it, and strategies that don't
need it won't pay the penalty.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Move selectable_column to selectable.cc (and to the cql3::selection
namespace). This cleans up column_identifier.hh so it is now a pure
vocabulary header.
column_identifier serves two purposes: one is as a general value type
that names a value, for example in column_specification. The other
is as a `selectable` derived class specializing in selecting columns
from a base table. Obviously, to select a column from a table you
need to know its name, but to name some value (which might not
be a table column!) you don't need the ability to select it from
a table.
The mix-up stands in the way of unifying the select-clause
(`selectable`) and where-clause (previously known as `term`)
expression prepare paths. This is because the already existing
where-clause result, `expr::column_value`, is implemented as
`column_definition*`, while the select clause equivalent,
`column_identifier`, can't contain a column_definition because
not all uses of column_identifier name a schema column.
To fix this, split column_identifier into two: column_identifier
retains the original use case of naming a value, while a new class
`selectable_column` has the additional ability of selecting a
column in a select clause. It still doesn't use column_definition,
that will be adjusted later.
When `check_and_repair_cdc_streams` encountered a node with status LEFT, Scylla
would throw. This behavior is fixed so that LEFT nodes are simply ignored.
Fixes #9771
Closes #9778
The main difference compared to v1 (apart from having the _v2 suffix at
relevant places) is how slicing and reversing work. The v2 variant has
native reverse support built in because the reversing reader is not
something we want to convert to v2.
A native v2 mutation-source test is also added.
Currently in the reverse run we wrap the test-provided mutation-source
and create a v1 reader with it, forcing a conversion if the
mutation-source has a v2 factory. Worse still, if the test is v2 native,
there will be a double conversion. This patch fixes this by creating a
wrapper mutation-source appropriate to the version of the underlying
factory of the wrapped mutation-source.
Not a completely straightforward conversion as the v2 version has to
make sure to emit the current range tombstone change after
fast_forward_to() (if it changes compared to the current one before fast
forwarding).
Changes are around the two new members `_tombstone_to_emit` and
`maybe_emit_tombstone()`.
There are several places that (still) use the throwing B-tree .insert_before()
method and don't manage the inserted object's lifetime. Some of those places
also leave the leaked rows_entry on the LRU, delaying the assertion failure
until those entries get evicted (#9728).
To prevent such surprises in the future, this set removes the non-safe
inserters from the B-tree code. Actually, most of this set is that removal
plus preparations for reviewability.
* xemul/br-rows-insertion-exception-safety-2:
btree: Earnestly discourage from insertion of plain references
row-cache: Handle exception (un)safety of rows_entry insertion
partition_snapshot_row_cursor: Shuffle ensure_result creation
mutation_partition: Use B-tree insertion sugar
tests: Make B-tree tests use unique-ptrs for insertion
The B-tree's insert_before() is a throwing operation; its caller
must account for that. When the rows_entry collection was
switched to B-tree, all the risky places were fixed by ee9e1045,
but a few places slipped under the radar.
In the cache_flat_mutation_reader there's a place where a C-pointer
is inserted into the tree, thus potentially leaking the entry.
In the partition_snapshot_row_cursor there are two places that not
only leak the entry, but also leave it in the LRU list. The latter
is quite nasty, because those entries can be evicted; the eviction code
tries to get a rows_entry iterator from "this", but the hook happens
to be unattached (because the insertion threw) and fails the assert.
fixes: #9728
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Both places get a C-pointer to the freshly allocated rows_entry,
insert it where needed and return the dereferenced pointer.
The C-pointer is going to become a smart pointer that would go out
of scope before returning. This change prepares for that by constructing
the ensure_result from the iterator that's returned from insertion
of the entry.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The B-tree insertion methods accept smart pointers and
automatically release the ownership after exception-risky
part is passed.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
In issue #9406 we noticed that a counter for BatchGetItem operations
was missing. When we fixed it, we added a test which checked this
counter - but only this counter. It was left as a TODO to test the rest
of the Alternator metrics, and this is what this patch does.
Here we add a comprehensive test for *all* of the operations supported
by Scylla and how they increase the appropriate operation counter.
With this test we discovered a new bug: the DescribeTimeToLive operation
incremented the UpdateTimeToLiveCounter :-( So in this patch we also
include a fix for that bug, and the new test verifies that it is fixed.
In addition to the operation counters, Alternator also has additional
metrics, and we also added tests for some of them - but not all. The
remaining untested metrics are listed in a TODO comment.
Message-Id: <20211206154727.1170112-1-nyh@scylladb.com>
Alternator's support for the DynamoDB API TTL features is experimental,
so if a user attempts to use one of the TTL API requests, an error message
is returned saying that the experimental feature must be turned on first.
The message incorrectly said that the name of the experimental flag
to turn on is "alternator_ttl", with an underscore. But that's a typo -
it should be "alternator-ttl" with a hyphen.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211209183428.1336526-1-nyh@scylladb.com>
do_with_some_data runs a function in a seastar thread.
It needs to get() the future func returns rather
than propagating it.
This solves a secondary failure due to abandoned future
when the test case fails, as seen in
https://jenkins.scylladb.com/view/master/job/scylla-master/job/next/4254/artifact/testlog/x86_64_debug/database_test.snapshot_with_quarantine_works.381.log
```
test/boost/database_test.cc(903): fatal error: in "snapshot_with_quarantine_works": critical check expected.empty() has failed
WARN 2021-12-08 00:35:16,300 [shard 0] seastar - Exceptional future ignored: boost::execution_aborted, backtrace: 0x10935e50 0x16ff2d8d 0x16ff2a4d 0x16ff5033 0x16ff5ec2 0x162d4ce9 0x10a2bdb5 0x10a2bd24 0x10a54ca4 0x10a27cf3 0x10a22151 0x10a67c9d 0x10a67a78 0x163ac37e 0x163b29e9 0x163b7690 0x163b51c1 0x17c212df 0x17c1f097 0x17bf8b4c 0x17bf83f2 0x17bf82a2 0x17bf7d52 0x10f8bf5a 0x166db84b /lib64/libpthread.so.0+0x9298 /lib64/libc.so.6+0x100352
...
*** 1 abandoned failed future(s) detected
Failing the test because fail was requested by --fail-on-abandoned-failed-futures
```
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211209174512.851945-1-bhalevy@scylladb.com>
In queries like:
```cql
SELECT * FROM t WHERE p = 0 AND c1 = 0 ORDER BY (c1 ASC, c2 ASC)
```
we can skip the requirement to specify ordering for `c1` column.
The `c1` column is restricted by an `EQ` restriction, so it can have
at most one value anyway, there is no need to sort.
This commit makes it possible to write just:
```cql
SELECT * FROM t WHERE p = 0 AND c1 = 0 ORDER BY (c2 ASC)
```
I reorganized the ordering code; I feel that it's now clearer and easier to understand.
It would have been possible to introduce only a small change to the existing code, but I feel it becomes a bit too messy.
I tried it out on the [`orderby_disorder_small`](https://github.com/cvybhu/scylla/commits/orderby_disorder_small) branch.
The diff is a bit messy because I moved all ordering functions to one place;
it's better to read [select_statement.cc](https://github.com/cvybhu/scylla/blob/orderby_disorder/cql3/statements/select_statement.cc#L1495-L1658) lines 1495-1658 directly.
In the new code it would also be trivial to allow specifying columns in any order, we would just have to sort them.
For now I commented out the code needed to do that, because the point of this PR was to fix #2247.
Allowing this would require some more work changing the existing tests.
Fixes: #2247
Closes #9518
* github.com:scylladb/scylla:
cql-pytest: Enable test for skipping eq restricted columns in order by
cql3: Allow to skip EQ restricted columns in ORDER BY
cql3: Add has_eq_restriction_on_column function
cql3: Reorganize orderings code
Seastar moved the read_entire_stream(), read_entire_stream_contiguous()
and skip_entire_stream() from the "httpd" namespace to the "util"
namespace. Using them with their old names causes deprecation warnings
when compiling alternator/server.cc.
This patch fixes the namespace (and adds the new include) to get rid of
the deprecation warnings.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211209132759.1319420-1-nyh@scylladb.com>
"
Mutations are not guaranteed to come in the order of their timestamps.
If there is an expired tombstone in the sstable and a repair inserts old
data into a memtable, compaction would not consider the memtable data and
would purge the tombstone, leading to data resurrection. The solution is to
disallow purging tombstones newer than min memtable timestamp. If there
are no memtables, max timestamp is used.
"
* 'check-memtable-at-compact-tombstone-discard/v2' of github.com:mikolajsieluzycki/scylla:
table: Prevent resurrecting data from memtable on compaction
table: Add min_memtable_timestamp function to table
Right now we do not really have any parallelism in the alternator
TTL service, but in order to be future-proof, a semaphore
is instantiated to ensure that we only handle 1 page of a scan
at a time, regardless of how many tables are served.
This commit also removes the FIXME regarding the service permit
- using an empty permit is a conscious decision, because the
parallelism is limited by other means (see above).
Tests: unit(release)
Message-Id: <b5f0c94f1afbead1f940a210911cc05f70900dcd.1638990637.git.sarna@scylladb.com>
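A sketch of the idea in Python's asyncio (the real service is Seastar C++; all names here are illustrative): a single semaphore permit guarantees that at most one page of any table's scan is processed at a time, regardless of how many tables are served.

```python
import asyncio

async def expire_pages(tables, scan_page):
    """Run the TTL expiration scan over all tables, but process at most
    one page at a time thanks to a one-permit semaphore."""
    sem = asyncio.Semaphore(1)

    async def scan_table(table):
        for page in range(3):  # stand-in for a paged table scan
            async with sem:    # at most one page in flight, globally
                await scan_page(table, page)

    await asyncio.gather(*(scan_table(t) for t in tables))
```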
This test was marked as xfail, but now the functionality it tests has been implemented.
In my opinion the expected error message made no sense; the message was:
"Order by currently only supports the ordering of columns following their declared order in the PRIMARY KEY"
in cases where a restriction was missing on one column.
This has been changed to:
"Unsupported order by relation - column {} doesn't have an ordering or EQ relation."
Because of that I had to modify the test to accept messages from both Scylla and Cassandra.
The expected error message pattern is now "rder by", because that's the largest common part.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Allow stopping compaction by type on a given keyspace
and list of tables.
Add respective rest_api test.
Fixes #9700
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Mutations are not guaranteed to come in the order of their timestamps.
If there is an expired tombstone in the sstable and a repair inserts old
data into a memtable, compaction would not consider the memtable data and
would purge the tombstone, leading to data resurrection. The solution is to
disallow purging tombstones newer than min memtable timestamp.
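The purging rule can be sketched as follows (hypothetical helper; the real check lives in the C++ compaction code, and "no memtables" is represented by passing the maximum timestamp):

```python
def can_purge_tombstone(tombstone_timestamp, min_memtable_timestamp):
    """A tombstone may be purged only if it is older than every write
    still sitting in a memtable; a late-arriving (e.g. repaired) write
    with an older timestamp could otherwise be resurrected. With no
    memtables, the caller passes the maximum timestamp (no restriction).
    """
    return tombstone_timestamp < min_memtable_timestamp
```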
In queries like:
SELECT * FROM t WHERE p = 0 AND c1 = 0 ORDER BY (c1 ASC, c2 ASC)
we can skip the requirement to specify ordering for c1 column.
The c1 column is restricted by an EQ restriction, so it can have
only one value anyway, there is no need to sort.
This commit makes it possible to write just:
SELECT * FROM t WHERE p = 0 AND c1 = 0 ORDER BY (c2 ASC)
Fixes: #2247
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Adds a function that checks whether a given expression has an EQ restriction
on the specified column.
It finds restrictions like
col = ...
or
(col, col2) = ...
IN restrictions don't count; they aren't EQ restrictions.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
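A Python sketch of the check (the real function walks cql3 expression trees in C++; `relation` here is an illustrative stand-in type):

```python
from dataclasses import dataclass

@dataclass
class relation:
    op: str          # '=', 'IN', '<', ...
    columns: tuple   # ('col',) or ('col', 'col2') for tuple relations

def has_eq_restriction_on_column(where, column):
    """True if the conjunction `where` restricts `column` with EQ,
    either directly (col = ...) or inside a tuple relation
    ((col, col2) = ...). IN does not count as an EQ restriction."""
    return any(r.op == '=' and column in r.columns for r in where)
```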
Reorganized the code that handles column ordering (ASC or DESC).
I feel that it's now clearer and easier to understand.
Added an enum that describes column ordering.
It has two possible values: ascending or descending.
It used to be a bool that was sometimes called 'reversed',
which could mean multiple things.
Instead of column.type->is_reversed() != <ordering bool>
there is now a function called are_column_select_results_reversed.
Split checking whether the ordering is reversed and verifying whether it's correct into two functions.
Before, all of this was done by is_reversed().
This is a preparation to later allow skipping ORDER BY restrictions on some columns.
Adding this to the existing code caused it to get quite complex,
but this new version is better suited for the task.
The diff is a bit messy because I moved all ordering functions to one place;
it's better to read select_statement.cc lines 1495-1651 directly.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
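A Python stand-in for the reversal check described above (the exact polarity here is an assumption derived from the quoted XOR expression `column.type->is_reversed() != <ordering bool>`; the real code is C++):

```python
from enum import Enum

class ordering(Enum):
    ascending = 'ASC'
    descending = 'DESC'

def are_column_select_results_reversed(column_type_is_reversed, requested):
    """Results must be read in reverse when the requested order disagrees
    with the column's native order (which a reversed type flips)."""
    return column_type_is_reversed != (requested == ordering.descending)
```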
The description in protocols.md of the Redis protocol server in Scylla
explains how its port can be configured, but not how the listening IP
address can be configured. It turns out that the same "rpc_address" that
controls CQL's and Thrift's IP address also applies to Redis. So let's
document that.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211208160206.1290916-1-nyh@scylladb.com>
This series fixes a couple of issues around generating and handling no_such_keyspace and no_such_column_family exceptions.
First, it removes std::throw_with_nested around their throw sites in the respective database::find_* functions.
Fixes #9753
And then, it introduces a `validate_tables` helper in api/storage_service.cc that generates a `bad_param_exception` in order to set the correct http response status if a non-existing table name is provided in the `cf` http request parameter.
Fixes #9754
The series also adds a test for the REST API under test/rest_api that verifies the storage_service enable/disable auto_compaction api and checks the error codes for non-existing keyspace or table.
Test: unit(dev)
Closes #9755
* github.com:scylladb/scylla:
api: storage_service: add parse_tables
database: un-nest no_such_keyspace and no_such_column_family exceptions
database: throw internal error when failing uuid returned by find_uuid
database: find_uuid: throw no_such_column_family exception if ks/cf were not found
test: rest_api: add storage_service test
test: add basic rest api test
test: cql-pytest: wait for rest api when starting scylla
Splits and validates the cf parameter, containing an optional
comma-separated list of table names.
If any table is not found and a no_such_column_family
exception is thrown, wrap it in a `bad_param_exception`
so it will translate to `reply::status_type::bad_request`
rather than `reply::status_type::internal_server_error`.
With that, hide the split_cf function from api/api.hh
since it was used only from api/storage_service
and new use sites should use validate_tables instead.
Fixes #9754
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
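The behavior can be sketched in Python (hypothetical names mirroring parse_tables and bad_param_exception; the real code is C++ in api/storage_service.cc):

```python
class bad_param_exception(Exception):
    """Maps to HTTP 400 (bad_request) at the API layer in this sketch."""

def parse_tables(ks, known_tables, cf_param=None):
    """Split the optional comma-separated `cf` request parameter and
    validate each name, turning an unknown table into a
    bad_param_exception rather than an internal server error."""
    tables = cf_param.split(',') if cf_param else sorted(known_tables)
    for t in tables:
        if t not in known_tables:
            raise bad_param_exception(f'no_such_column_family: {ks}:{t}')
    return tables
```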
These were thrown in the respective database::find_*
functions as nested exceptions since
d3fe0c5182.
Wrapping them in nested exceptions just makes them
harder to figure out and work with, and apparently serves
no purpose.
Without these nested_exception we can correctly detect
internal errors when synchronously failing to find
a uuid returned by find_uuid(ks, cf).
Fixes #9753
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
find_uuid returns the uuid found for ks_name.table_name.
In some cases, we immediately and synchronously use that
uuid to look up other information, like the table
or the schema. Failing to find that uuid indicates
an internal error when no preemption is possible.
Note that yielding could allow deletion of the table
to sneak in and invalidate the uuid.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Rather than masquerading all errors as std::out_of_range(""),
convert only the std::out_of_range error from _ks_cf_to_uuid.at()
to no_such_column_family(ks, cf). That relieves all callers of
find_uuid from doing that conversion themselves.
For example, get_uuid in api/column_family now only deals with converting
no_such_column_family to bad_param_exception, as it needs to do
at the api level, rather than generating a similar error from scratch.
Other call sites required no intervention.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
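A Python analogue of the conversion (KeyError standing in for std::out_of_range; names are illustrative): only the lookup miss becomes the domain exception, anything else propagates unchanged.

```python
class no_such_column_family(Exception):
    pass

def find_uuid(ks_cf_to_uuid, ks, cf):
    """Look up the uuid for ks.cf, converting only the lookup miss into
    the domain-specific no_such_column_family exception."""
    try:
        return ks_cf_to_uuid[(ks, cf)]
    except KeyError:
        raise no_such_column_family(f'{ks}.{cf} not found') from None
```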
FIXME: negative tests for not-found tables
should result in a requests.codes.bad_request
but currently result in requests.codes.internal_server_error.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
This patch addresses yet another FIXME from alternator/ttl.cc.
Namely, scans are now started from a random, owned token range
instead of always starting with the first range.
This mechanism is expected to reduce the probability of some
ranges being starved when the scanning process is often restarted,
e.g. due to nodes failing.
Should the mechanism prove insufficient for some users, a more complete
solution is to regularly persist the state of the scanning process
in a table (distributed if we want to allow other nodes to pick up
from where a dead node left off), but that induces overhead.
Tests: unit(release) (including a long loop over the ttl pytest)
Message-Id: <7fc3f6525ceb69725c41de10d0fb6b16188349e3.1638387924.git.sarna@scylladb.com>
Message-Id: <db198e743ca9ed1e5cc659e73da342fbce2c882a.1638473143.git.sarna@scylladb.com>
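The random starting point can be sketched as a rotation of the owned ranges (illustrative Python; the real code operates on token ranges in ttl.cc):

```python
import random

def ranges_in_scan_order(owned_ranges, rng=random):
    """Rotate the owned ranges by a random offset so a restarted scan
    doesn't always begin with the first range, reducing starvation."""
    if not owned_ranges:
        return []
    start = rng.randrange(len(owned_ranges))
    return owned_ranges[start:] + owned_ranges[:start]
```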
Some of the tests, like nodetool.py, use the scylla REST API.
Add a check_rest_api function that queries http://<node_addr>:10000/
that is served once scylla starts listening on the API port
and call it via run.wait_for_services.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
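A minimal sketch of such a helper (Python, like run.py itself, but the names and details here are assumptions rather than the actual implementation):

```python
import time
import urllib.error
import urllib.request

def check_rest_api(addr, port=10000, timeout=30):
    """Poll the REST API root until it answers; raise on timeout."""
    url = f'http://{addr}:{port}/'
    deadline = time.monotonic() + timeout
    while True:
        try:
            urllib.request.urlopen(url, timeout=1)
            return  # the API port is being served
        except (urllib.error.URLError, OSError):
            if time.monotonic() >= deadline:
                raise TimeoutError(f'REST API at {url} did not come up')
            time.sleep(0.1)
```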
On my local machine, a 3 second deadline proved to cause flakiness
of test_ttl_expiration case, because its execution time is just
around 3 seconds.
This patch addresses the problem by bumping the local timeout to 10
seconds (and 15 for test_ttl_expiration_long, since it's dangerously
near the 10 second deadline on my machine as well).
Moreover, some test cases short-circuited once they detected that
all needed items had expired, but others lacked this and always used
their full time slot. Since 10 seconds is a little too long for
a single test case, even one marked with --veryslow, this patch
also adds a couple of other short-circuits.
One exception is test_ttl_expiration_hash_wrong_type, which actually
depends on the fact that we should wait for the whole loop to finish.
Since this case was never flaky for me with the 3 second timeout,
it's left as is.
Theoretically, test_ttl_expiration also kind of depends on checking
the condition more than once (because the TTL of one of the values
is bumped on each iteration), but empirical evidence shows that
multiple iterations always occur in this test case anyway - for
me, it always spun at least 3 times.
Tests: unit(release)
Message-Id: <a0a479929dac37daace744e0a970567a8aa3b518.1638431933.git.sarna@scylladb.com>
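The short-circuit pattern the tests use can be sketched as (hypothetical helper name):

```python
import time

def wait_until(condition, deadline_seconds, poll_interval=0.01):
    """Poll `condition`, returning True as soon as it holds instead of
    always sleeping out the whole time slot; False on timeout."""
    deadline = time.monotonic() + deadline_seconds
    while time.monotonic() < deadline:
        if condition():
            return True
        time.sleep(poll_interval)
    return condition()  # one last check at the deadline
```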
If fill_buffer() is called after EOS, the underlying reader will
be fast-forwarded to a range pointed to by an invalid iterator,
thus producing incorrect results.
fill_buffer() is changed to return early if EOS was reached,
meaning that the underlying reader has already been fast-forwarded to
all ranges managed by multi_range_reader.
Usually, consuming facilities check for EOS before calling
fill_buffer(), but most reader implementations also check for EOS
themselves to avoid correctness issues. Let's do the same here.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211208131423.31612-1-raphaelsc@scylladb.com>
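A toy Python model of the guard (the real multi_range_reader is C++; this only illustrates the early return once EOS was reached):

```python
class multi_range_reader:
    """Sketch: once EOS is reached there is no valid next range to
    fast-forward to, so fill_buffer() must return early instead of
    advancing an exhausted iterator."""
    def __init__(self, ranges):
        self._ranges = iter(ranges)
        self._eos = False
        self.buffer = []

    def fill_buffer(self):
        if self._eos:
            return  # the fix: no fast-forward past the last range
        try:
            self.buffer.extend(next(self._ranges))
        except StopIteration:
            self._eos = True
```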
* seastar f8a038a0a2...8d15e8e67a (21):
> core/program_options: preserve defaultness of CLI arguments
> log: Silence logger when logging
> Include the core/loop.hh header inside when_all.hh header
> http: Fix deprecated wrappers
> foreign_ptr: Add concept
> util: file: add read_entire_file
> short_streams: move to util
> Revert "Merge: file: util: add read_entire_file utilities"
> foreign_ptr: declare destroy as a static method
> Merge: file: util: add read_entire_file utilities
> Merge "output_stream: handle close failure" from Benny
> net: bring local_address() to seastar::connected_socket.
> Merge "Allow programatically configuring seastar" from Botond
> Merge 'core: clean up memory metric definitions' from John Spray
> Add PopOS to debian list in install-dependencies.sh
> Merge "make shared_mutex functions exception safe and noexcept" from Benny
> on_internal_error: set_abort_on_internal_error: return current state
> Implementation of iterator-range version of when_any
> net: mark functions returning ethernet_address noexcept
> net: ethernet_address: mark functions noexcept
> shared_mutex: mark wake and unlock methods noexcept
Contains patch from Botond Dénes <bdenes@scylladb.com>:
db/config: configure logging based on app_template::seastar_options
Scylla has its own config file which supports configuring aspects of
logging, in addition to the built-in CLI logging options. When applying
this configuration, the CLI provided option values have priority over
the ones coming from the option file. To implement this scylla currently
reads CLI options belonging to seastar from the boost program options
variable map. The internal representation of CLI options, however, does
not constitute an API of seastar and is thus subject to change (even if
unlikely). This patch moves away from this practice and uses the new
shiny C++ API `app_template::seastar_options` to obtain the current
logging options.
This series wires up the schema state machine to process raft commands
and transfer snapshots. The series assumes that raft group zero is used
for schema transfer only and that a single raft command contains a single
schema change in the form of a canonical_mutation array. Both assumptions
may change, in which case the code will be changed accordingly, but we
need to start somewhere.
* scylla-dev/gleb/schema-raft-sm-v2:
schema raft sm: request schema sync on schema_state_machine snapshot transfer
raft service: delegate snapshot transfer to a state machine implementation
schema raft sm: pass migration manager to schema_raft_state_machine and merge schema on apply()
Add a short test verifying that Alternator responds with the correct
error code (UnknownOperationException) when receiving an unknown or
unsupported operation.
The test passes on both AWS and Alternator, confirming that the behavior
is the same.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211206125710.1153008-1-nyh@scylladb.com>
We begin by preparing the `persistence` class so that the storage can be
reused across different Raft server instances: the test keeps a shared
pointer to the storage so that when a server stops, a new server with
the same ID can be reconstructed with this storage.
We then modify `environment` so that server instances can be removed and
replaced in the middle of operations.
Finally we prepare a nemesis operation which gracefully stops or
immediately crashes a randomly picked server and run this operation
periodically in `basic_generator_test`.
One important change to the API of `raft::server` is included:
the metrics are not automatically registered in `start()`. This is
because metric registration modifies global data structures, which
cannot be done twice with the same set of metrics (and we would do it
when we restart a server with the same ID). Instead,
`register_metrics()` is exposed in the `raft::server` interface to be
called when running servers in production.
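A minimal sketch of this design, with hypothetical names; it only illustrates why registration must be explicit rather than an implicit part of `start()`:

```python
# Global registry: registering the same metric set twice is an error,
# which is why registration must not happen implicitly in start().
_registered_ids = set()

class Server:
    def __init__(self, server_id):
        self.server_id = server_id
        self.started = False

    def start(self):
        # No metric registration here, so a restarted server with the
        # same ID does not clash with its stopped predecessor.
        self.started = True

    def register_metrics(self):
        # Called explicitly, e.g. only when running in production.
        if self.server_id in _registered_ids:
            raise RuntimeError("metrics already registered for this ID")
        _registered_ids.add(self.server_id)
```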
* kbr/crashes-v3:
raft: server: print the ID of aborted server
test: raft: randomized_nemesis_test: run stop_crash nemesis in `basic_generator_test`
test: raft: randomized_nemesis_test: introduce `stop_crash` operation
test: raft: randomized_nemesis_test: environment: implement server `stop` and `crash`
raft: server: don't register metrics in `start()`
test: raft: randomized_nemesis_test: raft_server: return `stopped_error` when called during abort
test: raft: randomized_nemesis_test: handle `raft::stopped_error`
test: raft: randomized_nemesis_test: handle missing servers in `environment` call functions
test: raft: randomized_nemesis_test: environment: split `new_server` into `new_node` and `start_server`
test: raft: randomized_nemesis_test: remove `environment::get_server`
test: raft: randomized_nemesis_test: construct `persistence_proxy` outside `raft_server<M>::create`
test: raft: randomized_nemesis_test: persistence_proxy: store a shared pointer to `persistence`
test: raft: randomized_nemesis_test: persistence: split into two classes
test: raft: logical_timer: introduce `sleep_until`
It turns out that -O3 enabled -fslp-vectorize even if it is
disabled before -O3 on the command line. Rearrange the code
so that -O3 is before the more specific optimization options.
The problem was that such a command:
```
alter table ks.cf with cdc={'ttl': 120};
```
would assume that the "enabled" parameter takes its default value
("false") and, in effect, disable CDC on that table. This commit forces
the user to specify that key.
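The enforced behavior can be sketched like this (hypothetical validator name; the real check lives in Scylla's C++ CQL layer):

```python
def validate_cdc_alter_options(opts: dict) -> None:
    """Reject ALTER ... WITH cdc={...} that omits the 'enabled' key.

    Assuming the default ('false') for a missing 'enabled' would
    silently disable CDC on the table, so we require it explicitly.
    """
    if "enabled" not in opts:
        raise ValueError("Altering CDC options requires specifying 'enabled'")
```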
Fixes #6475
Closes #9720
get_max_result_size() is called on slice moved in previous argument.
This results in a use-after-move with clang, whose evaluation order is
left-to-right.
For paged queries, max_result_size is later overridden by query_pager,
but for unpaged and/or reversed queries it can happen that max result
size incorrectly contains the 1MB limit for paged, non-reversed queries.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211207145133.69764-1-raphaelsc@scylladb.com>
Since the sstable reader was already converted to flat_mutation_reader_v2, the compaction layer can naturally be converted too.
There are many dependencies that still use v1. Those strictly needed, like the readers in sstable_set (which link compaction to the sstable reader), were converted to v2 in this series. For those that aren't essential we're relying on the v1<-->v2 adaptors, and conversion work on them is postponed. The postponed ones are: the scrub specialized reader (needs a validator for mutation_fragment_v2), the interposer consumer, and the combined reader, which is used by the incremental selector. The incremental selector itself was converted to v2.
tests: unit(debug).
Closes #9725
* github.com:scylladb/scylla:
compaction: update compaction::make_sstable_reader() to flat_mutation_reader_v2
sstable_set: update make_crawling_reader() to flat_mutation_reader_v2
sstable_set: update make_range_sstable_reader() to flat_mutation_reader_v2
sstable_set: update make_local_shard_sstable_reader() to flat_mutation_reader_v2
sstable_set: update incremental_reader_selector to flat_mutation_reader_v2
Cannot be fully converted to flat_mutation_reader_v2 yet, as the
selector is built on combined_reader interface which is still not
converted. So only updated wherever possible.
Subsequent work will update sstable_set readers, which uses the
selector, to flat_mutation_reader_v2.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
There is a separate thread that periodically stops/crashes and restarts
a randomly chosen server, so the nemesis runs concurrently with
reconfigurations and network partitions.
An operation which chooses a server randomly, randomly chooses whether
to crash or gracefully stop it, performs the chosen operation, and
restarts the server after a selected delay.
`stop` gracefully stops a running server, `crash` immediately "removes" it
(from the point of view of the rest of the environment).
We cannot simply destroy a running server. Read the comments in `crash`
to understand how it's implemented.
Instead, expose `register_metrics()` at the `server` interface
(previously it was a private method of `server_impl`).
Metrics are global, so `register_metrics()` cannot be called on
two servers that have the same ID. Having two servers with the same ID
is exactly what we need, e.g. in tests, when we want to simulate server
stops and restarts.
`environment` functions for performing operations on Raft servers:
`is_leader`, `call`, `reconfigure`, `get_configuration`,
currently assume that a server is running on each node at all times and
that it never changes. Prepare these functions for missing/restarting
servers.
Soon it will be possible to stop a server and then start a completely
new `raft::server` instance that uses the same ID and persistence,
simulating a server restart. For this we introduce the concept of a
"node" which keeps the persistence alive (through a shared pointer). To
start a server - using `start_server` - we must first create a node on
which it will be running through `new_node`. `new_server` is now a
short function which does these two things.
To perform calls to servers in a Raft cluster, the test code would first
obtain a reference to a server through `get_server` and then call the
server directly. This will not be safe when we implement server crashes
and restarts, as servers will disappear in the middle of operations; we don't
want the test code to keep references to no-longer-existing servers.
In the new API the test will call the `environment` to perform
operations, giving it the server ID. `environment` will handle
disappearing servers underneath.
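The new ID-based environment API could look roughly like this sketch (the names are illustrative, not the actual test code):

```python
class StoppedError(Exception):
    """Raised when an operation targets a server that is not running."""

class Environment:
    def __init__(self):
        self._servers = {}  # server ID -> callable handling commands

    def start_server(self, server_id, server):
        self._servers[server_id] = server

    def stop_server(self, server_id):
        self._servers.pop(server_id, None)

    def call(self, server_id, command):
        # Test code only ever holds IDs; a missing server surfaces as
        # an error instead of a dangling reference.
        server = self._servers.get(server_id)
        if server is None:
            raise StoppedError(f"server {server_id} is not running")
        return server(command)
```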
We want the test to be able to reuse `persistence` even after
`persistence_proxy` is destroyed for simulating server restarts. We'll
do it by having the test keep a shared pointer to `persistence`.
To do that, instead of storing `persistence` by value and constructing
it inside `persistence_proxy`, store it by `lw_shared_ptr` which is
taken through the constructor (so `persistence` itself is now
constructed outside of `persistence_proxy`).
The previous `persistence` implemented the `raft::persistence` interface
and had two different responsibilities:
- representing "persistent storage", with the ability to store and load
stuff to/from it,
- accessing in-memory state shared with a corresponding instance of
`impure_state_machine` that is running alongside `persistence` inside
a `raft::server`.
For example, `persistence::store_snapshot_descriptor` would persist not
only the snapshot descriptor, but also the corresponding snapshot. The
descriptor was provided through a parameter but the snapshot wasn't. To
obtain the snapshot we use a data structure (`snapshots_t`) that both
`persistence` and `impure_state_machine` had a reference to.
We split `persistence` into two classes:
- `persistence` which handles only the first responsibility, i.e.
storing and loading stuff; everything to store is provided through
function parameters (e.g. now we have a `store_snapshot` function
which takes both the snapshot and its descriptor through the
parameters) and everything to load is returned directly by functions
(e.g. `load_snapshot` returns a pair containing both the descriptor
and corresponding snapshot)
- `persistence_proxy` (for lack of a better name) which implements
`raft::persistence`, contains the above `persistence` inside and
shares a data structure with `impure_state_machine`
(so `persistence_proxy` corresponds to the old `persistence`).
The goal is to prepare for reusing the persisted stuff between different
instances of `raft::server` running in a single test when simulating
server shutdowns/crashes and restarts. When destroying a `raft::server`,
we destroy its `impure_state_machine` and `persistence_proxy` (we are
forced to because constructing a `raft::server` requires a `unique_ptr`
to `raft::persistence`), but we will be able to keep the underlying
`persistence` for the next instance (if we simulate a restart) - after a
slight modification made in the next commit.
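A rough sketch of the split, using simplified hypothetical classes; the point is that `Persistence` outlives the proxy and can be handed to the next server instance:

```python
class Persistence:
    """Pure storage: everything to store arrives through parameters,
    everything to load is returned directly."""
    def __init__(self):
        self._snapshot = None

    def store_snapshot(self, descriptor, snapshot):
        self._snapshot = (descriptor, snapshot)

    def load_snapshot(self):
        return self._snapshot

class PersistenceProxy:
    """Implements the raft-facing interface; shares the `snapshots`
    structure with the state machine and holds only a reference to the
    reusable Persistence."""
    def __init__(self, persistence, snapshots):
        self._persistence = persistence
        self._snapshots = snapshots

    def store_snapshot_descriptor(self, descriptor):
        # Looks up the actual snapshot in the shared structure and
        # passes both pieces explicitly to the storage layer.
        self._persistence.store_snapshot(descriptor, self._snapshots[descriptor])
```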
"
Currently stateful (readers being saved and resumed on page boundaries)
multi-range scans are broken in multiple ways. Trying to use them can
result in anything from use-after-free (#6716) or getting corrupt data
(#9718). Luckily no-one is doing such queries today, but this started to
change recently as code such as Alternator TTL and distributed
aggregate reads started using this.
This series fixes both problems and adds a unit test exercising this
previously completely unused code path.
Fixes: #6716
Fixes: #9718
Tests: unit(dev, release, debug)
"
* 'fix-stateful-multi-range-scans/v1' of https://github.com/denesb/scylla:
test/boost/multishard_mutation_query_test: add multi-range test
test/boost/multishard_mutation_query_test: add multi-range support
multishard_mutation_query: don't drop data during stateful multi-range reads
multishard_combining_reader: reader_lifecycle_policy: allow saving read range on fast-forward
In the past, we had very similar shell scripts for test/alternator/run,
test/cql-pytest/run and test/redis/run. Most of the code of all three
scripts was identical - dealing with starting Scylla in a temporary
directory, running pytest, and so on. The code duplication meant that
every time we fixed a bug in one of those scripts, or added an important
boot-time parameter to Scylla, we needed to fix all three scripts.
The solution was to convert the run scripts to Python, and to use a
common library, test/cql-pytest/run.py, for the main features shared
by all scripts - starting Scylla, waiting for protocols to be available,
and running pytest.
However, we only did this conversion for alternator and cql-pytest -
redis remained the old shell scripts. This patch completes the
conversion also for redis. As expected, no change was needed to the
run.py library code, which was already strong enough for the needs of
the redis tests.
Fixes #9748.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211207081423.1187847-1-nyh@scylladb.com>
As part of the drive to move over to flat_mutation_reader_v2, update
make_filtering_reader(). Since it doesn't examine range tombstones
(only the partition_start, to filter the key) the entire patch
is just glue code upgrading and downgrading users in the pipeline
(or removing a conversion, in one case).
Test: unit (dev)
Closes #9723
We have three semaphores for serialization of maintenance ops.
1) _rewrite_sstables_sem: for scrub, cleanup and upgrade.
2) _major_compaction_sem: for major
3) _custom_job_sem: for reshape, resharding and offstrategy
scrub, cleanup and upgrade should be serialized with major,
so rewrite sem should be merged into major one.
offstrategy is also a maintenance op that should be serialized
with others, to reduce compaction aggressiveness and space
requirement.
resharding is one-off operation, so can be merged there too.
the same applies for reshape, which can take long and not
serializing it with other maintenance activity can lead to
exhaustion of resources and high space requirement.
let's have a single semaphore to guarantee their serialization.
deadlock isn't an issue because locks are always taken in same
order.
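The single-semaphore scheme can be sketched like this, with a binary semaphore standing in for Scylla's seastar semaphore and illustrative operation names:

```python
import threading

class MaintenanceOps:
    """All maintenance operations take one shared semaphore, so they are
    serialized; since every op takes it in the same order (there is only
    one), no deadlock can occur."""
    def __init__(self):
        self._maintenance_sem = threading.Semaphore(1)
        self.log = []

    def _run(self, name):
        with self._maintenance_sem:
            self.log.append(name)

    def scrub(self): self._run("scrub")
    def cleanup(self): self._run("cleanup")
    def major(self): self._run("major")
    def offstrategy(self): self._run("offstrategy")
```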
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211201182046.100942-1-raphaelsc@scylladb.com>
Add a stringify function for the node_ops_cmd enum.
It will make the log output more readable and will make it
possible (hopefully) to do initial analysis without consulting
the source code.
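A minimal sketch of such a stringify helper; the enum members below are invented for illustration, as the real enum is defined in Scylla's C++ code:

```python
import enum

class NodeOpsCmd(enum.Enum):
    # Hypothetical members, for illustration only.
    removenode_prepare = 1
    decommission_prepare = 2
    replace_prepare = 3

def stringify(cmd) -> str:
    """Return a readable name for a command value, falling back to a
    placeholder for unknown values instead of failing."""
    try:
        return NodeOpsCmd(cmd).name
    except ValueError:
        return f"unknown({cmd})"
```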
Refs #9629
Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
Closes #9745
A short new test to verify that in the TagResource operation, the
Tags parameter - specifying which tags to set - is required.
The test passes on both AWS and Alternator - they both produce a
ValidationException in this case (the specific human-readable error
message is different, though, so we don't check it).
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211206140541.1157574-1-nyh@scylladb.com>
The stall report uses the millisecond unit, but actually reports
nanoseconds.
Switch to microseconds (milliseconds are a bit too coarse) and
use the safer "duration / 1us" style rather than "duration::count()"
that leads to unit confusion.
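The safer "duration / unit" style translates naturally to Python's `timedelta`; here is a sketch of the fixed report formatting (the message text is illustrative):

```python
from datetime import timedelta

def format_stall(duration: timedelta) -> str:
    # Dividing a duration by a unit yields a dimensionless count, so the
    # unit printed in the message cannot silently disagree with the value.
    us = duration / timedelta(microseconds=1)
    return f"Reactor stalled for {us:.0f} us"
```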
Fixes#9733.
Closes #9734
"
This series adds an optional "quarantine" subdirectory
to the table data directory that may contain sstables
that are fenced-off from regular compaction.
The motivation, as discussed in
https://github.com/scylladb/scylla/issues/7658
and
https://github.com/scylladb/scylla/issues/9537#issuecomment-953635973,
is to prevent regular compaction from spreading sstable corruption
further to other sstables, and allow investigating the invalid sstables
using the scylla-sstable tool, or scrubbing them in segregate mode.
When sstables are found to be invalid in scrub::mode::validate
they are moved to the quarantine directory, where they will still
be available for reading, but will not be considered for regular
or major compaction.
By default scrub, in all other modes, will consider all sstables,
including the quarantined ones. To make it more efficient, a
new option was added and exposed via the storage_service/keyspace_scrub
api: quarantine_mode. When set to quarantine_mode::only, scrub will read
only the quarantined sstables, so that the user can start with validate
mode to detect invalid sstables and quarantine them, then
scrub/segregate only the quarantined sstables.
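The selection logic implied by the quarantine modes might be sketched as below; the mode names other than `only` are assumptions based on the description:

```python
def select_for_scrub(sstables, quarantine_mode):
    """Pick sstables for a scrub run.

    `sstables` is an iterable of (name, is_quarantined) pairs;
    `quarantine_mode` is one of "include", "exclude", "only".
    """
    if quarantine_mode == "include":
        return [name for name, _ in sstables]
    if quarantine_mode == "exclude":
        return [name for name, q in sstables if not q]
    if quarantine_mode == "only":
        return [name for name, q in sstables if q]
    raise ValueError(f"unknown quarantine_mode: {quarantine_mode}")
```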
Test: unit(dev), database_test(debug)
DTest:
nodetool_additional_test.py:TestNodetool.{scrub_ks_sstable_with_invalid_fragment_test,scrub_segregate_sstable_with_invalid_fragment_test,scrub_segregate_ks_sstable_with_invalid_fragment_test,scrub_sstable_with_invalid_fragment_test,scrub_with_multi_nodes_expect_data_rebuild_test,scrub_with_one_node_expect_data_loss_test,validate_ks_sstable_with_invalid_fragment_test,validate_with_one_node_expect_data_loss_test,validate_sstable_with_invalid_fragment_test}
"
* tag 'quarantine-invalid-sstables-v6' of github.com:bhalevy/scylla:
test: sstable_compaction_test: add sstable_scrub_quarantine_mode_test
compaction: scrub: add quarantine_mode option
compaction_manager: perform_sstable_scrub: get the whole compaction_type_options::scrub
compaction: scrub_sstables_validate_mode: quarantine invalid sstables
test: database_test: add snapshot_with_quarantine_works
test: database_test: add populate_from_quarantine_works
distributed_loader: populate_keyspace: populate also from the quarantine dir
distributed_loader: populate_column_family: add must_exist param
sstables: add is_quarantined
sstables: add is_eligible_for_compaction
sstables: define symbolic names for table subdirectories
"
Thrift is one of the users of global storage proxy instance.
This set remove all such calls from the thrift/ code.
tests: unit(dev)
"
* 'br-thrift-reference-storage-proxy' of https://github.com/xemul/scylla:
thrift: Use local proxy reference in do_paged_slice
thrift: Use local proxy reference in handler methods
thrift: Keep sharded proxy reference on thrift_handler
For each quarantine mode:
validate sstables to quarantine one of them,
then scrub with the given quarantine mode,
and verify in the output whether the quarantined
sstable was scrubbed or not.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
When invalid sstables are detected, move them
to the quarantine subdirectory so they won't be
selected for regular compaction.
Refs #7658
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Test that we load quarantined sstables by
creating a dataset, moving an sstable to the quarantine dir,
and then reloading the table and verifying the dataset.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
sstables in the quarantine subdirectory are part of the table.
They're just not eligible for non-scrub compaction.
Call populate_column_family also for the quarantine subdirectory,
allowing it to not exist.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Check if the directory to be loaded exists.
Currently must_exist=true in all cases,
but it may be set to false when loading directories
that may not exist.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Quarantined sstables will reside in a "quarantine" subdirectory
and are also not eligible for regular compaction.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Currently compaction_manager tracks sstables
based on !requires_view_building() and similarly,
table::in_strategy_sstables picks up only sstables
that are not in staging.
is_eligible_for_compaction() generalizes this condition
in preparation for adding a quarantine subdirectory for
invalid sstables that should not be compacted as well.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Define the "staging", "upload", and "snapshots" subdirectory
names as named const expressions in the sstables namespace
rather than relying on their string representation,
which could lead to typos.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Initializing a vector from an initializer_list defeats move construction
(since initializer_list is const). Moreover it is suspected to cause a
crash due to a miscompile. In any case, this patch fixes the crash.
Fixes #9735.
Closes #9736
Originally mentioned in: https://github.com/scylladb/scylla/pull/9481#issuecomment-982698208
Currently we call `_restrictions->need_filtering()` each time a prepared select is executed.
This is not super efficient - `need_filtering` has to scan through the whole AST and analyze it.
This PR calculates the value of `_restrictions->need_filtering()` only once and then uses this precomputed value.
I ran `perf_simple_query` on my laptop throttled to 1GHz and it looks like this saves ~1000 instructions/op.
```bash
median 38459.09 tps ( 75.1 allocs/op, 12.1 tasks/op, 46099 insns/op)
median 38743.79 tps ( 75.1 allocs/op, 12.1 tasks/op, 46091 insns/op)
median 38489.52 tps ( 75.1 allocs/op, 12.1 tasks/op, 46097 insns/op)
median 38492.10 tps ( 75.1 allocs/op, 12.1 tasks/op, 46102 insns/op)
median 38478.65 tps ( 75.1 allocs/op, 12.1 tasks/op, 46098 insns/op)
median 38930.07 tps ( 75.1 allocs/op, 12.1 tasks/op, 44922 insns/op)
median 38777.52 tps ( 75.1 allocs/op, 12.1 tasks/op, 44904 insns/op)
median 39325.41 tps ( 75.1 allocs/op, 12.1 tasks/op, 44925 insns/op)
median 38640.51 tps ( 75.1 allocs/op, 12.1 tasks/op, 44907 insns/op)
median 39075.89 tps ( 75.1 allocs/op, 12.1 tasks/op, 44920 insns/op)
./build/release/test/perf/perf_simple_query --cpuset 1 -m 1G --random-seed 0 --task-quota-ms 10 --operations-per-shard 1000000
```
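The optimization can be sketched as follows; the classes are simplified stand-ins for the C++ ones, with a counter showing that the AST is scanned only once, at prepare time:

```python
class Restrictions:
    def __init__(self, ast):
        self._ast = ast
        self.scans = 0  # counts the expensive whole-AST analyses

    def need_filtering(self):
        # Stand-in for the expensive whole-AST scan.
        self.scans += 1
        return any(node == "non-indexed" for node in self._ast)

class SelectStatement:
    def __init__(self, restrictions):
        self._restrictions = restrictions
        # Computed once at prepare time...
        self._needs_filtering = restrictions.need_filtering()

    def execute(self):
        # ...and reused on every execution of the prepared statement.
        return "filtering" if self._needs_filtering else "direct"
```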
Closes #9727
* github.com:scylladb/scylla:
select_statement: Use precomputed value of _restrictions->need_filtering()
select_statement: Store whether restrictions need filtering in a variable
We had a logger called "query result log", with spaces, which made it
impossible to enable it with the REST API due to missing percent
decoding support in our HTTP server (see #9614).
Although that HTTP server bug should be fixed as well (in Seastar -
see scylladb/seastar#725), there is no good reason to have a logger
name with a space in it. This is the only logger whose name has
a space: We have 77 other loggers using underscores (_) in their name,
and only 9 using hyphens (-). So in this patch we choose the more
popular alternative - an underscore.
Fixes #9614.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211205093732.1092553-1-nyh@scylladb.com>
Instead of calculating _restrictions->need_filtering() each time,
we can now use the value that has been already calculated.
This computation used to happen during query execution,
so removing it from there gives us an increase in performance.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Instead of calculating _restrictions->need_filtering()
we can calculate it only once and then use this computed variable.
It turns out that _restrictions->need_filtering() is called
during execution of prepared statements and it has to scan through the whole AST,
so doing it only once gives us a performance gain.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
This strategy method was introduced unnecessarily. We assumed it was
going to be needed, but it turns out it never was, not even
for ICS. It is also built on a wrong assumption: an output
sstable run being generated can never be compacted in parallel,
as the non-overlapping requirement can easily be broken.
LCS, for example, can allow parallel compaction on different runs
(levels), but correctness cannot be guaranteed when the same runs
are compacted in parallel.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
It was introduced by commit 5206a97915 because a fully expired sstable
wouldn't be registered and therefore could never be removed from the
backlog tracker. This is no longer possible, as the table is now
responsible for removing all input sstables. So let's kill
on_skipped_expired_sstable(), as it's now only boilerplate we don't need.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
... and count_if()' from Avi Kivity
The expression code provides some utilities to examine and manipulate
expressions at prepare time. These are not (or should not be) in the fast
path and so should be optimized for compile time and code footprint
rather than run time.
This series does so by detemplating and deinlining find_in_expression()
and count_if().
Closes #9712
* github.com:scylladb/scylla:
cql3: expr: adjust indentation in recurse_until()
cql3: expr: detemplate count_if()
cql3: expr: detemplate count_if()
cql3: expr: rewrite count_if() in terms of recurse_until()
cql3: expr: deinline recurse_until()
cql3: expr: detemplate find_in_expression
Scylla doesn't support unset values inside UDT.
The old code used to convert `unset` to `null`, which seems incorrect.
There is an extra space in the error message to retain compatibility with Cassandra.
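The fixed behavior can be sketched with a hypothetical sentinel and serializer; the real check and error text live in Scylla's C++ CQL layer and may differ:

```python
UNSET_VALUE = object()  # sentinel standing in for the protocol's unset value

def serialize_udt(field_values):
    """Serialize UDT field values, rejecting unset instead of silently
    converting it to null as the old code did."""
    for v in field_values:
        if v is UNSET_VALUE:
            raise ValueError("Invalid unset value for field of user defined type")
    return tuple(field_values)
```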
Fixes: #9671
Closes #9724
* github.com:scylladb/scylla:
cql-pytest: Enable test for UDT with unset values
cql3: Don't allow unset values inside UDT
The test testUDTWithUnsetValues was marked as xfail,
but now the issue has been fixed and we can enable it.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Scylla doesn't support unset values inside UDT.
The old code used to convert unset to null, which seems incorrect.
There is an extra space in the error message to retain compatibility with Cassandra.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
In the test infrastructure code, so we can add tests passing multiple
ranges to the tested `multishard_{mutation,data}_query()`, exercising
multi-range functionality.
When multiple ranges are passed to `multishard_{mutation,data}_query()`,
it wraps the multishard reader with a multi-range one. This interferes
with the disassembly of the multishard reader's buffer at the end of the
page, because the multi-range reader becomes the top-level reader,
denying direct access to the multishard reader itself, whose buffer is
then dropped. This confuses the reading logic, causing data corruption
on the next page(s). A further complication is that the multi-range
reader can include data from more than one range in its buffer when
filling it. To solve this, a special-purpose multi-range reader is
introduced and used instead of the generic one, which solves both
problems by guaranteeing that:
* Upon calling fill_buffer(), the entire content of the underlying
multishard reader is moved into the buffer of the top-level multi-range
reader, so calling `detach_buffer()` is guaranteed to remove all
unconsumed fragments from the top-level reader.
* fill_buffer() will never mix data from more than one range: it
always stops on range boundaries and only crosses one once the current
range has been consumed entirely.
With this, multi-range reads finally work with reader-saving.
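The two guarantees can be sketched with a toy reader; ranges are pre-split lists of fragments, and this models only the buffer discipline, not the real reader stack:

```python
class BufferDrainingMultiRangeReader:
    """Toy model: fill_buffer() never mixes ranges, and detach_buffer()
    removes everything unconsumed from the top-level reader."""
    def __init__(self, ranges):
        self._ranges = [list(r) for r in ranges]
        self.buffer = []

    def fill_buffer(self):
        # Skip exhausted ranges, then drain exactly one range into the
        # buffer: we stop at the range boundary and only move on once
        # the current range has been consumed entirely.
        while self._ranges and not self._ranges[0]:
            self._ranges.pop(0)
        if self._ranges:
            self.buffer.extend(self._ranges[0])
            self._ranges[0].clear()

    def detach_buffer(self):
        buf, self.buffer = self.buffer, []
        return buf
```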
The reader_lifecycle_policy API was created around the idea of shard
readers (optionally) being saved and reused on the next page. To do
this, the lifecycle policy has to also be able to control the lifecycle
of by-reference parameters of readers: the slice and the range. This was
possible from day 1, as the readers are created through the lifecycle
policy, which can intercept and replace the said parameters with copies
that are created in stable storage. There was one hole in the design
though: fast-forwarding, which can change the range of the read, without
the lifecycle policy knowing about this. In practice this results in
fast-forwarded readers being saved together with the wrong range, their
range reference becoming stale. The only lifecycle implementation prone
to this is the one in `multishard_mutation_query.cc`, as it is the only
one actually saving readers. It will fast-forward its reader when the
query happens over multiple ranges. There were no problems related to
this so far because no one passes more than one range to said functions,
but this is incidental.
This patch solves this by adding an `update_read_range()` method to the
lifecycle policy, allowing the shard reader to update the read range
when being fast forwarded. To allow the shard reader to also have
control over the lifecycle of this range, a shared pointer is used. This
control is required because when an `evictable_reader` is the top-level
reader on the shard, it can invoke `create_reader()` with an edited
range after `update_read_range()`, replacing the fast-forwarded-to
range with a new one, yanking it out from under the feet of the
evictable reader itself. By using a shared pointer here, we can ensure
the range stays alive while it is the current one.
When reshaping a TWCS table in relaxed mode, which is the case for
offstrategy compaction and boot, the disjoint tolerance is too strict,
which can lead those processes to do more work than needed.
Let's increase the tolerance to max threshold, which will limit the
amount of sstables opened in compaction to a reasonable amount.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211130132538.56285-1-raphaelsc@scylladb.com>
Unfortunately, the correctness of std_unique_ptr and similar helpers
depends on their implementation in libstdc++. Let's support unique_ptr
on newer systems while maintaining backward compatibility.
./test.py --mode=release scylla-gdb now passes to me, also verified
`scylla compaction-tasks` produces correct info.
Fixes #9677.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211202173534.359672-1-raphaelsc@scylladb.com>
"
A couple of preparatory changes for the coroutinization of the compaction manager.
"
* 'some_compaction_manager_cleanups_v5' of github.com:raphaelsc/scylla:
compaction_manager: move check_for_cleanup into perform_cleanup()
compaction_manager: replace get_total_size by one liner
compaction_manager: make consistent usage of type and name table
compaction_manager: simplify rewrite_sstables()
compaction_manager: restore indentation
Currently storage service acts as a glue between database schema value
and the migration manager "passive_announce" call. This interposing is
not required, migration manager can do all the management itself, and
the linkage can be done in main.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This is because today migration_manager::stop is called at drain time.
Keep the .stop for the next patch, but since it's called when the
whole migration_manager stops, guard it against re-entrance.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Both calls are now private. Also the non-maybe one can become void
and handle pull exceptions by itself.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Move the calls from respective storage service notification callbacks.
One non-move change is that token metadata that was available on the
storage service should be taken from storage proxy, but this change
is aligned with future changes -- migration manager depends on proxy
and will get a local proxy reference some day.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This is to start schema pulls upon on_join, on_alive and on_change
notifications in the next patch. Migration manager already has a
gossiper reference.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Add the summary index and the bound's address to the error message, so
it can be correlated with other trace level logging when investigating a
problem.
Refs: #9446
Tests: unit(dev)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20211202124955.542293-2-bdenes@scylladb.com>
In the very recent commit 3c0e703 fixing issue #8757, we changed the
default prometheus_address setting in scylla.yaml to "localhost", to
match the default listen_address in the same file. We explained in that
commit how this helped developers who use an unchanged scylla.yaml, and
how it didn't hurt pre-existing users who already had their own scylla.yaml.
However, it was quickly noted by Tzach and Amnon that there is one use case
that was hurt by that fix:
Our existing documentation, such as the installation guide
https://www.scylladb.com/download/?platform=centos ask the user to take
our initial scylla.yaml, and modify listen_address, rpc_address, seeds,
and cluster_name - and that's it. That document - and others - don't
tell the user to also override prometheus_address, so users will likely
forget to do so - and monitoring will not work for them.
So this patch includes a different solution to #8757.
What it does is:
1. The setting of prometheus_address in scylla.yaml is commented out.
2. In config.cc, prometheus_address defaults to empty.
3. In main.cc, if prometheus_address is empty (i.e., was not explicitly
set by the user), the value of listen_address is used instead.
In other words, the idea is that prometheus_address, if not explicitly set
by the user, should default to listen_address - which is the address used
to listen to the internal Scylla inter-node protocol.
Because the documentation already tells the user to set listen_address
and to not leave it set to localhost, setting it will also open up
prometheus, thereby solving #9701. Meanwhile, developers who leave the
default listen_address=localhost will also get prometheus_address=localhost,
so the original #8757 is solved as well. Finally, for users who had an old
scylla.yaml where prometheus_address was explicitly set to something,
this setting will continue to be used. This was also a requirement of
issue #8757.
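The three-step fallback described above boils down to this sketch, with a plain dict standing in for Scylla's config object:

```python
def effective_prometheus_address(config: dict) -> str:
    # If prometheus_address was not explicitly set (missing or empty,
    # per step 2 it defaults to empty), fall back to listen_address;
    # an explicit value always wins.
    return config.get("prometheus_address") or config["listen_address"]
```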
Fixes #9701.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211129155201.1000893-1-nyh@scylladb.com>
As part of changing the codebase to flat_mutation_reader_v2,
change size_estimates_virtual_reader.
Since the bulk of the work is done by
make_flat_mutation_reader_from_mutations() (which is unchanged),
only glue code is affected. It is also not performance sensitive,
so the extra conversions are unimportant.
Test: unit (dev)
Closes#9707
As part of changing the codebase to flat_mutation_reader_v2,
change chained_delegating_reader and its user virtual_table.
Since the reader does not process fragments (only forwarding
things around), only glue code is affected. It is also not
performance sensitive, so the extra conversions are unimportant.
Test: unit (dev)
Closes#9706
New code in the manager adopted the name and type "table", whereas historical
code still uses the name and type "column family". Let's make it consistent
so newcomers don't get confused.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
If the schema state machine requests snapshot transfer it means that
it missed some schema mutations and needs a full sync. We already have
a function that does it: migration_manager::submit_migration_task(),
so call it on a snapshot transfer.
As rewrite_sstables() switched to a coroutine, it can be simplified
by not using smart pointers to handle lifetime issues.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
We want raft service to support all kinds of state machines and most
services provided by it may indeed be shared. But snapshot transfer is
very state machine specific and thus cannot be put into the raft service.
This patch delegates snapshot transfer implementation to a state machine
implementation.
This patch wires up schema_raft_state_machine::apply() function. For now
it assumes that a raft command contains a single schema change in the form
of a schema mutation array. It may change later (we may add more info to
a schema), but for now this will do.
Key column values fetched during the TTL scan have a well-defined
order - primary columns come first. This assumption is now used
to simplify getting the values from rows during scans without
having to consult result metadata first.
Tests: unit(release)
Message-Id: <dcb19b8bab0dd02838693fe06d5a835ea2f378ff.1638357005.git.sarna@scylladb.com>
This commit addresses a very simple FIXME left in alternator TTL
implementation - it reduces the number of parameters passed
to scan_table_ranges() by enclosing the parameters in a separate
object.
Tests: unit(release)
Message-Id: <214afcd9d5c1968182ad98550105f82add216c80.1638354094.git.sarna@scylladb.com>
Let's enable offstrategy for repair based rebuild, for it to take
advantage of offstrategy benefits, one of the most important
being compaction not acting aggressively, which is important
for both reducing operation time and delivering good latency
while the operation is running.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211130115957.13779-1-raphaelsc@scylladb.com>
The test suite names seen by Jenkins are suboptimal: there is
no distinction between modes, and the ".cc" suffix of file names
is interpreted as a class name, which is converted to a tree node
that must be clicked to expand. Massage the names to remove
unnecessary information and add the mode.
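The name massaging can be sketched like this (a hypothetical helper, not the actual test.py code):

```python
import os

def jenkins_suite_name(path: str, mode: str) -> str:
    # Strip directory components and the ".cc" suffix, which Jenkins would
    # otherwise interpret as a class name, and prefix the build mode so
    # the same suite in different modes gets distinct names.
    name = os.path.basename(path)
    if name.endswith(".cc"):
        name = name[:-len(".cc")]
    return f"{mode}.{name}"
```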
Closes#9696
We read the compressed file size from a file that was already closed,
resulting in EBADF on my machine. Not sure why it works for everyone
else.
Fix by reading the size using the path.
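The fix amounts to stat-ing the path instead of a possibly-closed file handle, as in this sketch (illustrative only):

```python
import os

def compressed_size(path: str) -> int:
    # Read the size via the file path, not via a file descriptor that may
    # already be closed (which would fail with EBADF).
    return os.stat(path).st_size
```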
Closes#9675
After commit 1f5b17f, overlapping can be introduced in level 1 because
the procedure that filters out sstables from partial runs considers
inactive tasks, so L1 sstables can be incorrectly filtered out from
next compaction attempt. When L0 is merged into L1, overlapping is
then introduced in L1 because old L1 sstables weren't considered in
L0 -> L1 compaction.
From now on, compaction_manager::get_candidates() will only consider
active tasks, to make sure actual partial runs are filtered out.
Fixes#9693.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211129180459.125847-1-raphaelsc@scylladb.com>
To satisfy backlog controller, commit 28382cb25c changed LCS to
incrementally push sstables to highest level *when* there's nothing else
to be done.
That's overkill because the controller will be satisfied with level L being
fanout times larger than L-1. No need to push everything to the last level, as
it's even worse than a major compaction, because any file being promoted will
overlap with ~10 files in the next level. At least, the cost is amortized by
multiple iterations, but terrible write amplification is still there.
Consequently, this reduces overall efficiency.
For example, it might happen that LCS in table A starts pushing everything
to the highest level, when table B needs resources for compaction to reduce its
backlog. Increased write amplification in A may prevent other tables
from reducing their backlog in a timely manner.
It's clear that LCS should stop promoting as soon as level L is 10x
larger than L-1, so the strategy will still be satisfied while fixing the
inefficiency problem.
Now the layout will look as follows:
SSTables in each level: [0, 2, 15, 121]
Previously, once the table stopped being written to, it looked like:
SSTables in each level: [0, 0, 0, 138]
It's always good to have everything in a single run, but that comes
with a high write amplification cost which we cannot afford in steady
state. With this change, the layout will still be good enough to make
everybody happy.
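The stop condition can be modeled as a toy ratio check (hypothetical names; the real LCS code is C++ and considers bytes per level):

```python
FANOUT = 10  # the LCS fan-out assumed in this sketch

def should_promote(level_sizes: list[int], level: int) -> bool:
    # Keep promoting sstables from level-1 into level only while level is
    # still smaller than FANOUT times level-1; once the ratio is reached,
    # the strategy (and backlog controller) is satisfied, so stop to avoid
    # needless write amplification.
    if level <= 0 or level >= len(level_sizes):
        return False
    return level_sizes[level] < FANOUT * level_sizes[level - 1]
```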
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211129143606.71257-1-raphaelsc@scylladb.com>
The RPC module starts dispatching calls to a server the moment it is in the
servers' list, but until raft::server::start() completes the instance is
not fully created yet and is not ready to accept anything. Fix the code
that initializes a new raft group to insert the new raft instance into the list
only after it is started.
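The ordering fix is the classic "start before publish" pattern, sketched here with hypothetical names (the real code is C++ inside the Raft group registry):

```python
import asyncio

class RaftGroupRegistry:
    def __init__(self):
        self._servers = {}  # RPC dispatch consults this map

    async def create_group(self, group_id, server):
        # Start the instance first; only a fully started server may receive
        # dispatched RPC calls, so publish it in the map only afterwards.
        await server.start()
        self._servers[group_id] = server

    def lookup(self, group_id):
        return self._servers.get(group_id)
```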
Message-Id: <YZTFFW9v0NlV7spR@scylladb.com>
This series introduces a new version of a loading_cache class.
The old implementation was susceptible to a "pollution" phenomenon, where a frequently used entry could get evicted by an intensive burst of "used once" entries pushed into the cache.
The new version has a privileged and an unprivileged cache section, and there's a new loading_cache template parameter - SectionHitThreshold. The new cache algorithm goes as follows:
* We define 2 dynamic cache sections whose total size should not exceed the maximum cache size.
* A new cache entry is always added to the "unprivileged" section.
* After a cache entry is read more than SectionHitThreshold times it moves to the second cache section.
* Both sections' entries obey expiration and reload rules in the same way as before this patch.
* When cache entries need to be evicted due to a size restriction, the "unprivileged" section's
least recently used entries are evicted first.
More details may be found in #8674.
In addition, during testing another issue was found in the authorized_prepared_statements_cache: #9590.
There is a patch that fixes it as well.
Closes#9708
* github.com:scylladb/scylla:
loading_cache: account unprivileged section evictions
loading_cache: implement a variation of least frequent recently used (LFRU) eviction policy
authorized_prepared_statements_cache: always "touch" a corresponding cache entry when accessed
loading_cache::timestamped::lru_entry: refactoring
loading_cache.hh: rearrange the code (no functional change)
loading_cache: use std::pmr::polymorphic_allocator
This series extends `compaction_manager::stop_ongoing_compaction` so it can be used from the api layer for:
- table::disable_auto_compaction
- compaction_manager::stop_compaction
Fixes#9313
Fixes#9695
Test: unit(dev)
Closes#9699
* github.com:scylladb/scylla:
compaction_manager: stop_compaction: wait for ongoing compactions to stop
compaction_manager: stop_ongoing_compactions: log Stopping 0 tasks at debug level
compaction_manager: unify stop_ongoing_compactions implementations
compaction_manager: stop_ongoing_compactions: add compaction_type option
compaction_manager: get_compactions: get a table* parameter
table: disable_auto_compaction: stop ongoing compactions
compaction_manager: make stop_ongoing_compactions public
table: futurize disable_auto_compactions
find_in_expression() is not in a fast path but is quite large
and inlined due to being a template. Detemplate it into a
recurse_until() utility function, and keep only the minimal
code in a template.
recurse_until() is still inlined to simplify review, but
will be deinlined in the next patch.
This PR finally removes the `term` class and replaces it with `expression`.
* There was some trouble with `lwt_cache_id` in `expr::function_call`.
The current code works the following way:
* for each `function_call` inside a `term` that describes a pk restriction, `prepare_context::add_pk_function_call` is called.
* `add_pk_function_call` takes a `::shared_ptr<cql3::functions::function_call>`, sets its `cache_id` and pushes this shared pointer onto a vector of all collected function calls
* Later when some condition is met we want to clear cache ids of all those collected function calls. To do this we iterate through shared pointers collected in `prepare_context` and clear cache id for each of them.
This doesn't work with `expr::function_call` because it isn't kept inside a shared pointer.
To solve this I put the `lwt_cache_id` inside a shared pointer and then `prepare_context` collects these shared pointers to cache ids.
I also experimented with doing this without any shared pointers, maybe we could just walk through the expression and clear the cache ids ourselves. But the problem is that expressions are copied all the time: we could clear the cache ids in one place, but forget about a copy. Doing it using shared pointers more closely matches the original behaviour.
The experiment is on the [term2-pr3-backup-altcache](https://github.com/cvybhu/scylla/tree/term2-pr3-backup-altcache) branch
* `shared_ptr<term>` being `nullptr` could mean:
* It represents a cql value `null`
* That there is no value, like `std::nullopt` (for example in `attributes.hh`)
* That it's a mistake, it shouldn't be possible
A good way to distinguish between an optional and a mistake is to look for `my_term->bind_and_get()` - we then know that it's not an optional value.
* On the other hand `raw_value` cast to bool means:
* `false` - null or unset
* `true` - some value, maybe empty
I ran a simple benchmark on my laptop to see how performance is affected:
```
build/release/test/perf/perf_simple_query --smp 1 -m 1G --operations-per-shard 1000000 --task-quota-ms 10
```
* On master (a21b1fbb2f) I get:
```
176506.60 tps ( 77.0 allocs/op, 12.0 tasks/op, 45831 insns/op)
median 176506.60 tps ( 77.0 allocs/op, 12.0 tasks/op, 45831 insns/op)
median absolute deviation: 0.00
maximum: 176506.60
minimum: 176506.60
```
* On this branch I get:
```
172225.30 tps ( 75.1 allocs/op, 12.1 tasks/op, 46106 insns/op)
median 172225.30 tps ( 75.1 allocs/op, 12.1 tasks/op, 46106 insns/op)
median absolute deviation: 0.00
maximum: 172225.30
minimum: 172225.30
```
Closes#9481
* github.com:scylladb/scylla:
cql3: Remove remaining mentions of term
cql3: Remove term
cql3: Rename prepare_term to prepare_expression
cql3: Make prepare_term return an expression instead of term
cql3: expr: Add size check to evaluate_set
cql3: expr: Add expr::contains_bind_marker
cql3: expr: Rename find_atom to find_binop
cql3: expr: Add find_in_expression
cql3: Remove term in operations
cql3: Remove term in relations
cql3: Remove term in multi_column_restrictions
cql3: Remove term in term_slice, rename to bounds_slice
cql3: expr: Remove term in expression
cql3: expr: Add evaluate_IN_list(expression, options)
cql3: Remove term in column_condition
cql3: Remove term in select_statement
cql3: Remove term in update_statement
cql3: Use internal cql format in insert_prepared_json_statement cache
types: Add map_type_impl::serialize(range of <bytes, bytes>)
cql3: Remove term in cql3/attributes
cql3: expr: Add constant::view() method
cql3: expr: Implement fill_prepare_context(expression)
cql3: expr: add expr::visit that takes a mutable expression
cql3: expr: Add receiver to expr::bind_variable
"
To ensure consistency of schema and topology changes,
Scylla needs a linearizable storage for this data
available at every member of the database cluster.
The series introduces such storage as a service,
available to all Scylla subsystems. Using this service, any other
internal service such as gossip or migrations (schema) could
persist changes to cluster metadata and expect this to be done in
a consistent, linearizable way.
The series uses the built-in Raft library to implement a
dedicated Raft group, running on shard 0, which includes all
members of the cluster (group 0), adds hooks to topology change
events, such as adding or removing nodes of the cluster, to update
group 0 membership, ensures the group is started when the
server boots.
The state machine for the group, i.e. the actual storage
for cluster-wide information still remains a stub. Extending
it to actually persist changes of schema or token ring
is subject to a subsequent series.
Another Raft related service was implemented earlier: Raft Group
Registry. The purpose of the registry is to allow Scylla have an
arbitrary number of groups, each with its own subset of cluster
members and a relevant state machine, sharing a common transport.
Group 0 is one (the first) group among many.
"
* 'raft-group-0-v12' of github.com:scylladb/scylla-dev:
raft: (server) improve tracing
raft: (metrics) fix spelling of waiters_awaken
raft: make forwarding optional
raft: (service) manage Raft configuration during topology changes
raft: (service) break a dependency loop
raft: (discovery) introduce leader discovery state machine
system_keyspace: mark scylla_local table as always-sync commitlog
system_keyspace: persistence for Raft Group 0 id and Raft Server Id
raft: add a test case for adding entries on follower
raft: (server) allow adding entries/modify config on a follower
raft: (test) replace virtual with override in derived class
raft: (server) fix a typo in exception message
raft: (server) implement id() helper
raft: (server) remove apply_dummy_entry()
raft: (test) fix missing initialization in generator.hh
It's not uncommon for cleanup to be issued against an entire keyspace,
which may be composed of tons of tables. To increase chances of success
if low on space, cleanup will now start from smaller tables first, such
that bigger tables will have more space available, once they're reached,
to satisfy their space requirement.
parallel_for_each() is dropped; it wasn't needed given that the manager
performs per-shard serialization of cleanup jobs.
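The smaller-tables-first ordering amounts to a simple sort by size, sketched here with hypothetical names:

```python
def cleanup_order(table_sizes: dict[str, int]) -> list[str]:
    # Clean up smaller tables first so that, by the time the bigger tables
    # are reached, more disk space has already been reclaimed to satisfy
    # their space requirement.
    return sorted(table_sizes, key=table_sizes.get)
```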
Refs #9504.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211130133712.64517-1-raphaelsc@scylladb.com>
Similar to #9313, stop_compaction should also reuse the
stop_ongoing_compactions() infrastructure and wait on ongoing
compactions of the given type to stop.
Fixes#9695
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Normally, "Stopping 0 tasks for 0 ongoing compactions for table ..."
is not very interesting so demote its log_level to debug.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Now stop_ongoing_compactions(reason) is equivalent to
stop_ongoing_compactions(reason, nullptr, std::nullopt)
so share the code of the latter for the former entry point.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
And make the table optional as well, so it can be used
by stop_compaction() to stop a particular compaction type on all tables.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Optionally get running compaction on the provided table.
This is required for stop_ongoing_compactions on a given table.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The api call disables new regular compaction jobs from starting
but it doesn't wait for ongoing compaction to stop and so it's
much less useful.
Returning after stopping regular compaction jobs and waiting
for them to stop guarantees that no regular compaction jobs are
running when nodetool disableautocompaction returns successfully.
Fixes#9313
Test: sstable_compaction_test,sstable_directory_test(dev)
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Until commit c94e6f8567, interposer consumer wouldn't work
with our GC writer, needed for incremental compaction correctness.
Now that the technical debt is gone, let's allow incremental
compaction with interposer consumer.
The only change needed is serialization of the replacer, as two
consumers cannot step on each other's toes, like when we have concurrent
bucket writers with TWCS.
sstable_compaction_test.test_bug_6472 passes with this change,
which was added when #6472 was fixed by not allowing incremental
compaction with interposer consumer.
Refs #6472.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211126191000.43292-1-raphaelsc@scylladb.com>
In order to avoid the race condition introduced in 9dce1e4, the
index_reader should be closed prior to its destruction.
This only exposes 4.4 and earlier releases to this specific race.
However, it is always a good idea to first close the index reader
and only then destroy it, since that is most likely what developers
who change the index reader in the future will assume.
Ref #9704 (because 4.4 and earlier releases are vulnerable).
Signed-off-by: Eliran Sinvani <eliransin@scylladb.com>
Closes#9705
Provide a template parameter for a static callbacks object to
increment a counter of evictions from the unprivileged section.
Eviction of entries from the cache while still in the unprivileged
section indicates inefficient usage of the cache and should be
investigated.
This patch instruments authorized_prepared_statements_cache and a
prepared_statements_cache objects to provide non-empty callbacks.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
This patch implements a simple variation of LFRU eviction policy:
* We define 2 dynamic cache sections whose total size should not exceed the maximum cache size.
* A new cache entry is always added to the "unprivileged" section.
* After a cache entry is read more than SectionHitThreshold times it moves to the second cache section.
* Both sections' entries obey expiration and reload rules in the same way as before this patch.
* When cache entries need to be evicted due to a size restriction, the "unprivileged" section's
least recently used entries are evicted first.
Note:
With a two-section cache it's not enough for a new entry to have the latest timestamp
in order not to be evicted right after insertion: e.g. if all other entries
are from the privileged section.
And obviously we want to allow new cache entries to be added to the cache.
Therefore we can no longer first add a new entry and then shrink the cache.
Switching the order of these two operations resolves the problem.
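A minimal sketch of the two-section LFRU policy, including the shrink-before-insert ordering noted above (illustrative Python, not the actual loading_cache code; names are hypothetical):

```python
from collections import OrderedDict

class LFRUCache:
    def __init__(self, max_size: int, hit_threshold: int):
        self.max_size = max_size
        self.hit_threshold = hit_threshold  # SectionHitThreshold analogue
        self.unprivileged = OrderedDict()   # key -> (value, hits), LRU order
        self.privileged = OrderedDict()     # key -> value, LRU order

    def _evict_one(self):
        # Evict the unprivileged LRU entry first; only fall back to the
        # privileged section when the unprivileged one is empty.
        victim = self.unprivileged or self.privileged
        victim.popitem(last=False)

    def put(self, key, value):
        # Shrink first, then insert, so a brand-new entry is not evicted
        # right away when all other entries are privileged.
        while len(self.unprivileged) + len(self.privileged) >= self.max_size:
            self._evict_one()
        self.unprivileged[key] = (value, 0)

    def get(self, key):
        if key in self.privileged:
            self.privileged.move_to_end(key)
            return self.privileged[key]
        value, hits = self.unprivileged.pop(key)
        hits += 1
        if hits > self.hit_threshold:
            self.privileged[key] = value       # promote
        else:
            self.unprivileged[key] = (value, hits)  # touch
        return value
```

A burst of used-once inserts can now only churn the unprivileged section; frequently read entries sit in the privileged section and survive.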
Fixes#8674
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
System dirty memory space is limited to a 10MB capacity.
This means that memtables cannot accumulate more than
5MB before they are flushed to sstables.
This can impact performance under load.
Move the `system.raft` table to the regular dirty
memory space.
Fixes: #9692
Tests: unit(dev)
Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
Message-Id: <20211129200044.1144961-1-pa.solodovnikov@scylladb.com>
Always "touch" a prepared_statements_cache entry when it's accessed via
authorized_prepared_statements_cache.
If we don't do this it may turn out that the most recently used prepared statement doesn't have
the newest last_read timestamp and can get evicted before the not-so-recently-read statement if
we need to create space in the prepared statements cache for a new entry.
And this is going to trigger an eviction of the corresponding entry from the authorized_prepared_cache
breaking the LRU paradigm of these caches.
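The touch-through behavior can be sketched with two LRU maps (a hypothetical model of the two caches, not the actual C++ code):

```python
from collections import OrderedDict

class LayeredLRU:
    def __init__(self):
        self.inner = OrderedDict()  # models prepared_statements_cache
        self.outer = OrderedDict()  # models authorized_prepared_statements_cache

    def get_authorized(self, key):
        self.outer.move_to_end(key)
        # Also "touch" the inner entry: without this, the inner LRU could
        # evict the statement most recently used via the outer cache,
        # which would in turn invalidate the outer entry.
        self.inner.move_to_end(key)
        return self.outer[key]
```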
Fixes#9590
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
We already have tests for the behavior of the "Select" parameter when
querying a base table, but this patch adds additional tests for its
behavior when querying a GSI or a LSI. There are some differences:
Select=ALL_PROJECTED_ATTRIBUTES is not allowed for base tables, but is
allowed - and in fact is the default - for GSI and LSI. Also, GSI may
not allow ALL_ATTRIBUTES (which is the default for base tables) if
only a subset of the attributes were projected.
The new tests xfail because the Select and Projection features have
not yet been implemented in Alternator. They pass in DynamoDB.
After this patch we have (hopefully) complete test coverage of the
Select feature, which will be helpful when we start implementing it.
Refs #5058 (Select)
Refs #5036 (Projection)
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211125100443.746917-1-nyh@scylladb.com>
Add to the existing tests for the Select parameter of the Query and Scan
operations another check: That when Select is ALL_ATTRIBUTES or COUNT,
specifying AttributesToGet or ProjectionExpression is forbidden -
because the combination doesn't make sense.
The expanded test continues to xfail on Alternator (because the Select
parameter isn't yet implemented), and passes on DynamoDB. Strengthening
the tests for this feature will be helpful when we decide to implement it.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211125074128.741677-1-nyh@scylladb.com>
* Store a reference to a parent (loading_cache) object instead of holding
references to separate fields.
* Access loading_cache fields via accessors.
* Move the LRU "touch" logic to the loading_cache.
* Keep only a plain "list entry" logic in the lru_entry class.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Hide internal classes inside the loading_cache class:
* Simpler calls - no need for a tricky back-referencing to access loading_cache fields.
* Cleaner interface.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
Use std::pmr::polymorphic_allocator instead of
std::allocator - the former doesn't require the allocated
object type to be defined at template specification time.
As a result we don't have to define lru_entry
before loading_cache, which in turn allows us
to rearrange the classes, making all of them internal to
loading_cache and hence simplifying the interface.
Signed-off-by: Vlad Zolotarov <vladz@scylladb.com>
sstable_set_impl::all() may have to copy all sstables from multiple
sets, if compound. Let's avoid this overhead by using
sstable_set_impl::for_each_sstable().
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211127181037.56542-1-raphaelsc@scylladb.com>
A mutation source is valid when it has either a v1 or v2 flat mutation
reader factory, but `operator bool()` only checks for the former.
Fixes#9697
Signed-off-by: Michael Livshin <michael.livshin@scylladb.com>
Closes#9698
The experimental_features_t has an all() method, supposedly returning
all values of the enum - but it's easy to forget to update it when
adding a new experimental feature - and it's currently out-of-sync
(it's missing the ALTERNATOR_TTL option).
We already have another method, map(), where a new experimental feature
must be listed otherwise it can't be used, so let's just take all()'s
values from map(), automatically, instead of forcing developers to keep
both lists up-to-date.
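Deriving all() from map() can be sketched like this (hypothetical feature names and values; the real code is C++ in config.cc):

```python
class ExperimentalFeatures:
    @staticmethod
    def map():
        # Every experimental feature must be listed here to be usable,
        # so this mapping is necessarily kept up to date.
        return {"UDF": 1, "ALTERNATOR_TTL": 2, "RAFT": 3}

    @classmethod
    def all(cls):
        # Derive all() from map()'s keys so the two lists can never diverge.
        return sorted(cls.map())
```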
Note that using the all() function to enable all experimental features
is not recommended - the best practice is to enable specific experimental
features, not all of them. Nevertheless, this all() function is still used
in one place - in the cql_repl tool - which uses it to enable all
experimental features.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211108135601.78460-1-nyh@scylladb.com>
Most of the machinery was already implemented since it was used when
jumping between clustering ranges of a query slice. We need only perform
one additional thing when performing an index skip during
fast-forwarding: reset the stored range tombstone in the consumer (which
may only be stored in fast-forwarding mode, so it didn't matter that it
wasn't reset earlier). Comments were added to explain the details.
As a preparation for the change, we extend the sstable reversing reader
random schema test with a fast-forwarding test and include some minor
fixes.
Fixes#9427.
Closes#9484
* github.com:scylladb/scylla:
query-request: add comment about clustering ranges with non-full prefix key bounds
sstables: mx: enable position fast-forwarding in reverse mode
test: sstable_conforms_to_mutation_source_test: extend `test_sstable_reversing_reader_random_schema` with fast-forwarding
test: sstable_conforms_to_mutation_source_test: fix `vector::erase` call
test: mutation_source_test: extract `forwardable_reader_to_mutation` function
test: random_schema: fix clustering column printing in `random_schema::cql`
Rewrite operations are scrub, cleanup and upgrade.
The race can happen because 'selection of sstables' and 'mark sstables as
compacting' are decoupled, so any deferring point in between can lead
to a parallel compaction picking the same files. After commit 2cf0c4bbf,
files are marked as compacting before rewrite starts, but it didn't
take into account the commit c84217ad which moved retrieval of
candidates to a deferring thread, before rewrite_sstables() is even
called.
Scrub isn't affected by this because it uses a coarse-grained approach
where the whole operation is run with compaction disabled, which isn't good
because regular compaction cannot run until its completion.
From now on, selection of files and marking them as compacting will
be serialized by running them with compaction disabled.
Now cleanup will also retrieve sstables with compaction disabled,
meaning it will no longer leave uncleaned files behind, which is
important to avoid data resurrection if node regains ownership of
data in uncleaned files.
Fixes#8168.
Refs #8155.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211129133107.53011-1-raphaelsc@scylladb.com>
When a memtable contains both mutations and tombstones that delete them,
the output flushed to sstables contains both. Inserting a
compacting reader results in writing smaller sstables and saves
compaction work later.
Performance tests of this change have shown a regression in a common
case where there are no deletes. A heuristic is employed to skip
compaction unless there are tombstones in the memtable to minimise
the impact of that issue.
Add flags recording whether a memtable contains tombstones. They can be used as a
heuristic to determine if a memtable should be compacted on
flush. It's an intermediate step until we can compact during applying
mutations on a memtable.
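The tombstone-flag heuristic can be modeled as follows (an illustrative Python sketch, not the actual C++ memtable code):

```python
class Memtable:
    def __init__(self):
        self.has_tombstones = False
        self.entries = []

    def apply(self, key, value, is_tombstone=False):
        self.entries.append((key, value, is_tombstone))
        if is_tombstone:
            # Remember that a tombstone was applied; flush consults this
            # flag to decide whether a compacting reader is worthwhile.
            self.has_tombstones = True

    def should_compact_on_flush(self) -> bool:
        # Skip the compacting reader in the common tombstone-free case,
        # which showed a regression when compaction was done unconditionally.
        return self.has_tombstones
```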
Most of the machinery was already implemented since it was used when
jumping between clustering ranges of a query slice. We need only perform
one additional thing when performing an index skip during
fast-forwarding: reset the stored range tombstone in the consumer (which
may only be stored in fast-forwarding mode, so it didn't matter that it
wasn't reset earlier). Comments were added to explain the details.
The test would check whether the forward and reverse readers returned
consistent results when created in non-forwarding mode with slicing.
Do the same but using fast-forwarding instead of slicing.
To do this we require a vector of `position_range`s. We also need a
vector of `clustering_range`s for the existing test. We modify the
existing `random_ranges` function to return `position_range`s instead of
`clustering_range`s since `position_range`s are easier to reason about,
especially when we consider non-full clustering key prefixes. A function
is introduced to convert a `position_range` to a `clustering_range` for
the existing test.
Don't shutdown the keyspaces just yet,
since they are needed during shutdown.
FIXME: restore when #8995 is fixed and no queries are issued
after the database shuts down.
Refs #8995
Fixes#9684
Test: unit(dev)
- scylla-gdb test fails locally with #9677
DTest: update_cluster_layout_tests.py:TestUpdateClusterLayout.simple_add_new_node_while_adding_info_{1,2}_test(dev)
- running now into #8995. dtest fails with unexpected error: "storage_proxy - Exception when communicating with
127.0.62.4, to read from system_distributed.service_levels:
seastar::gate_closed_exception (gate closed)"
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211127083348.146649-2-bhalevy@scylladb.com>
The next patch will disable stopping the keyspaces
in database shutdown due to #9684.
This will leave outstanding e_r_m:s when the factory
is destroyed. They must be unregistered from the factory
so they won't try to submit_background_work()
to gently clear their contents.
Support that temporarily until shutdown is fixed
to ensure there are no outstanding e_r_m:s when
the factory is destroyed, at which point this
can turn into an internal error.
Refs #8995
Refs #9684
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211127083348.146649-1-bhalevy@scylladb.com>
This series hardens compaction_manager::remove by:
- adding debug logging around task execution and stopping.
- accessing compaction_state as lw_shared_ptr rather than via a raw pointer.
- with that, detaching it from `_compaction_state` in `compaction_manager::remove` right away, to prevent further use of it while compactions are stopped.
- adding a write_lock in `remove` to make sure the lock is not held by any stray task.
Test: unit(dev), sstable_compaction_test(debug)
Dtest: alternator_tests.py:AlternatorTest.test_slow_query_logging (debug)
Closes#9636
* github.com:scylladb/scylla:
compaction_manager: add compaction_state when table is constructed
compaction_manager: remove: fixup indentation
compaction_manager: remove: detach compaction_state before stopping ongoing compactions
compaction_manager: remove: serialize stop_ongoing_compactions and gate.close
compaction_manager: task: keep a reference on compaction_state
test: sstable_compaction_test: incremental_compaction_data_resurrection_test: stop table before it's destroyed.
test: sstable_utils: compact_sstables: deregister compaction also on error path
test: sstable_compaction_test: partial_sstable_run_filtered_out_test: deregiser_compaction also on error path
test: compaction_manager_test: add debug logging to register/deregister compaction
test: compaction_manager_test: deregister_compaction: erase by iterator
test: compaction_manager_test: move methods out of line
compaction_manager: compaction_state: use counter for compaction_disabled
compaction_manager: task: delete move and copy constructors
compaction_manager: add per-task debug log messages
compaction_manager: stop_ongoing_compactions: log number of tasks to stop
The recent parallelization of boost unit tests caused an increase
in the number of xml result files. This is challenging for Jenkins, since it
appears to use rpc-over-ssh to read the result files, and as a result
it takes more than an hour to read all result files when the Jenkins
main node is not on the same continent as the agent.
To fix this, merge the result files in test.py and leave one result
file per mode. Later we can leave one result file overall (integrating
the mode into the testsuite name), but that can wait.
Tested on a local Jenkins instance (just reading the result files,
not the entire build).
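Merging per-test JUnit XML reports into one file per mode can be sketched like this (hypothetical helper using the stdlib, not the actual test.py code):

```python
import xml.etree.ElementTree as ET

def merge_junit_reports(xml_strings):
    # Collect every <testsuite> from the individual reports under a single
    # <testsuites> root, producing one result file per mode for Jenkins.
    root = ET.Element("testsuites")
    for xml in xml_strings:
        report = ET.fromstring(xml)
        # A report may be a bare <testsuite> or a <testsuites> wrapper.
        suites = [report] if report.tag == "testsuite" else list(report)
        for suite in suites:
            root.append(suite)
    return ET.tostring(root, encoding="unicode")
```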
Closes#9668
In this patch series we add an implementation of an
expiration service to Alternator, which periodically scans the data in
the table, looking for expired items and deleting them.
We also continue to improve the TTL test suite to cover additional
corner cases discovered during the development of the code.
This implementation is good enough to make all existing tests but one,
plus a few new ones, pass, but is still a very partial and inefficient
implementation littered with FIXMEs throughout the code. Among other
things, this initial implementation doesn't do anything reasonable about pacing of
the scan or about multiple tables, it scans entire items instead of only the
needed parts, and because each shard "owns" a different subset of the
token ranges, if a node goes down, partitions which it "owns" will not
get expired.
The current tests cannot expose these problems, so we will need to develop
additional tests for them.
Because this implementation is very partial, the Alternator TTL continues
to remain "experimental", cannot be used without explicitly enabling this
experimental feature, and must not be used for any important deployment.
Refs #5060 but doesn't close the issue (let's not close it until we have a
reasonably complete implementation - not this partial one).
Closes #9624
* github.com:scylladb/scylla:
alternator: fix TTL expiration scanner's handling of floating point
test/alternator: add TTL test for more data
test/alternator: remove "xfail" tag from passing tests in test_ttl.py
test/alternator: make test_ttl.py tests fast on Alternator
alternator: initial implementation of TTL expiration service
alternator: add another unwrap_number() variant
alternator: add find_tag() function
test/alternator: test another corner case of TTL setting
test/alternator: test TTL expiration for table with sort key
test/alternator: improve basic test for TTL expiration
test/alternator: extract is_aws() function
"
Reverse queries have to use the reverse schema (query schema) for the
read itself but the table schema for the result building, according to
the established interface with the coordinator (half-reverse format).
Range scans were using the query schema for both, which produced
un-parseable reconcilable results for mutation range scans.
This series fixes this and adds unit tests to cover this previously
uncovered area.
"
Fixes #9673.
* 'reverse-range-scan-test/v1' of https://github.com/denesb/scylla:
test/boost/multishard_mutation_query_test: add reverse read test
test/boost/multishard_mutation_query_test: add test for combinations of limits, paging and stateful
test/boost/multishard_mutation_query_test: generalize read_partitions_with_paged_scan()
test/boost/multishard_mutation_query_test: add read_all_partitions_one_by_one() overload with slice
multishard_mutation_query: fix reverse scans
partition_slice: init all fields in copy ctor
partition_slice: operator<<: print the entire partition row limit
partition_slice_builder: add with_partition_row_limit()
"
This set covers simple but diverse cases:
- cache hitrate calculator
- repair
- system keyspace (virtual table)
- dht code
- transport event notifier
All the places just require straightforward arguments passing.
And one fix in transport -- the event notifier needs a backref
to the owning server.
Remaining after this set is the snitch<->gossiper interaction
and the cache hitrate app state update from table code.
tests: unit(dev)
"
* 'br-unglobal-gossiper-cont' of https://github.com/xemul/scylla:
transport: Use server gossiper in event notifier
transport: Keep backreference from event_notifier
transport: Keep gossiper on server
dht: Pass gossiper to range_streamer::add_ranges
dht: Pass gossiper argument to bootstrap
system_keyspace: Keep gossiper on cluster_status_table
code: Carry gossiper down to virtual tables creation
repair: Use local gossiper reference
cache_hitrate_calculator: Keep reference on gossiper
This patchset implements part of the solution of the
problem described in https://github.com/scylladb/scylla/issues/4458.
Introduce a new key `enabled_features` in the `system.scylla_local`
table, update it whenever a gms feature is enabled,
then read the features from the table on node startup,
validate them and re-enable them early.
The solution provides a way to prevent prohibited node
downgrades: when a node does not understand some features
that were enabled previously, it means it's performing a
prohibited downgrade procedure.
Also, enabling features early shortens the time frame
during which a feature is not enabled on a node, which can
affect cluster liveness (until the node contacts others
to discover the features enabled in the cluster and
re-enables them).
Features should be enabled before commitlog starts replaying
since some features affect storage (for example, when
determining used sstable format).
* manmanson/persist_enabled_features_v8:
gms: feature_service: re-enable features on node startup
gms: gossiper: maybe_enable_features() should enable features in seastar::async context
gms: feature_service: expose registered features map
gms: feature_service: persist enabled features
gms: move `to_feature_set()` function from gossiper to feature_service
Re-enable previously persisted enabled features on node
startup. The features list to be enabled is read from
`system.scylla_local#enabled_features`.
In case an unknown feature is encountered, the node
fails to boot with an exception, because that means
the node is doing a prohibited downgrade procedure.
Features should be enabled before commitlog starts replaying
since some features affect storage (for example, when
determining used sstable format).
This patch implements a part of solution proposed by Tomek
in https://github.com/scylladb/scylla/issues/4458.
Tests: unit(dev)
Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
This will be used for re-enabling previously enabled cluster
features, which will be introduced in later patches.
Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
Save each feature enabled through the feature_service
instance in the `system.scylla_local` under the
'enabled_features' key.
The features are persisted only if the underlying
query context used by `db::system_keyspace` is initialized.
Since the `system.scylla_local` table is essentially a
string->string map, use an ad-hoc method for serializing the
enabled features set: the same as used in the gossiper for
translating the supported features set via gossip.
The entry should be saved before we enable the feature so
that crash-after-enable is safe.
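The serialization itself can be sketched as follows (a minimal illustration assuming the gossiper's comma-separated encoding; not the actual Scylla code, and the function names are made up):

```python
def serialize_features(features):
    """Encode a set of feature names into one string value, suitable
    for the string->string system.scylla_local table."""
    return ",".join(sorted(features))

def deserialize_features(value):
    """Inverse of serialize_features(); used when re-enabling
    persisted features on node startup."""
    return set(value.split(",")) if value else set()
```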
Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
This utility will also be used for de-serialization of
persisted enabled features, which will be introduced in a
later patch.
Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
The expiration-time attribute used by Alternator's TTL feature has a
numeric type, meaning that it may be a floating point number - not just
an integer, and is implemented as big_decimal, which has a separate
integer mantissa and exponent. Our code which checked expiration incorrectly
looked only at the mantissa - resulting in incorrect handling of
expiration times which have a fractional part - 123.4 was treated as
1234 instead of 123.
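The effect of the bug can be illustrated with Python's decimal module (an illustrative model, not the actual C++ big_decimal code; the function names are made up):

```python
from decimal import Decimal

def expired_buggy(expiration: Decimal, now: int) -> bool:
    # Buggy behavior: use only the mantissa (the unscaled digits) and
    # ignore the exponent, so Decimal("123.4") is compared as 1234.
    sign, digits, _exp = expiration.as_tuple()
    mantissa = int("".join(map(str, digits))) * (-1 if sign else 1)
    return mantissa <= now

def expired_fixed(expiration: Decimal, now: int) -> bool:
    # Fixed behavior: apply the exponent, so 123.4 is treated as 123
    # seconds (truncation toward zero).
    return int(expiration) <= now
```

With `now = 200`, the buggy check compares 1234 and wrongly concludes the item is not yet expired, while the fixed check compares 123 and expires it.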
This patch fixes the big_decimal handling in the expiration checking,
and also adds to test_ttl.py::test_ttl_expiration checks for a
non-integer floating-point expiration time as well as one with an
exponent. The new tests pass on DynamoDB, failed on Alternator before
this patch, and pass with it.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The existing TTL tests use only tiny tables, so they don't exercise
the expiration-time scanner's use of paging. In this patch we add
another test with a much larger table (with 40,000 items).
To verify that this test indeed checks paging, I stopped the scanner's
iteration after one page, and saw that this test starts failing (but
the smaller tests all pass).
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Most tests in test_ttl.py now pass, so remove their "xfail" tag.
The only remaining failing test is test_ttl_expiration_streams -
which cannot yet pass because the expiration event is not yet marked.
Note that the fact that almost all tests for Alternator's TTL feature
now pass does not mean the feature is complete. The current implementation
is very partial and inefficient, and only works reasonably in tests on
a single node. The current tests cannot expose these problems, so
we will need to develop additional tests for them. The tests will of
course remain useful to see that as the implementation continues to
improve, none of the tests that already work will break.
The Alternator TTL continues to remain "experimental", cannot be used
without explicitly enabling this experimental feature, and must not be
used for any important deployment.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The tests for the TTL feature in test/alternator/test_ttl.py take a
huge amount of time on DynamoDB - 10 to 30 minutes (!) - because it
delays expiration of items a long time after their intended expiration
times.
We intend Scylla's implementation to have a configurable delay for the
expiration scanner, which we will be able to configure to very short
delays for tests, so these tests can be made much faster on Scylla.
In this patch we change all of the tests to finish much more quickly
on Scylla.
Many of the tests still fail, because the TTL feature is not implemented
yet.
Although after this change all the tests in test_ttl.py complete in
a reasonable amount of time (around 3 seconds each), we still mark
them as "veryslow" and the "--runveryslow" flag is needed to run them.
We should consider changing this in the future, so that these tests will
run as part of our default test suite.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
In this patch we add an incomplete implementation of an expiration
service to Alternator, which periodically scans the data in the table,
looking for expired items and deleting them.
This implementation involves a new "expiration service" which runs a
background scan in each shard. Each shard "owns" a subset of the token
ranges - the intersection of the node's primary ranges with this shard's
token ranges - and scans those ranges over and over, deleting any items
which are found expired.
This implementation is good enough to make all existing tests but one
pass, but is still a partial and inefficient implementation littered with
FIXMEs throughout the code. Among other things, this implementation
doesn't do anything reasonable about pacing of the scan or about multiple
tables, it scans entire items instead of only the needed parts, and
if a node goes down, the part of the token range which it "owns" will not
be scanned for expiration (we need living nodes to take over the
background expiration work for dead nodes).
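The per-shard ownership described above amounts to a range intersection, roughly like this (an illustrative Python sketch using half-open numeric ranges; the real code operates on dht token ranges):

```python
def shard_scan_ranges(primary_ranges, shard_ranges):
    """Each shard scans the intersection of the node's primary token
    ranges with this shard's token ranges (half-open [lo, hi) ranges)."""
    owned = []
    for a0, a1 in primary_ranges:
        for b0, b1 in shard_ranges:
            lo, hi = max(a0, b0), min(a1, b1)
            if lo < hi:            # non-empty overlap
                owned.append((lo, hi))
    return owned
```

This also makes the noted limitation visible: a range primarily owned by a dead node appears in no live shard's result, so it is never scanned.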
The current tests cannot expose these problems, so we will need to develop
additional tests for them.
Because this implementation is very partial, the Alternator TTL continues
to remain "experimental", cannot be used without explicitly enabling this
experimental feature, and must not be used for any important deployment.
The new TTL expiration service will only run (at the moment) in the
background if the Alternator TTL experimental feature is enabled
and if Alternator is enabled as well.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
We have an unwrap_number() function which in case of data errors (such
as the value not being a number) throws an exception with a given
string used in the message.
In this patch we add a variant of unwrap_number() - try_unwrap_number() -
which doesn't take a message, and doesn't throw exceptions - instead it
returns an empty std::optional if the given value is not a number.
This function is useful in places where we need to know if we got a
number or not, but both outcomes are fine and neither is an error.
We'll use it in a following patch to parse expiration times for the
TTL feature.
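The two variants can be sketched in Python terms like so (illustrative only; the real functions are C++ and the bodies here are made up, modeled on DynamoDB's `{"N": "..."}` typed values):

```python
from typing import Optional

def unwrap_number(value: dict, diag: str) -> float:
    # Throwing variant: a non-number is a data error, reported with a
    # caller-supplied diagnostic string.
    if not isinstance(value, dict) or "N" not in value:
        raise ValueError(f"{diag}: expected a number, got {value!r}")
    return float(value["N"])

def try_unwrap_number(value: dict) -> Optional[float]:
    # Non-throwing variant: returns None when the value is not a
    # number -- useful where "not a number" is a valid outcome.
    if not isinstance(value, dict) or "N" not in value:
        return None
    return float(value["N"])
```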
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
find_tag() returns the value of a specific tag on a table, or nothing if
it doesn't exist. Unlike the existing get_tags_of_table() above, if the
table is missing the tags extension (e.g., is not an Alternator table)
it's not an error - we return nothing, as in the case that tags exist
but not this tag.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Although it isn't terribly useful, an Alternator user can enable TTL
with an expiration-time attribute set to a *key* attribute. Because
expiration times should be numeric - not other types like strings -
DynamoDB could warn the user when a chosen key attribute has a
non-numeric type (since key attributes do have fixed types!). But
DynamoDB doesn't warn about this - it simply expires nothing. This
test verifies that it indeed behaves this way.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
The basic test for TTL expiration, test_ttl.py::test_ttl_expiration,
uses a table with only a partition key. Most of the item expiration
logic is exactly the same for tables that also have a sort key, but
the step of *deleting* the item is different, so let's add a test
that verifies that also in this case, the expired item is properly
deleted.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
This patch improves test_ttl.py::test_ttl_expiration in two ways:
First, it checks yet another case - that items that have the wrong type
for the expiration-time column (e.g., a string) never get expired - even
if that string happens to contain a number that looks like an expiration
time.
Second, instead of the huge 15-minute duration for this test, the
test now has a configurable duration; we still need to use a very long
duration on AWS, but in Scylla we expect to be able to configure the
TTL scan frequency, and can finish this test in just a few seconds!
We already have experimental code which makes this test pass in just
3 seconds.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Extract a boolean function is_aws() out of the "scylla_only" fixture, so
it can be used in tests for other purposes.
For example, in the next patch the TTL tests will use it to pick
different timeouts on AWS (where TTL expiration has huge many-minute
delays) and on Scylla (which can be configured to have very short delays).
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
In the absence of abort_source or timeouts in the Raft API, automatic
bouncing can create too much noise during testing, especially
during network failures. Add an option to disable follower
bouncing feature, since randomized_nemesis_test has its own
bouncing which handles timeouts correctly.
Optionally disable forwarding in basic_generator_test.
Operations of adding a node to or removing one from the Raft configuration
are made idempotent: they do nothing if already done, and
they are safe to resume after a failure.
However, since topology changes are not transactional, if a
bootstrap or removal procedure fails midway, Raft group 0
configuration may go out of sync with topology state as seen by
gossip.
In future we must change gossip to avoid making any persistent
changes to the cluster: all changes to persistent topology state
will be done exclusively through Raft Group 0.
Specifically, instead of persisting the tokens by advertising
them through gossip, the bootstrap will commit a change to a system
table using Raft group 0. nodetool will switch from looking at
gossip-managed tables to consulting with Raft Group 0 configuration
or Raft-managed tables.
Once this transformation is done, naturally, adding a node to Raft
configuration (perhaps as a non-voting member at first) will become the
first persistent change to ring state applied when a node joins;
removing a node from the Raft Group 0 configuration will become the last
action when removing a node.
Until this is done, do our best to avoid a cluster state where
a removed node, or a node whose addition failed, is stuck in the Raft
configuration but is no longer present in gossip-managed
system tables. In other words, keep gossip the primary source of
truth. For this purpose, carefully choose the timing when we
join and leave Raft group 0:
Join Raft group 0 only after we've advertised our tokens, so that the
cluster is aware of this node and it's visible in nodetool status,
but before the node state jumps to "normal", i.e. before it accepts
queries. Since the operation is idempotent, invoke it on each
restart.
Remove the node from Group 0 *before* its tokens are removed
from gossip-managed system tables. This guarantees
that if removal from Raft group 0 fails for whatever reason,
the node stays in the ring, so nodetool removenode and
friends are re-tried.
Add tracing.
Break a dependency loop raft_rpc <-> raft_group_registry
via raft_address_map. Pass raft_address_map to raft_rpc and
raft_gossip_failure_detector explicitly, not entire raft_group_registry.
Extract server_for_group into a helper class. It's going to be used by
raft_group0 so make it easier to reference.
Introduce a special state machine used to find
a leader of an existing Raft cluster or create
a new cluster.
This state machine should be used when a new
Scylla node has no persisted Raft Group 0 configuration.
The algorithm is initialized with a list of seed
IP addresses, the IP address of this server, and
this server's Raft server id.
The IP addresses are used to construct an initial list of peers.
Then, the algorithm tries to contact each peer (excluding self) from
its peer list and share the peer list with this peer, as well as
get the peer's peer list. If this peer is already part of
some Raft cluster, this information is also shared. On a response
from a peer, the current peer's peer list is updated. The
algorithm stops when all peers have exchanged peer information or
one of the peers responds with id of a Raft group and Raft
server address of the group leader.
(If any of the peers fails to respond, the algorithm re-tries
ad infinitum with a timeout).
More formally, the algorithm stops when one of the following is true:
- it finds an instance with initialized Raft Group 0, with a leader
- all the peers have been contacted, and this server's
Raft server id is the smallest among all contacted peers.
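The loop described above can be sketched as a simplified single-threaded Python model (`contact()` stands in for the real RPC, and the per-peer retries with timeout are omitted; all names here are illustrative, not Scylla's actual API):

```python
def discover_group0(self_id, self_addr, seeds, contact):
    # contact(addr) -> (their_peer_list, their_raft_id, leader_or_None)
    peers = {self_addr} | set(seeds)
    known_ids = {self_addr: self_id}       # also serves as "contacted" set
    while peers - set(known_ids):
        addr = min(peers - set(known_ids))  # deterministic pick of a peer
        their_peers, their_id, leader = contact(addr)
        known_ids[addr] = their_id
        peers |= set(their_peers)           # share/merge peer lists
        if leader is not None:
            return ("join", leader)         # existing Group 0 with a leader
    if self_id == min(known_ids.values()):
        return ("create", self_addr)        # smallest id creates the cluster
    return ("wait", None)                   # another peer will create it
```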
It is infrequently updated (typically once at start) but stores
critical state for this instance's survival (Raft Group 0 id, Raft
server id, sstables format), so always write it to commit log
in sync mode.
Implement system_keyspace helpers to persist Raft Group 0 id
and Raft Server id.
Do not use coroutines in a template function to work around
https://bugs.llvm.org/show_bug.cgi?id=50345
Implement an RPC to forward add_entry calls from the follower
to the leader. Bounce & retry in case of not_a_leader.
Do not retry in case of uncertainty - this can lead to adding
duplicate entries.
The feature is added to core Raft since it's needed by
all current clients - both topology and schema changes.
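The bounce-and-retry rule can be sketched as follows (illustrative Python; `NotALeader` and the RPC callable are made-up stand-ins for the real C++ types):

```python
class NotALeader(Exception):
    """Stand-in for Raft's not_a_leader error; carries a leader hint."""
    def __init__(self, leader_hint):
        super().__init__(f"not a leader, try {leader_hint}")
        self.leader_hint = leader_hint

def forward_add_entry(entry, first_target, rpc_add_entry):
    # On not_a_leader we know nothing was appended, so it is safe to
    # bounce to the hinted leader and retry. Any other failure (timeout,
    # network error) propagates: under uncertainty the entry may already
    # be appended, and re-sending could add it twice.
    target = first_target
    while True:
        try:
            return rpc_add_entry(target, entry)
        except NotALeader as e:
            target = e.leader_hint
```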
When forwarding an entry to a remote leader we may get back
a term/index pair that conflicts (has the same index, but is with
a higher term) with a local entry we're still waiting on.
This can happen, e.g. because there was a leader change and the
log was truncated, but we still haven't got the append_entries
RPC from the new leader, still haven't truncated the log locally,
still haven't aborted all the local waits for truncated entries.
Only remove the offending entry from the wait list and abort it.
There may be entries labeled with an older term to the right (with
higher commit index) of the conflicting entry. However, finding them
would require a linear scan. If we did that, we might end up doing this
linear scan for *every* conflicting entry during the transition
period, which brings us to N^2 complexity of this step. At the
same time, as soon as append_entries that commits a higher-term
entry with the same index reaches the follower, the waits
for the respective truncated entry will be aborted anyway (see
notify_waiters() which sets dropped_entry exception), so the scan
is unnecessary.
Similarly to adding entries, allow modifying the
Raft group configuration on a follower. The implementation
works the same way as adding entries - it forwards the command
to the leader.
Now that add_entry() and modify_config() never throw not_a_leader,
it's more likely to throw timed_out_error, e.g. in case the
network is partitioned. Previously it was only possible due to a
semaphore wait timeout, and this scenario was not tested.
Handle timed_out_error on RPC level to let the existing tests
(specifically the randomized nemesis test) pass.
A missing initialization in poll_timeout of class interpreter
could manifest itself as a sporadically failing
randomized_nemesis_test.
The test would prematurely run out of the allowed limit of virtual
clock ticks.
The notifier is an automatic friend of server and can access its
private fields without additional wrappers/decorations.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The event_notifier is a private server subclass that's created once
per server to handle events from storage_service. The notifier needs
gossiper that already sits on the server, and to get it the simplest
way is to equip notifier with the server backreference. Since these
two objects are in strict 1:1 relation this reference is safe.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The gossiper is needed by the transport::event_notifier. There's
already gossiper reference on the transport controller, but it's
a local reference, because controller doesn't need more. This
patch upgrades the controller reference to sharded<> and propagates
it further up to the server.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
A continuation of the previous patch. The range_streamer needs
gossiper too, and is called from boot_strapper and storage_service.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The boot_strapper::bootstrap needs gossiper and is called only from
the storage_service code that has it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
One of the tables needs the gossiper and uses the global one. This patch
prepares the fix by patching the main -> register_virtual_tables
stack with the gossiper reference.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There are two places in repair that call for the global gossiper instance.
However, the repair_service already has a sharded gossiper on board, and
it can use it directly in the first place.
The second place is called from inside repair_info method. This place
is fixed by keeping the gossiper reference on the info, just like it's
done for other services that info needs.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The calculator needs to update its app-state on the gossiper. Keeping
a reference is safe -- the gossiper starts early, the calculator at
the very end, and they stop in reverse order.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This patch adds two more tests for the unimplemented Select=COUNT
feature (which asks to only count queried items and not return the
actual items). Because this feature has not yet been implemented in
Alternator (Refs #5058), the new tests xfail. They pass on DynamoDB.
The two tests added here are for the interaction of the Select=COUNT
feature with filters - in one of the two supported syntaxes (QueryFilter
and FilterExpression). We want to verify that even though the user
doesn't need the content of the items (since only the counts were
requested), they are still retrieved from disk as needed for doing
proper filtering - but not returned.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211124225429.739744-1-nyh@scylladb.com>
Query time must be fetched after populate. If compaction is executed
during populate, it may be executed with a timestamp later than query_time.
This would cause the test's expected compaction and the compaction during
populate to be executed at different time points, producing different
results, leading to sporadic test failures depending on the relative
timing of those operations. If no other mutations happen after populate,
and query_time is later than the compaction time during population, we're
guaranteed to have the same results.
Message-Id: <20211123134808.105068-1-mikolaj.sieluzycki@scylladb.com>
Currently, for AWS instances in `is_supported_instance_class()` other than
i3* and *gd (for example: m5d), scylla_io_setup neither provides
preconfigured values for io_properties.yaml nor runs iotune nor fails.
This silently results in a broken io_properties.yaml, like so:
disks:
  - mountpoint: /var/lib/scylla
Fix that.
Closes #9660
"
There's a nest of globals in streaming/ code. The stream_manager
itself and a whole lot of its dependencies (database, sys_dist_ks,
view_update_generator and messaging). Also the streaming code gets the
gossiper instance via a global call.
The fix is, as usual, keeping the sharded<stream_manager> in
the main() code and pushing its reference everywhere. Somewhere
in the middle the global pointers go away, being replaced with
respective references passed to the stream_manager ctor.
This reveals an implicit dependency:
storage_service -> stream_manager
tests: unit(dev),
dtest.cdc_tests.cluster_reduction_with_cdc(dev)
v1: dtest.bootstrap_test.add_node(dev)
v1: dtest.bootstrap_test.simple_bootstrap(dev)
"
* 'br-unglobal-stream-manager-3-rebase' of https://github.com/xemul/scylla: (26 commits)
streaming, main: Remove global stream_manager
stream_transfer_task: Get manager from session (result-future)
stream_transfer_task: Keep Updater fn onboard
stream_transfer_task: Remove unused database reference
stream_session: Use manager reference from result-future
stream_session: Capture container() in message handler
stream_session: Keep stream_manager reference
stream_session: Remove unused default constructor
stream_result_future: Use local manager reference
stream_result_future: Keep stream_manager reference
stream_plan: Keep stream_manager onboard
dht: Keep stream_manager on board
streaming, api: Use captured manager in handlers
streaming, api: Standardize the API start/stop
storage_service: Sanitize streaming shutdown
storage_service: Keep streaming_manager reference
stream_manager: Use container() in notification code
streaming: Move get_session into stream_manager
streaming: Use container.invoke_on in rpc handlers
streaming: Fix interaction with gossiper
...
When the task starts it needs the stream_manager to get the messaging
service and database from. There's a session at hand, and this
session is properly initialized, thus it has the result-future.
Voila -- we have the manager!
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The helper function called send_mutation_fragments needs the manager
to update stats about stream_transfer_task as it goes on. Carrying the
manager over its stack is quite boring, but there's a helper send_info
object that lives there. Equip the guy with the updating function and
capture the manager by it early to kill one more usage of the global
stream_manager call.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
When the stream_session initializes, it's equipped with
the shared pointer to the stream_result_future very early. In
all the places where stream_session needs the manager this
pointer is alive, and the session can get the manager from it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The stream_mutation_fragments handler needs to access the manager. Since
the handler is registered by the manager itself, it can capture the
local manager reference and use container() where appropriate.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The manager is needed to get the messaging service and database from.
Actually, the database can be pushed through arguments in all the
places, so effectively the session only needs the messaging. However,
the stream tasks need the manager badly and there's no other
place to get it from other than the session.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The stream_result_future needs the manager to register on it and to
unregister from it. Also the result-future is referenced from
stream_session that also needs the manager (see next patches).
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The plan itself doesn't need it, but it creates some lower level
objects that do. Next patches will use this reference.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This is the preparation for the future patching. The stream_plan
creation will need the manager reference, so keep one on dht
object in advance. These are only created from the storage service
bootstrap code.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Today's idea of API reg/unreg is to carry the target service via
lambda captures down to the route handlers and unregister those
handlers before the target is about to stop.
This patch makes it so for the streaming API.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Use the local reference and don't use the 'is_stopped' boolean, as the
whole stop_transport is guarded by its own lock.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The manager is drained() on drain/decommission/isolate. Since now
it's storage_service who orchestrates all of the above, it needs
an explicit reference to the target.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Continuation of the previous patch -- some native stream_manager methods
can enjoy using the container() call. One nit -- the [] access to the map
of statistics now runs in a const context and cannot create elements, so
switch this place to the .at() method.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The streaming manager registers itself in the gossiper, so it needs an
explicit dependency reference. Also it forgot to unregister itself, so do it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
In case of streaming this mostly means dropping the global
init/uninit calls and replacing them with a sharded<stream_manager>
instance. It's still global, but it's being fixed atm.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The start/stop standard is becoming like

    sharded<foo> foo;
    foo.start();
    defer([] { foo.stop(); });
    foo.invoke_on_all(&foo::start);
    ...
    defer([] { foo.shutdown(); });
    wait_for_stop_signal();
    /* quit, making the above defers self-unroll */
where .shutdown() for a service would mean "do whatever is
appropriate to start stopping, the real synchronous .stop() will
come some time later".
According to that, rename .stop() as it's really the mentioned
preparation, not real stopping.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Currently streaming uses global pointers to save and get a
dependency. Now all the dependencies live on the manager,
this patch changes all the places in streaming/ to get the
needed dependencies from it, not from global pointer (next
patch will remove those globals).
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The stream manager is going to become the central point of control
for the streaming subsys. This patch makes its dependencies
explicit and prepares the ground for further patching.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This is a translation of Cassandra's CQL unit test source file
validation/entities/UserTypesTest.java into our cql-pytest
framework.
This test file includes 26 tests for various features and corners of
the user-defined type feature. Two additional tests which were more
involved to translate were dropped with a comment explaining why.
All 26 tests pass on Cassandra, and all but one pass on Scylla:
the test testUDTWithUnsetValues fails on Scylla and is marked xfail.
It reproduces a previously-unknown Scylla bug:
Refs #9671: In some cases, trying to assign an UNSET value into part
of a UDT is not detected
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211124074001.708183-1-nyh@scylladb.com>
Which also tests combinations of limits, paging and statefulness.
Fixes: #9328
This patch fixes the above issue by providing the test that said issue
was asking for. The bug described therein was already
fixed by an earlier patch.
"
Except for TOC, Filter, Digest and CRC32, these are trivial to read with
any text/binary editor.
"
* 'scylla-sstable-dump-components' of https://github.com/denesb/scylla:
tools/scylla-sstable: add --dump-scylla-metadata
tools/scylla-sstable: add --dump-statistics
tools/scylla-sstable: add --dump-summary
tools/scylla-sstable: add --dump-compression-info
tools/scylla-sstable: extract unsupported flag checking into function
sstables/sstable: add scylla metadata getter
sstables/sstable: add statistics accessor
storage_proxy::query_partition_key_range_concurrent() iterates through
vnodes produced by its argument query_ranges_to_vnodes_generator&&
ranges_to_vnodes and tries to merge them. This commit introduces
a check that subsequent vnodes are contiguous with each other before
merging them.
Fixes #9167
Closes #9175
"
When provided all the tests start from under the 'taskset -c $value'.
This is _not_ the same as just doing 'taskset -c ... ./test.py ...'
because in the latter case test.py will compete with all the tests
for the provided cpuset and may not be able to run at desired speed.
With this option it's possible to isolate the tests themselves on a
cpuset without affecting the test.py performance.
One of the examples when test.py speed can be critical is catching
flaky tests that reveal their buggy nature only when run in a tight
environment. The combination of --cpus, --repeat and --jobs creates
nice pressure on the cpu, and keeping the test.py out of the mincer
lets it fork and exec (and wait) the tests really fast.
tests: unit(dev, with and without --cpus)
"
* 'br-test-taskset-2' of https://github.com/xemul/scylla:
test.py: Add --cpus option
test.py: Lazily calculate args.jobs
Extract all logic related to issuing the actual read and building the
combined result. This is now done by a ResultBuilder template object,
which allows reusing the paging logic for both mutation and data scans.
ResultBuilder implementations for both are also provided by this patch.
The paging logic is also fixed to work correctly with the
per-partition-row-limit.
The read itself has to be done with the reversed schema (query schema)
but the result building has to be done with the table schema. For data
queries this doesn't matter, but replicate the distinction for
consistency (and because this might change).
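The builder pattern described above can be sketched in miniature (RowCountBuilder and paged_read are illustrative names, not the actual Scylla types):

```python
class RowCountBuilder:
    """One possible builder: collects rows while enforcing a
    per-partition row limit, leaving the paging loop generic."""
    def __init__(self, per_partition_limit):
        self.per_partition_limit = per_partition_limit
        self.rows = []

    def consume(self, partition, partition_rows):
        # Honor the per-partition limit while accumulating the result.
        self.rows.extend(
            (partition, r) for r in partition_rows[:self.per_partition_limit])

def paged_read(partitions, builder):
    # The paging loop only drives the read; what a "result" is
    # (data page vs. mutations) is entirely the builder's business.
    for partition, rows in partitions:
        builder.consume(partition, rows)
    return builder.rows
```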
This series gets rid of the global batchlog_manager instance.
It does so by first allowing a global pointer to be set and
instantiating stack-local instances in main and
cql_test_env.
Expose the cql_test_env batchlog_manager to tests
so they won't need the global `get_batchlog_manager()` as
used in batchlog_manager_test.test_execute_batch.
Then we pass a reference to the `sharded<db::batchlog_manager>` to
storage_service so it can be used instead of the global one.
Derive batchlog_manager from peering_sharded_service so it
gets its `container()` rather than relying on the global `get_batchlog_manager()`.
And finally, handle a circular dependency between the batchlog_manager,
that relies on the query_processor that, in turn, relies on the storage_proxy,
and the storage_proxy itself that depends on the batchlog_manager for
`mutate_atomically`.
Moved `endpoint_filter` to gossiper so `storage_proxy::mutate_atomically`
can call it via the `_gossiper` member it already has.
The function requires a gossiper object rather than a batchlog_manager
object.
Also moved `get_batch_log_mutation_for` to storage_proxy so it can be
called from `sync_write_to_batchlog` (also from the mutate_atomically path)
Test: unit(dev)
DTest: batch_test.py:TestBatch.test_batchlog_manager_issue(dev)
* git@github.com:bhalevy/scylla.git deglobalize-batchlog_manager-v2
get rid of the global batchlog_manager
batchlog_manager: get_batch_log_mutation_for: move to storage_proxy
batchlog_manager: endpoint_filter: move to gossiper
batchlog_manager: do_batch_log_replay: use lambda coroutine
batchlog_manager: derive from peering_sharded_service
storage_service: keep a reference to the batchlog_manager
test: cql_test_env: expose batchlog_manager
main: allow setting the global batchlog_manager
These test cases may crash if running with more shards.
This is not required for test.py runs, but rather when
running the test manually using the command line.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211122204340.1020932-1-bhalevy@scylladb.com>
Add a restriction in create_view_statement to disallow creation of MV for CDC Log table.
Also add a CQL test that checks the new restriction works.
Test: unit(dev)
Fixes #9233
Closes #9663
* 'fix9233' of https://github.com/haaawk/scylla:
tests: Add cql test to verify it's impossible to create MV for CDC Log
cql3: Make it impossible to create MV on CDC log
Developers often run Scylla with the default conf/scylla.yaml provided
with the source distribution. The existing default listens for all ports
but one (19042, 10000, 9042, 7000) on the *localhost* IP address (127.0.0.1).
But just one port - 9180 (Prometheus metrics) - is listened on 0.0.0.0.
This patch changes the default to be 127.0.0.1 for port 9180 as well.
Note that this just changes the default scylla.yaml - users can still
choose whatever listening address they want by changing scylla.yaml
and/or passing command line parameters.
The benefits of this patch are:
1. More consistent.
2. Better security for developers (don't open ports on external
addresses while testing).
3. Allow test/cql-pytest/run to run in parallel with a default run of
Scylla (currently, it fails to run Scylla on a random IP address,
because the default run of Scylla already took port 9180 on all IP
addresses).
The third benefit is what led me to write this patch. Fixes #8757.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20210530130307.906051-1-nyh@scylladb.com>
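For reference, the change amounts to a one-line default in conf/scylla.yaml, roughly:

```yaml
# Prometheus metrics endpoint, now bound to localhost like the
# other default listen addresses (9042, 7000, 10000, 19042):
prometheus_address: 127.0.0.1
```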
The option accepts taskset-style cpulist and limits the launched tests
respectively. When specified, the default number of jobs is adjusted
accordingly, if --jobs is given it overrides this "default" as expected.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Next patch will need to know if the --jobs option was specified or the
caller is OK with the default. One way to achieve it is to keep 0 as the
default and set the default value afterwards.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
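The interplay of the two patches can be sketched as follows (parse_cpulist and effective_jobs are hypothetical helpers, not the actual test.py code; the keep-0-as-default trick for --jobs is the one described above):

```python
def parse_cpulist(spec):
    """Parse a taskset-style cpulist like '0-3,5' into a set of cpu ids."""
    cpus = set()
    for part in spec.split(','):
        if '-' in part:
            lo, hi = part.split('-')
            cpus.update(range(int(lo), int(hi) + 1))
        else:
            cpus.add(int(part))
    return cpus

def effective_jobs(jobs_arg, cpus):
    """jobs_arg == 0 means '--jobs not specified'; derive the default
    from --cpus when given, else fall back to a fixed default."""
    if jobs_arg:
        return jobs_arg          # explicit --jobs overrides everything
    return len(cpus) if cpus else 4   # 4: stand-in for the usual default
```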
In the DynamoDB API, UpdateItem's AttributeUpdates parameter (the older
syntax, which was superseded by UpdateExpression) has a DELETE operation
that can do two different things: It can delete an attribute, or it can
delete elements from a set. Before this patch we only implemented the
first feature, and this patch implements the second.
Note that unlike the ordinary delete, the second feature - set subtraction -
is a read-modify-write operation. This is not only because of Alternator's
serialization (as JSON strings, not CRDTs) - but also fundamentally because
of the API's guarantees - e.g., the operation is supposed to fail if the
attribute's existing value is *not* a set of the correct type, so it
needs to read the old value.
The test for this feature begins to pass, so its "xfail" mark is
removed. After this, all tests in test/alternator/test_item.py pass :-)
Fixes#5864.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211103151206.157184-1-nyh@scylladb.com>
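A rough sketch of the DELETE semantics described above (the function and error strings are illustrative, not Alternator's actual code):

```python
def delete_attribute_update(existing_item, attr, operand=None):
    """Sketch of AttributeUpdates DELETE semantics.

    Without an operand, the attribute is removed outright. With a set
    operand, the operation reads the old value, verifies it is a set
    of the same type, and writes back the set difference.
    """
    if operand is None:
        existing_item.pop(attr, None)
        return existing_item
    # Read-modify-write: the old value must be inspected.
    (old_type, old_set), = existing_item[attr].items()
    (new_type, delta), = operand.items()
    if old_type not in ('SS', 'NS', 'BS') or old_type != new_type:
        raise ValueError(
            'ValidationException: existing value is not a set of the correct type')
    remaining = [v for v in old_set if v not in delta]
    if remaining:
        existing_item[attr] = {old_type: remaining}
    else:
        existing_item.pop(attr)  # DynamoDB removes attributes whose set becomes empty
    return existing_item
```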
With that, it is always expected that _compaction_state[cf]
exists when compaction jobs are submitted.
Otherwise, throw std::out_of_range exception.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
So that the compaction_state won't be found from this point on,
while stopping the ongoing compaction.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Now that compaction tasks enter the compaction_state gate there is
no point in stopping ongoing compaction in parallel to closing the gate.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
And hold its gate to make sure the compaction_state outlives
the task and can be used to wait on all tasks and functions
using it.
With that, avoid accessing _compaction_state[cf] directly to acquire
shared/exclusive locks; instead get to it via
task->compaction_state so it can be detached from
_compaction_state while the task is running, if needed.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
It must remove itself from the compaction_manager,
that will stop_ongoing_compactions.
Without that we're hitting
```
sstable_compaction_test: ./seastar/include/seastar/core/gate.hh:56: seastar::gate::~gate(): Assertion `!_count && "gate destroyed with outstanding requests"' failed.
```
when destroying the compaction_manager.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
And rename to get_batchlog_mutation_for while at it,
as it's about the batchlog, not batch_log.
This resolves a circular dependency between the
batchlog_manager and the storage_proxy that required
it in this case.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
There's nothing in this function that actually requires
the batchlog manager instance.
It uses a random number engine that's moved along with it
to class gossiper.
This resolves a circular dependency between the
batchlog_manager and storage_proxy.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Simplify the function implementation and error handling
by invoking a lambda coroutine on shard 0 that keeps
a gate holder and semaphore units on its stack, for RAII-
style unwinding.
It then may invoke a function on another shard, using
the peered service container() to do the
replay on the destination shard.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
As a prerequisite to deglobalizing the batchlog_manager,
allow setting a global pointer to it and instantiate
the sharded<db::batchlog_manager> on the main/cql_test_env
stack.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
We'd like to use compaction_state::gate both for functions
running with compaction disabled and for tasks referring
to the compaction_state so that stop_ongoing_compactions
could wait on all functions referring to the state structure.
This is also cleaner with respect to not relying on
gate::use_count() when re-submitting regular compaction
when compaction is re-enabled.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
get_compactions().size() may return 0 while there are
non-zero tasks to stop.
Some tasks may not be marked as `compaction_running` since
they are either:
- postponed (due to compaction manager throttling of regular compaction)
- sleeping before retry.
In both cases we still want to stop them so the log message
should reflect both the number of ongoing compactions
and the actual number of tasks we're stopping.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
shared_promise::get_shared_future() is marked noexcept, but can
allocate memory. It is invoked by the sstable partition index cache inside
an allocating section, which means that allocations can throw
bad_alloc even though there is memory to reclaim, i.e. under normal
conditions.
Fix by allocating the shared_promise in stable memory, in the
standard allocator via lw_shared_ptr<>, so that it can be accessed
outside the allocating section.
Fixes #9666
Tests:
- build/dev/test/boost/sstable_partition_index_cache_test
Message-Id: <20211122165100.1606854-1-tgrabiec@scylladb.com>
Indexed queries are using paging over the materialized view
table. Results of the view read are then used to issue reads of the
base table. If base table reads are short reads, the page is returned
to the user and paging state is adjusted accordingly so that when
paging is resumed it will query the view starting from the row
corresponding to the next row in the base which was not yet
returned. However, paging state's "remaining" count was not reset, so
if the view read was exhausted the reading will stop even though the
base table read was short.
Fix by restoring the "remaining" count when adjusting the paging state
on short read.
Tests:
- index_with_paging_test
- secondary_index_test
Fixes #9198
Message-Id: <20210818131840.1160267-1-tgrabiec@scylladb.com>
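A minimal sketch of the paging-state adjustment, with all names hypothetical; the point is only that a short read must not consume the query's remaining-row budget for rows it never returned:

```python
class PagingState:
    """Toy paging state: where to resume, and how many rows may still
    be returned for the whole query."""
    def __init__(self, remaining, next_row=None):
        self.remaining = remaining
        self.next_row = next_row

def adjust_on_short_read(state, page_budget, rows_returned, resume_row):
    # Resume the view scan at the first base row not yet returned...
    state.next_row = resume_row
    # ...and charge the budget only for rows actually returned: before
    # the fix, an exhausted view read could leave `remaining` at zero,
    # so the resumed read produced nothing even though the base table
    # still had rows.
    state.remaining = page_budget - rows_returned
    return state
```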
First, it doesn't test the gossiper, so
it's unclear why we have it at all.
And it doesn't test anything more than what we test
using the cql_test_env either.
For testing gossip there is test/manual/gossip.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211122081305.789375-2-bhalevy@scylladb.com>
This series contains fixes for non voting members handling for stepdown
and stable leader check.
* scylla-dev/raft-stepdown-fixes-v2:
raft: handle non voting members correctly in stepdown procedure
raft: exclude non voting nodes from the stable leader check
raft: fix configuration::can_vote() to work correctly with joint config
To avoid back-calling the system_keyspace from the messaging layer,
let the system_keyspace get the preferred ips vector and pass it
down to the messaging_service.
This is part of the effort to deglobalize the system keyspace
and query context.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211119143523.3424773-1-bhalevy@scylladb.com>
The patch also removes the usage of map_reduce() because it is no longer needed
after 6191fd7701 that drops futures from the view mutation building path.
The patch preserves yielding point that map_reduce() provides though by
calling to coroutine::maybe_yield() explicitly.
Message-Id: <YZoV3GzJsxR9AZfl@scylladb.com>
"
After this series, compaction will finally stop including database.hh.
tests: unit(debug).
"
* 'stop_including_database_hh_for_compaction' of github.com:raphaelsc/scylla:
compaction: stop including database.hh
compaction: switch to table_state in get_fully_expired_sstables()
compaction: switch to table_state
compaction: table_state: Add missing methods required by compaction
Make compaction procedure switch to table_state. Only function in
compaction.cc still directly using table is
get_fully_expired_sstables(T,...), but subsequently we'll make it
switch to table_state and then we can finally stop including database.hh
in the compaction code.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
These are the only methods left for compaction to switch to
table_state, so compaction can finally stop including database.hh
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
"
Add a sharded locator::effective_replication_map_factory that holds
shared effective_replication_maps.
To search for e_r_m in the factory, we use a compound `factory_key`:
<replication_strategy type, replication_strategy options, token_metadata ring version>.
Start the sharded factory in main (plus cql_test_env and tools/schema_loader)
and pass a reference to it to storage_proxy and storage_server.
For each keyspace, use the registry to create the effective_replication_map.
When registered, effective_replication_map objects erase themselves
from the factory when destroyed. effective_replication_map then schedules
a background task to clear_gently its contents, protected by the e_r_m_f::stop()
function.
Note that for non-shard 0 instances, if the map
is not found in the registry, we construct it
by cloning the precalculated replication_map
from shard 0 to save the cpu cycles of re-calculating
it time and again on every shard.
Test: unit(dev), schema_loader_test(debug)
DTest: bootstrap_test.py:TestBootstrap.decommissioned_wiped_node_can_join_test update_cluster_layout_tests.py:TestUpdateClusterLayout.simple_add_new_node_while_schema_changes_with_repair_test (dev)
"
* tag 'effective_replication_map_factory-v7' of https://github.com/bhalevy/scylla:
effective_replication_map: clear_gently when destroyed
database: shutdown keyspaces
test: cql_test_env: stop view_update_generator before database shuts down
effective_replication_map_factory: try cloning replication map from shard 0
tools: schema_loader: start a sharded erm_factory
storage_service: use erm_factory to create effective_replication_map
keyspace: use erm_factory to create effective_replication_map
effective_replication_map: erase from factory when destroyed
effective_replication_map_factory: add create_effective_replication_map
effective_replication_map: enable_lw_shared_from_this
effective_replication_map: define factory_key
keyspace: get a reference to the erm_factory
main: pass erm_factory to storage_service
main: pass erm_factory to storage_proxy
locator: add effective_replication_map_factory
Some of the common flags are unsupported for dumping components other
than the data one. Currently this is checked in the only non-data
dumper: dump-index. Move this into a separate function in preparation for
adding dumpers for other components as well.
Turns out most of regular writer can be reused by GC writer, so let's
merge the latter into the former. We gain a lot of simplification,
lots of duplication is removed, and additionally, GC writer can now
be enabled with interposer as it can be created on demand by
each interposer consumer (will be done in a later patch).
Refs #6472.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211119120841.164317-1-raphaelsc@scylladb.com>
Prevent reactor stalls by gently clearing the replication_map
and token_metadata_ptr when the effective_replication_map is
destroyed.
This is done in the background, protected by the
effective_replication_map_factory::stop() method.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
release the keyspace effective_replication_map during
shutdown so that effective_replication_map_factory
can be stopped cleanly with no outstanding e_r_m:s.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
We can't have view updates happening after the database shuts down.
In particular, mutateMV depends on the keyspace effective_replication_map
and it is going to be released when all keyspaces shut down, in the next patch.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Calculating a new effective_replication_map on each shard
is expensive. To try to save that, use the factory key to
look up an e_r_m on shard 0 and if found, use it to clone
its replication map and use that to make the shard-local
e_r_m copy.
In the future, we may want to improve that in 2 ways:
- instead of always going to shard 0, use hash(key) % smp::count
to create the first copy.
- make full copies only on NUMA nodes and keep a shared pointer
on all other shards.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
This is required for an upcoming change to create effective_replication_map
on all shards in storage_service::replication_to_all_cores.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Instead of calculating the effective_replication_map
in replicate_to_all_cores, use effective_replication_map_factory::
create_effective_replication_map.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The effective_replication_map_factory keeps raw pointers
to outstanding effective_replication_map:s.
These are kept valid using a shared effective_replication_map_ptr.
When the last shared ptr reference is dropped the effective_replication_map
object is destroyed, therefore the raw pointer to it in the factory
must be erased.
This now happens in ~effective_replication_map when the object
is marked as registered.
Registration happens when effective_replication_map_factory inserts
the newly created effective_replication_map to its _replication_maps
map, and the factory calls effective_replication_map::set_factory().
Note that effective_replication_map may be created temporarily
and not be inserted to the factory's map, therefore erase
is called only when required.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Make a factory key using the replication_strategy type
and config options, plus the token_metadata ring version
and use it to search for an already-registered effective_replication_map.
If not found, calculate a new one with create_effective_replication_map
and register it using the above key.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
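The lookup scheme can be sketched as a get-or-create keyed by the compound factory key (ErmFactory and find_or_create are illustrative names, not the actual C++ types):

```python
class ErmFactory:
    """Minimal sketch: effective_replication_map objects are shared,
    keyed by (strategy type, strategy options, token ring version)."""
    def __init__(self):
        self._maps = {}

    def find_or_create(self, strategy_type, options, ring_version, calculate):
        # Options are normalized into a hashable, order-independent form.
        key = (strategy_type, tuple(sorted(options.items())), ring_version)
        erm = self._maps.get(key)
        if erm is None:
            erm = calculate()        # expensive: compute a new replication map
            self._maps[key] = erm    # register under the compound key
        return erm
```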
So an effective_replication_map_ptr can be generated
using a raw pointer by effective_replication_map_factory.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
To be used to locate the effective_replication_map
in the to-be-introduced effective_replication_map_factory.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
To be used for creating effective_replication_map
when token_metadata changes, and update all
keyspaces with it.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
It will be used further to create shared copies
of effective_replication_map based on replication_strategy
type and config options.
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Similar to other timeout handling paths, there is no need to print an
ERROR for timeout as the error is not returned anyhow.
Eventually the error will be reported at the query level
when the query times out or fails in any other way.
Also, similar to `storage_proxy::mutate_end`, traces were added
for the error cases.
FWIW, these extraneous timeout errors cause dtest failures.
E.g. alternator_tests:AlternatorTest.test_slow_query_logging
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211118153603.2975509-1-bhalevy@scylladb.com>
We're using a coarse resolution when rounding clock time for sstables to
be evenly distributed across time buckets. Let's use a better resolution,
to make sure sstables won't fall into the edges.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211118172126.34545-1-raphaelsc@scylladb.com>
The test checks every 100 * smp::count milliseconds that a shard
has been able to make at least one step. Shards, in turn, take up
to 100 ms sleeping breaks between steps. It seems like on heavily
loaded nodes the checking period is too small and the test
stuck-detector shoots false-positives.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20211118154932.25859-1-xemul@scylladb.com>
I intentionally store lambdas in variables and pass them to
with_scheduling_group using std::ref. Coroutines don't put variables
captured by lambdas on the coroutine frame. If the lambda containing them is not
stored, the captured variables will be lost, resulting in stack/heap use
after free errors. An alternative is to capture variables, then create
local variables inside lambda bodies that contain a copy/moved version
of the captured ones. For example, if the post_flush lambda wasn't
stored in a dedicated variable, then it wouldn't be put on the coroutine
frame. At the first co_await inside of it, the lambda object along with
variables captured by it (old and &newtabs created inside square
brackets) would go away. The underlying objects (e.g. newtabs created in
the outer scope) would still be valid, but the reference to it would be
gone, causing most of the tests to fail.
Message-Id: <20211118131441.215628-2-mikolaj.sieluzycki@scylladb.com>
The previous implementation based on `delivery_queue` had a serious
defect: if receiving a message (`rpc::receive`) blocked, other messages
in the queue had to wait. This would cause, for example, `vote_request`
messages to stop being handled by a server if the server was in the middle
of applying a snapshot.
Now `rpc::receive` returns `void`, not `future<>`. Thus we no longer
need `delivery_queue`: the network message delivery function can simply
call `rpc::receive` directly. Messages which require asynchronous work
to be performed (such as snapshot application) are handled in
`rpc::receive` by spawning a background task. The number of such
background tasks is limited separately for each message type; now if
we exceed that limit, we drop other messages of this type (previously
they would queue up indefinitely and block not only other messages
of this type but different types as well).
Message-Id: <20211116163316.129970-1-kbraun@scylladb.com>
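The per-type limiting can be sketched as follows (a toy model, not the actual Raft rpc code; MessageHandler and the callback protocol are assumptions):

```python
class MessageHandler:
    """Asynchronous messages spawn background tasks, limited per
    message type; excess messages of a type are dropped instead of
    queueing up and blocking other message types."""
    def __init__(self, limits):
        self._limits = limits                     # type -> max in-flight tasks
        self._in_flight = {t: 0 for t in limits}
        self.dropped = []

    def receive(self, msg_type, start_background_task):
        if self._in_flight[msg_type] >= self._limits[msg_type]:
            self.dropped.append(msg_type)         # drop; never block delivery
            return
        self._in_flight[msg_type] += 1
        # Hand the runtime a thunk to call when the background work ends.
        start_background_task(lambda: self._done(msg_type))

    def _done(self, msg_type):
        self._in_flight[msg_type] -= 1
```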
In early versions of the series which proposed protocol servers, the
interface had two methods answering pretty much the same question of
whether the server is running or not:
* listen_addresses(): empty list -> server not running
* is_server_running()
To reduce redundancy and to avoid possible inconsistencies between the
two methods, `is_server_running()` was scrapped, but re-added by a
follow-up patch because `listen_addresses()` proved to be unreliable as
a source for whether the server is running or not.
This patch restores the previous state of having only
`listen_addresses()` with two additional changes:
* rephrase the comment on `listen_addresses()` to make it clear that
implementations must return empty list when the server is not running;
* those implementations that have a reliable source of whether the
server is running or not, use it to force-return an empty list when
the server is not running
Tests: dtest(nodetool_additional_test.py)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20211117062539.16932-1-bdenes@scylladb.com>
For leader stepdown purposes a non voting member is not different
from a node outside of the config. The patch makes relevant code paths
to check for both conditions.
If a node is a non voting member it cannot be a leader, so the stable
leader rule should not be applied to it. This patch aligns non voting
node behaviour with a node that was removed from the cluster. Both of
them stepdown from leader position if they happen to be a leader when
the state change occurred.
Make it more robust by tracking both partial and sealed sstables.
This way, maybe_r__e__s__by_sst() won't pick partial sstables as
part of incremental compaction. It works today because interposer
consumer isn't enabled with incremental compaction, so there's
a single consumer which will have sealed the sstable before
the function for early replacement is called, but the story is
different if both are enabled.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211117135817.16274-1-raphaelsc@scylladb.com>
fmt 8 checks format strings at compile time, and requires that
non-compile-time format strings be wrapped with fmt::runtime().
Do that, and to allow coexistence with fmt 7, supply our own
do-nothing version of fmt::runtime() if needed. Strictly speaking
we shouldn't be introducing names into the fmt namespace, but this
is transitional only.
Closes #9640
Fixes #9630
Adds support for importing a CRL (certificate revocation list). This will be
monitored and reloaded like certs/keys. Allows blacklisting individual certs.
Closes #9655
Merged patch series from Botond Dénes:
Redis's controller, unlike all other protocol's controllers is called
service and is not even in the redis namespace. This is made even worse
by the redis directory also having a server.{hh,cc}, making one always
second-guess which is which.
This series applies to the redis controller the convention used by
(almost) all other service controller classes:
* They are called controller
* They are in a file called ${protocol}/controller.{hh,cc}
* They are in a namespace ${protocol}
(Thrift is not perfectly following this either).
Botond Dénes (3):
redis: redis_service: move in redis namespace
redis: redis::service -> redis::controller
redis: mv service.* -> controller.*
configure.py | 2 +-
main.cc | 10 ++++-----
redis/{service.cc => controller.cc} | 32 ++++++++++++++++-------------
redis/{service.hh => controller.hh} | 10 ++++-----
4 files changed, 29 insertions(+), 25 deletions(-)
rename redis/{service.cc => controller.cc} (87%)
rename redis/{service.hh => controller.hh} (93%)
Follow the naming scheme for the controller class/instance used by all
other protocol controllers:
* rename class: service -> controller;
* rename variable in main.cc: redis -> redis_ctl;
This is an undocumented feature that causes confusion so let's get rid
of it.
tests: unit(dev)
Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
Closes #9639
A member variable is a reference, not a pure value, so std::same_as<>
needs to be given a reference (and clang 13 insists). However, clang
12 doesn't accept the correct constraint, so use std::convertible_to<>
as a compromise.
Closes #9642
Now, when the offstrategy task is stopped, it exits the repeat
loop if (!can_proceed(task)) without going through
_tasks.remove(task) - causing the assert in compaction_manager::remove
to trip, as stop_ongoing_compactions will be resolved
while the task is still listed in _tasks.
Fixes #9634
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
The downloaded packages might be deleted automatically after installation,
in which case we would provide an incomplete installer to the user.
This patch configures apt to keep the downloaded packages after
installation.
Signed-off-by: Amos Kong <kongjianjun@gmail.com>
Closes #9592
If we get errors/exceptions in delete_segments we can (and probably will) lose track of disk footprint counters. This can in turn, if using hard limits, cause us to block indefinitely on segment allocation since we might think we have a larger footprint than we actually do.
Of course, if we actually fail deleting a segment, it is 100% true that we still technically hold this disk footprint (now unreachable), but for cases where for example outside forces (or wacky tests) delete a file behind our backs, this might not be true. One could also argue that our footprint is the segments and file names we keep track of, and the rest is exterior sludge.
In any case, if we have any exceptions in delete_segments, we should recalculate disk footprint based on current state, and restart all new_segment paths etc.
Fixes #9348
(Note: this is based on previous PR #9344 - so shows these commits as well. Actual changes are only the latter two).
Closes #9349
* github.com:scylladb/scylla:
commitlog: Recalculate footprint on delete_segment exceptions
commitlog_test: Add test for exception in alloc w. deleted underlying file
commitlog: Ensure failed-to-create-segment is re-deleted
commitlog::allocate_segment_ex: Don't re-throw out of function
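The recovery described in the merge can be sketched like this (a toy model; the real commitlog tracks segments quite differently):

```python
class Commitlog:
    """On any failure while deleting segments, recompute the
    disk-footprint counter from the segments actually still tracked,
    instead of trusting the running total."""
    def __init__(self, segments):
        self._segments = dict(segments)          # name -> size on disk
        self.footprint = sum(segments.values())

    def delete_segments(self, names, unlink):
        try:
            for name in names:
                unlink(name)                     # may raise, e.g. file vanished
                self.footprint -= self._segments.pop(name)
        except OSError:
            # Bookkeeping may be stale now: treat all requested names as
            # gone (a simplifying assumption of this sketch) and rebuild
            # the counter from the surviving state.
            for name in names:
                self._segments.pop(name, None)
            self.footprint = sum(self._segments.values())
```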
We've been observing hard to explain crashes recently around
lsa_buffer destruction, where the containing segment is absent in
_segment_descs which causes log_heap::adjust_up to abort. Add more
checks to catch certain impossible scenarios which can lead to this
sooner.
Refs #9192.
Message-Id: <20211116122346.814437-1-tgrabiec@scylladb.com>
We cannot recover from a failure in this method. The implementation
makes sure it never happens. Invariants will be broken if this
throws. Detect violations early by marking as noexcept.
We could make it exception safe and try to leave the data structures
in a consistent state but the reclaimer cannot make progress if this throws, so
it's pointless.
Refs #9192
Message-Id: <20211116122019.813418-1-tgrabiec@scylladb.com>
Other than looking sane, this change continues the tradition, founded
by the --workdir option, of freeing the developer from the annoying
necessity of typing too many options when scylla is started by hand
for devel purposes.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20211116104815.31822-1-xemul@scylladb.com>
There's at least one tiny race in generic_server code. The trailing
.handle_exception after the conn->process() captures `this`, but since the
whole continuation chain happens in the background, `this` can be
released, causing the whole lambda to execute on a freed generic_server
instance. This, in turn, is not nice because the captured `this` is used
to get the _logger from.
The fix is based on the observation that all connections pin the server
in memory until all of them (the connections) are destructed. That said, to
keep the server alive in the aforementioned lambda it's enough to make
sure the conn variable (an lw_shared_ptr to the connection) is alive in
it. To avoid generating a bunch of tiny continuations with identical sets
of captures -- tail the single .then_wrapped() one and do whatever is
needed to wrap up the connection processing in it.
tests: unit(dev)
fixes: #9316
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20211115105818.11348-1-xemul@scylladb.com>
In the full-duplex capable scheduler the _handles list contains
direct pointers on pclass data, not lw_shared_ptr's.
Most of the time this container is empty so this bug is not
triggerable right at once.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20211116084250.21399-1-xemul@scylladb.com>
Add schema parameter so that:
* Caller has better control over schema -- especially relevant for
reverse reads where it is not possible to follow the convention of
passing the query schema which is reversed compared to that of the
mutations.
* Now that we don't depend on the mutations for the schema, we can lift
the restriction on mutations not being empty: this leads to safer
code. When the mutations parameter is empty, an empty reader is
created.
Add "make_" prefix to follow convention of similar reader factory
functions.
Tests: unit(dev)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20211115155614.363663-1-bdenes@scylladb.com>
In the DynamoDB API, a number is encoded in JSON requests as something
like: {"N": "123"} - the type is "N" and the value "123". Note that the
value of the number is encoded as a string, because the floating-point
range and accuracy of DynamoDB differs from what various JSON libraries
may support.
We have a function unwrap_number() which supported the value of the
number being encoded as an actual number, not a string. But we should
NOT support this case - DynamoDB doesn't. In this patch we add a test
that confirms that DynamoDB doesn't, and remove the unnecessary case
from unwrap_number(). The unnecessary case also had a FIXME, so it's
a good opportunity to get rid of a FIXME.
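A minimal sketch of the stricter validation rule, using a hypothetical json_scalar stand-in (not Scylla's actual JSON types or the real unwrap_number() signature):

```cpp
#include <cassert>
#include <stdexcept>
#include <string>
#include <variant>

// Hypothetical stand-in for a parsed JSON scalar found under the "N" key.
using json_scalar = std::variant<std::string, double>;

// Accept only the string encoding {"N": "123"}; reject a bare JSON
// number {"N": 123}, mirroring DynamoDB's behavior.
std::string unwrap_number(const json_scalar& v) {
    if (auto s = std::get_if<std::string>(&v)) {
        return *s;
    }
    throw std::invalid_argument("SerializationException: number must be string-encoded");
}
```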
When writing the test, I noticed that the error which DynamoDB returns
in this case is SerializationException instead of the more usual
ValidationException. I don't know why, but let's also change the error
type in this patch.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211115125738.197099-1-nyh@scylladb.com>
Online discard asks the disk to erase flash memory cells as soon
as files are deleted. This gives the disk more freedom to choose
where to place new files, so it improves performance.
On older kernel versions, and on really bad disks, this can reduce
performance so we add an option to disable it.
Since fstrim is pointless when online discard is enabled, we
don't configure it if online discard is selected.
I tested it on an AWS i3.large instance; the flag showed up in
`mount` after configuration.
Closes #9608
In scylla_util.py, we provide `systemd_unit.is_active()` to return the `systemctl is-active` output.
When we introduced the systemd_unit class, we returned the `systemctl is-active` output as a string, but we later changed the return value to bool (2545d7fd43).
This was because `if unit.is_active():` always evaluated to True even when it returned "failed" or "inactive"; the change was meant to avoid such scripting bugs.
However, this was probably a mistake.
A systemd unit is not in one of just two states like "start" / "stop"; there are many states.
And we already use multiple unit states ("activating", "failed", "inactive", "active") in our Cloud image login prompt:
https://github.com/scylladb/scylla-machine-image/blob/next/common/scylla_login#L135
After we merged 2545d7fd43, the login prompt broke, because the function no longer returns the string the script expects (https://github.com/scylladb/scylla-machine-image/issues/241).
I think we should revert 2545d7fd43; it should return exactly the same value as `systemctl is-active` reports.
Fixes #9627
Fixes scylladb/scylla-machine-image#241
Closes #9628
* github.com:scylladb/scylla:
scylla_ntp_setup: use string in systemd_unit.is_active()
Revert "scylla_util.py: return bool value on systemd_unit.is_active()"
Commit 3f56c49a9e put redis into the protocol_servers list of the
storage service. Since then there's no need for an explicit stop_server
call on shutdown -- the protocol_servers machinery will do it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Message-Id: <20211109154259.1196-1-xemul@scylladb.com>
It is bad form to introduce branches just for statistics, since branches
can be expensive (even when perfectly predictable, they consume branch
history resources). Switch to simple addition instead; this should not
cause any cache misses since we already touch other statistics earlier.
The inputs are already boolean, but cast them to boolean anyway so it
is clear we're adding 0/1, not a count.
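A minimal sketch of the branch-free accounting, with illustrative counter names (not Scylla's actual statistics struct):

```cpp
#include <cassert>
#include <cstdint>

// Illustrative counters.
struct stats {
    uint64_t ops = 0;
    uint64_t hits = 0;
};

// Branch-free accounting: add the 0/1 value of the condition instead of
// branching with 'if (hit) { ++s.hits; }'. The explicit bool cast makes
// it clear we add 0 or 1, not an arbitrary count.
inline void account(stats& s, int hit) {
    s.ops += 1;
    s.hits += static_cast<uint64_t>(static_cast<bool>(hit));
}
```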
Closes #9626
Change b0a2a9771f broke the generic API implementation of
is_native_transport_running, which relied on the address list being
empty after the server is stopped.
To fix that, this change introduces a pure virtual method:
protocol_server::is_server_running that can be implemented
by each derived class.
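A sketch of the resulting interface shape (the derived class and member names besides protocol_server::is_server_running are illustrative):

```cpp
#include <cassert>

// The described interface: each protocol server reports its own running
// state, so the generic API no longer infers it from the address list.
struct protocol_server {
    virtual ~protocol_server() = default;
    virtual bool is_server_running() const = 0;
};

// Hypothetical derived class for illustration.
struct cql_server_stub : protocol_server {
    bool running = false;
    bool is_server_running() const override { return running; }
};
```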
Test: unit(dev)
DTest: nodetool_additional_test.py:TestNodetool.binary_test
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Message-Id: <20211114135248.588798-1-bhalevy@scylladb.com>
dnf has replaced yum on Fedora and CentOS. On modern versions of Fedora,
you have to install an extra package to get the old name working, so
avoid that inconvenience and use dnf directly.
Closes #9622
Add "rows" field to system.large_partitions. Add partitions to the
table when they are too large or have too many rows.
Fixes #9506
Signed-off-by: Michael Livshin <michael.livshin@scylladb.com>
Closes #9577
* seastar a189cdc45...04c6787b3 (12):
> Convert std::result_of to std::invoke_result
> Merge "IO queue full-duplex mode" from Pavel E
> Merge "Report bytes/ops for R and W separately" from Pavel E
> websocket: override std::exception::what() correctly
> tests: websocket_test: remove unused lambda capture
> Merge "Improve IO classes preemption" from Pavel E
> Revert "Merge "Improve IO classes preemption" from Pavel E"
> Merge "Add skeleton implementation of a WebSocket server" from Piotr S
> Merge "Improve IO classes preemption" from Pavel E
> io_queue: Add starvation time metrics (per-class)
> Revert "Merge "Add skeleton implementation of a WebSocket server" from Piotr S"
> Merge "Add skeleton implementation of a WebSocket server" from Piotr S
No functional changes, but makes the code shorter and gets rid
of a few allocations.
Coroutinizing has_column_family_access is deliberately skipped and
commented, since some callers expect this function to throw instead
of returning an exceptional future.
Message-Id: <958848a1eeeef490b162d2d2b805c8a14fc9082b.1636704996.git.sarna@scylladb.com>
Fresh seastar has duplex IO queues (and some more goodies), which
require corresponding changes in scylla-gdb.py.
* xemul/br-gdb-duplex-ioqueues:
scylla-gdb: Support new fair_{queue|group}s layout
scylla-gdb: Add boost::container::small_vector wrapper
scylla-gdb: Fix indentation aft^w before next patch
In recent seastar, io_queues carry several fair_queues on board, and so
do the io_groups. The queues are in a boost small_vector, the groups
in a vector of unique_ptrs. This patch adds this knowledge to the
scylla-gdb script.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The upcoming seastar update will turn fair_groups and fair_queues into
arrays, so scylla-gdb will need to iterate over them with some sort of
loop. This patch makes the queue/group printing indentation match this
future loop body and prepares the loop variables while at it.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
"
Scylla can be configured via a bunch of config files plus
a bunch of command-line options. Collecting these all together
can be challenging.
The proposed table solves a big portion of this by dumping
the db::config contents as a table. For convenience (and,
maybe, to facilitate Benny's CLI) it's possible to update
the 'value' column of the table with a CQL request.
There exists a PR with a table that exports loglevels in
the form of a table. The updating technique used in this set
is applicable to that table as well.
tests: compilation(dev, release, debug), unit(debug)
"
* 'br-db-config-virtual-table-3' of https://github.com/xemul/scylla:
tests: Unit test for system.config virtual table
system_keyspace: Table with config options
code: Push db::config down to virtual tables
storage_proxy: Propagate virtual table exceptions messages
table: Virtual writer hook (mutation applier)
table: Rewrap table::apply()
table: Mark virtual reader branch with unlikely
utils: Add config_src::source_name() method
utils: Ability to set_value(sstring) for an option
utils: Internal change of config option
utils: Mark some config_file methods noexcept
When building a docker image we rely on the `VERSION` value from
`SCYLLA-VERSION-GEN`. For `rc` releases only there is a difference
between the configured version (X.X.rcX) and the actual Debian package
we generate (X.X~rcX).
Using a similar solution as I did in dcb10374a5
Fixes: #9616
Closes #9617
A config option value is reported as the 'text' type and contains
a string as it would look in the JSON config.
The table is UPDATE-able. Only the 'value' column can be set,
and the accepted value must be a string. It will be converted into
the option type automatically; however, in the current implementation
this isn't 100% precise -- the conversion is a lexical cast, which
only works for simple types. However, live-updateable values are
only of those types, so it works in the supported cases.
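A minimal sketch of such a lexical cast via stream extraction (an assumption about the mechanism; the actual implementation may differ, e.g. use boost::lexical_cast):

```cpp
#include <cassert>
#include <sstream>
#include <stdexcept>
#include <string>

// Lexical cast: parse the string through stream extraction. This only
// works for simple (e.g. numeric) types, which is enough here because
// only such options are live-updateable.
template <typename T>
T lexical_cast(const std::string& s) {
    std::istringstream in(s);
    T value;
    if (!(in >> value)) {
        throw std::invalid_argument("cannot convert '" + s + "'");
    }
    return value;
}
```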
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The db::config reference is available on the database, which
can be obtained from the virtual_table itself. The problem is that
it's a const reference, while system.config will be updateable
and will need a non-const reference.
Adding non-const get_config() on the database looks wrong. The
database shouldn't be used as config provider, even the const
one.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The intention is to return some meaningful info to the CQL caller
if a virtual table update fails. Unfortunately the "generic" error
reporting in CQL is not extremely flexible, so the best option
seems to be reporting a regular write failure with a custom message.
For now this only works for virtual table errors.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Symmetrically to the virtual reader one, add a virtual writer
callback on a table that will be in charge of applying the
provided mutation.
If a virtual table doesn't override this apply method, a
dedicated exception is thrown. The next patch will catch it and
propagate it back to the caller, so it's a new exception type, not
an existing/std one.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The main motivation is to have future returning apply (to be used
by next patches). As a side effect -- indentation fix and private
dirty_memory_region_group() method.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There will soon appear an updateable system.config table that
pushes sstrings into named_value-s. Prepare for this change
by adding the respective .set_value() call. Since the update
only works for LiveUpdate-able options, and the inability to do it
should be propagated back to the caller, make this method return
true/false depending on whether the update took place.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
When a named_value is .set_value()-d, the caller may specify the reason
for the change. If not specified it's set to None, but None means
"it was there by default and wasn't changed", so it's a bit of a lie.
Add an explicit Internal reason. It's actually used by the directories
code that updates all directories according to the --workdir option.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
"
Draining the database is currently scattered across the do_drain()
method of the storage_service, which also distinguishes shutdown drain
from API drain.
This set packs this logic into the database::drain() method.
tests: unit(dev), start-stop-drain(dev)
"
* 'br-database-drain' of https://github.com/xemul/scylla:
database, storage_service: Pack database::drain() method
storage_service: Shuffle drain sequence
storage_service, database: Move flush-on-drain code
storage_service: Remove bool from do_drain
The series fixes the following issues:
- a server may use the wrong configuration after applying a remote snapshot, causing a split-brain situation
- an assertion failure in raft::server_impl::notify_waiters()
- snapshot transfer to a server removed from the configuration should be aborted
- the cluster may become stuck when a follower takes a snapshot after an accepted entry that the leader didn't learn about
* scylla-dev/random-test-fixes-v2:
raft: rename rpc_configuration to configuration in fsm output
raft: test: test case for the issue #9552
raft: fix matching of a snapshotted log on a follower
raft: abort snapshot transfer to a server that was removed from the configuration
raft: fix race between snapshot application and committing of new entries
raft: test: add test for correct last configuration index calculation during snapshot application
raft: do not maintain _last_conf_idx and _prev_conf_idx past snapshot index
raft: correctly truncate the log in a persistence module during snapshot application
clang evaluates function arguments from left to right, while gcc does so
in reverse (the C++ standard leaves the order unspecified). Therefore,
this code can be correct on clang and incorrect on gcc:
```
f(x.sth(), std::move(x))
```
This patch fixes one such instance of this bug, in memtable.cc.
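The hazard and the fix can be sketched as follows (tag() and safe_call() are hypothetical helpers, not the memtable.cc code):

```cpp
#include <cassert>
#include <string>
#include <utility>

// Hypothetical sink taking a value derived from x plus ownership of x.
std::string tag(std::size_t n, std::string s) {
    return s + "/" + std::to_string(n);
}

std::string safe_call() {
    std::string x = "abc";
    // Buggy form: tag(x.size(), std::move(x)) -- if the second argument's
    // move-construction is evaluated first, x.size() reads a moved-from
    // string. Fix: read x into a local before any move can happen.
    auto n = x.size();
    return tag(n, std::move(x));
}
```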
Fixes #9605.
Closes #9606
"
table_state is being introduced for the compaction subsystem, to remove
the table dependency from the compaction interface, fix layer violations,
and also make unit testing easier, as table_state is an abstraction that
can be implemented even with no actual table backing it.
In this series, compaction strategy interfaces are switching to table_state,
and eventually, we'll make compact_sstables() switch to it too. The idea is
that no compaction code will directly reference a table object, but only work
with the abstraction instead. So compaction subdirectory can stop
including database.hh altogether, which is a great step forward.
"
* 'table_state_v5' of https://github.com/raphaelsc/scylla:
sstable_compaction_test: switch to table_state
compaction: stop including database.hh for compaction_strategy
compaction: switch to table_state in estimated_pending_compactions()
compaction: switch to table_state in compaction_strategy::get_major_compaction_job()
compaction: switch to table_state in compaction_strategy::get_sstables_for_compaction()
DTCS: reduce table dependency for task estimation
LCS: reduce table dependency for task estimation
table: Implement table_state
compaction: make table param of get_fully_expired_sstables() const
compaction_manager: make table param of has_table_ongoing_compaction() const
Introduce table_state
Move run_with_compaction_disabled() into compaction manager
run_with_compaction_disabled() living in table is a layer violation, as
the logic of disabling compaction for a table T clearly belongs to the
manager, and table shouldn't be aware of such implementation details.
This makes things less error prone too, as there's no longer a need for
coordination between table and manager.
The manager now takes all the responsibility.
* 'move_disable_compaction_to_manager/v6' of https://github.com/raphaelsc/scylla:
compaction: move run_with_compaction_disabled() from table into compaction_manager
compaction_manager: switch to coroutine in compaction_manager::remove()
compaction_manager: add struct for per table compaction state
compaction_manager: wire stop_ongoing_compactions() into remove()
compaction_manager: introduce stop_ongoing_compactions() for a table
compaction_manager: prevent compaction from being postponed when stopping tasks
compaction_manager: extract "stop tasks" from stop_ongoing_compactions() into new function
The storage_service::do_drain() currently ends by shutting down the
compaction manager, flushing CFs and shutting down the commitlog.
All three belong to the database and deserve to be packed into
a single database::drain() method.
A note -- these steps are cross-shard synchronized, but database
already has a barrier for that.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Right now the draining sequence is
- stop transport (protocol servers, gossiper, streaming)
- shutdown tracing
- shutdown compaction manager
- flush CFs
- drain batchlog manager
- stop migration manager
- shutdown commitlog
This violates the layering -- both the batchlog and migration managers
are higher-level services than the database, so they should be
shut down/drained before it, i.e. before shutting down the compaction
manager and flushing all CFs.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Flushing all CFs on shutdown is currently fully managed in the storage
service, which looks weird. A better place for it seems to be the
database itself.
Moving the flushing code also implies moving the drain_progress thing
and patching the relevant API call.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Today do_drain() distinguishes shutdown drain from API drain. The reason
is that the compaction manager subscribes to main's abort signal and
drains itself early. Thus, on a regular drain it needs an extra kick,
which would crash if called from the shutdown drain.
This differentiation should sit in the compaction manager itself.
Last method in compaction_strategy using table. From now on,
compaction strategy no longer works directly with table.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
From now on, get_major_compaction_job() will use table_state instead of
a plain reference to table.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
From now on, get_sstables_for_compaction() will use table_state.
With table_state, we avoid layer violations like the strategy using the
manager, and also make testing easier.
Compaction unit tests were temporarily disabled to avoid a giant
commit which is hard to parse.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Similar to LCS, let's reduce table dependency in DTCS, to make it
easier to switch to table_state.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
let's reduce table dependency from LCS task estimation, to make
it easier to switch to table_state.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This is the first implementation of table_state, intended to be used
within compaction. It contains everything needed for compaction
strategies. Subsequently, compaction strategy interface will replace
table by table_state, and later all compaction procedures.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This abstraction is intended to be used within compaction layer,
to replace direct usage of table. This will simplify interfaces,
and also simplify testing as an actual table is no longer
strictly required.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
There can be a situation where a leader sends a follower entries
that the latter has already snapshotted. Currently the follower considers
those to be outdated appends and rejects them, but this may cause the
follower's progress to become stuck:
- A is the leader, B is a follower; there are other followers which A used to commit entries
- A remembers that the last matched entry for B is 10, so the next entry to send is 11. A managed to commit entry 11 using the other followers
- A sends entry 11 to B
- B receives it, accepts it, and updates its commit index to 11. It sends a success reply to A, but it never reaches A due to a network partition
- B takes a snapshot at index 11
- A sends entry 11 to B again
- B rejects it since it is inside the snapshot
- A receives the reject and retries from the same entry
- the same thing happens again
We should not reject such outdated entries: if they fall inside a
snapshot it means they match (according to the log matching property).
Accepting them unsticks the case above.
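The follower-side check can be sketched roughly like this (types and names are illustrative, not the actual raft::fsm code):

```cpp
#include <cassert>
#include <cstdint>

// Illustrative follower state.
struct follower_log {
    uint64_t snapshot_idx;  // everything up to and including this index is snapshotted
    uint64_t last_idx;      // last entry present in the log
};

// Sketch of the fixed check: an entry at or below the snapshot index is
// inside the snapshot, hence already matched by the log matching
// property -- accept it instead of rejecting, so the leader stops
// retrying the same entry forever.
bool accept_append(const follower_log& log, uint64_t entry_idx) {
    if (entry_idx <= log.snapshot_idx) {
        return true;  // previously this was rejected, wedging the leader
    }
    return entry_idx <= log.last_idx + 1;  // usual contiguity check
}
```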
Fixes #9552
Completion notification code assumes that the previous snapshot is
applied before new entries are committed; otherwise it asserts that some
notifications were missing. But currently commit notifications and
snapshot application run in different fibers, so there can be a race
between them.
Fix that by moving commit notification into the applier fiber as well.
Fixes #9550
The log maintains the _last_conf_idx and _prev_conf_idx indexes into the
log to point to where the latest and previous configurations can be
found. If they are zero, it means the latest config is in the snapshot.
When a snapshot with trailing entries is applied, we can safely reset to
zero those indexes that are smaller than the snapshot's, because the
snapshot will have the latest config anyway. This simplifies maintenance
of those indexes, since their value will not depend on the user-configured
snapshot_trailing parameter.
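A rough sketch of the rule, with illustrative types (assuming indexes at or below the snapshot index are the ones reset, since the snapshot carries the latest configuration):

```cpp
#include <cassert>
#include <cstdint>

// Illustrative index pair, not the actual raft::log fields.
struct conf_indexes {
    uint64_t last_conf_idx = 0;
    uint64_t prev_conf_idx = 0;
};

// On applying a snapshot at snp_idx, reset to zero any configuration
// index covered by the snapshot -- zero means "the latest configuration
// is in the snapshot".
void apply_snapshot(conf_indexes& c, uint64_t snp_idx) {
    if (c.prev_conf_idx <= snp_idx) {
        c.prev_conf_idx = 0;
    }
    if (c.last_conf_idx <= snp_idx) {
        c.last_conf_idx = 0;
    }
}
```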
Refs #9331
In segment::close() we add space to the manager's "wasted" counter. In the
destructor, if we can cleanly delete/recycle the file, we remove it. However,
if we never went through close() (shutdown - ok, exception in batch_cycle -
not ok), we can end up subtracting numbers that were never added in the
first place.
Just keep track of the bytes added in a variable.
The observed behaviour in the above issue is timeouts in batch_cycle, where
we declare the segment closed early (because we cannot safely add anything
more - chunks could get partial/misplaced). The exception will propagate to
the caller(s), but the segment will not go through an actual close() call ->
the destructor should not assume it did.
Closes #9598
Due to an error in transforming the above routine, readers that have <= a
buffer's worth of content are dropped without being consumed.
This is because the outer consume loop is conditioned on
`is_end_of_stream()`, which will be set for readers that eagerly
pre-fill their buffer and have no more data than what is in their
buffer.
Change the condition to also check `is_buffer_empty()` and only drop
the reader when both are true.
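The fixed loop condition can be illustrated with a toy reader (method names mirror the ones described; the mock is not the real reader API):

```cpp
#include <cassert>
#include <deque>

// Minimal mock of a reader that eagerly pre-filled its buffer and has
// nothing beyond it.
struct mock_reader {
    std::deque<int> buffer;
    bool is_end_of_stream() const { return true; }  // eager reader: stream already ended
    bool is_buffer_empty() const { return buffer.empty(); }
    int pop() { int v = buffer.front(); buffer.pop_front(); return v; }
};

// Fixed outer loop: stop only when the stream ended AND the buffer is
// drained. Conditioning on is_end_of_stream() alone would consume
// nothing from this reader and drop its buffered content.
int consume_all(mock_reader& r) {
    int consumed = 0;
    while (!(r.is_end_of_stream() && r.is_buffer_empty())) {
        r.pop();
        ++consumed;
    }
    return consumed;
}
```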
Fixes: #9594
Tests: unit(mutation_writer_test --repeat=200, dev)
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20211108092923.104504-1-bdenes@scylladb.com>
"
When the gossiper processes its messages in the background, some of
the continuations may pop up after the gossiper is shut down.
This, in turn, may result in unwanted code being executed when
it isn't expected.
In particular, storage_service notification hooks may try to
update the system keyspace (with "fresh" peer info/state/tokens/etc).
This update doesn't work after drain, because drain shuts down the
commitlog. The intention was that the gossiper would _not_ notify
anyone after drain, because it's shut down during drain too.
But since there are background continuations left, it doesn't
work as expected.
refs: #9567
tests: unit(dev), dtest.concurrent_schema_changes.snapshot(dev)
"
* 'br-gossiper-background-messages-2' of https://github.com/xemul/scylla:
gossiper: Guard background processing with gate
gossiper: Helper for background messaging processing
"
On start scylla resolves several hostnames into addresses. Different
places use different hostname selection logic, e.g. the API address
can be the listen one if the dedicated option is not set. Failure to
resolve a hostname is reported with an exception that (sometimes)
contains the hostname, but that isn't very convenient -- it's better
to know the config option name. Also, resolving different hostnames
has different decoration around it, e.g. prometheus carries a main-local
lambda just to nicely wrap the try/catch block.
This set unifies this zoo and makes main() shorter and less hairy:
1. All failures to resolve a hostname are reported with an
exception containing the relevant config option
2. The || operator for named_value's is introduced to make
the option selection look as short as
resolve(cfg->some_address() || cfg->another_address())
3. All sanity checks are explicit and happen early in main
4. No dangling local variables carrying the cfg->...() value
5. Use resolved IP when logging a "... is listening on ..."
message after a service start
tests: unit(dev)
"
* 'br-ip-resolve-on-start' of https://github.com/xemul/scylla:
main: Move fb-utilities initialization up the main
code: Use utils::resolve instead of inet_address::lookup
main: Remove unused variable
main: Sanitize resolving of listen address
main: Sanitize resolving of broadcast address
main: Sanitize resolving of broadcast RPC address
main: Sanitize resolving of API address
main: Sanitize resolving of prometheus address
utils: Introduce || operator for named_values
db.config: Verbose address resolver helper
main: Remove api-port and prometheus-port variables
alternator: Resolve address with the help of inet_address
redis, thrift: Remove unused captures
* flat_reader_assertions::produces_range_tombstone() does not actually
check range tombstones beyond the fact that they are in fact range
tombstones (unless non-empty ck_ranges is passed). Fix the immediate
problem, change assertion logic to take split and overlapping range
tombstones into account properly, and also fix several
accidentally-incorrect tests.
Fixes #9470
* Convert the remaining sstable_3_x reader tests to v2, now that they
are more correct and only the actual conversion remains.
This deals with the sstable reader tests that involve range
tombstones.
Signed-off-by: Michael Livshin <michael.livshin@scylladb.com>
For altered tables, the above function creates schema objects
representing the before/after (old/new) table states. In the case of
views, there is a matching mechanism to set the base table field of the
view to the appropriate base table object. This works by iterating over
the list of altered tables and selecting the "new_schema" field of the
first instance matching the keyspace/name of the base table. This ends
up pairing the after/new version of the base table with both the before
and after versions of the view. This means the base attached to the
view is possibly incompatible with the view it is attached to.
This patch fixes this by passing the schema generation (before/after) to
the function responsible for this matching, so it can select the
appropriate version of the base table.
For example, given the following input to `merge_tables_and_views()`:
tables_before = { t1_before }
tables_after = { t1_after }
views_before = { v1_before }
views_after = { v1_after }
Before this patch, the `base_schema` field of `v1_before` would be
`t1_after`, while it obviously should be `t1_before`. This sounds scary
but has no practical implications currently as `v1_before` is only
computed and then discarded without being used.
Tests: unit(dev)
Fixes: #9586
Signed-off-by: Botond Dénes <bdenes@scylladb.com>
Message-Id: <20211108124806.151268-1-bdenes@scylladb.com>
This is intended to fix a bad layer violation: table was given the
responsibility of disabling compaction for a given table T, but that
logic clearly belongs to compaction_manager instead.
Additionally, a gate will be used instead of a counter, as the former
provides the manager with a way to synchronize with functions running
under run_with_compaction_disabled(), so remove() can wait for their
termination.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
A new variant of stop_ongoing_compactions() which stops all
compactions for a given table. It will be reused both in remove()
and by run_with_compaction_disabled(), which will soon be moved into
the compaction_manager.
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
stop_tasks() must make sure that no ongoing task will postpone compaction
when asked to stop. Therefore, let's set all tasks as stopping before
any deferring point, such that no task will postpone compaction for
a table which is being stopped.
compaction_manager::remove() already handles this race with the same
method, and given that remove() will later switch to stop_tasks(),
let's do the same in stop_tasks().
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Setting up the fb_utilities addresses currently sits in the middle of
starting/stopping the real services. It's a bit cleaner to
do it earlier.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
There are some users of the latter call left. They all suffer
from the same problem -- a lack of verbosity on resolution
errors.
While at it, also get rid of useless local variables that are
only there to carry the cfg->...() option over.
Nothing special here, just get rid of a one-shot local variable
and use utils::resolve to improve the verbosity of the
exception thrown on error.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
To resolve this one, main selects between the config option of
the same name and the listen address. Similarly to the
broadcast RPC address, on error the thrown exception is very
generic and doesn't tell which option contained the faulty
address.
The utils::resolve, the || operator and a dedicated explicit sanity
check make this place look better.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The broadcast RPC address is taken from either the config
option of the same name or from the rpc_address one. There is
also a sanity check on the latter. On resolution failure
it's impossible to find out which option caused it; just
the seastar-level exception is printed.
Using the recently added utils helper and || for named values
makes things shorter. The sanity check for INADDR_ANY is
moved up in main() to where the other option sanity checks
sit.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
To find the API address there's a main-local lambda to produce
the verbose exception, as well as a ?:-selection of which option
to use as the API address.
Using utils::resolve and the recently introduced || for named
values makes things much nicer and shorter.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Right now there's a main-local lambda to resolve the address
and throw some meaningful exception.
Using recently introduced utils::resolve() helper makes things
look nicer.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Those named_values that support the .empty() check can be "selected"
like this:
auto& v = option_a() || option_b() || option_c();
This code will put into v a reference to the first non-empty
named_value out of a/b/c.
This "selection" is actually used on start, when scylla decides
which config options to use as the listen/broadcast/rpc/etc. addresses.
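A minimal sketch of such an operator, using a toy named_value that only models the .empty() check (not db::config's real class):

```cpp
#include <cassert>
#include <string>

// Toy stand-in for a config named_value.
struct named_value {
    std::string value;
    bool empty() const { return value.empty(); }
};

// The described "selection": yields a reference to the first non-empty
// operand, so chains pick the first configured option.
const named_value& operator||(const named_value& a, const named_value& b) {
    return a.empty() ? b : a;
}
```

Note that an overloaded operator|| loses short-circuit evaluation; here all operands are cheap references to already-constructed options, so that is harmless.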
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
The helper works on a named_value() and throws an exception containing
the option name for convenient error reporting.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Those variables just pollute the main's scope for no gain.
It's simpler and more friendly to the next patches to use
cfg-> stuff directly.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Alternator needs to look up its address without preferring ipv4
or ipv6. To do this it calls a seastar method, but the same effect is
achieved by calling inet_address::lookup.
This change makes all places in scylla resolve addresses in a
similar way, makes this code line shorter, and removes the need
to specifically explain the alternator hunks in the next patches.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
When shut down, the gossiper may still have some messages being
processed in the background. This brings two problems.
First, the gossiper itself is about to disappear soon and messages
might step on the freed instance (however, this one is not real for
now: the gossiper is not freed for real, just ::stop() is called).
Second, message processing may notify other subsystems which, in
turn, do not expect this after the gossiper is shut down.
The common solution to this is to run background code through a gate
that gets closed at some point -- in the gossiper's case, in ::shutdown().
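A synchronous toy sketch of the gate idiom (the real primitive is seastar::gate, which is asynchronous and whose close() also waits for all entered work to leave):

```cpp
#include <cassert>
#include <stdexcept>

// Background work enters the gate; once shutdown closes it, new work is
// refused, so no late continuation can touch a stopped service.
class gate {
    long _entered = 0;
    bool _closed = false;
public:
    void enter() {
        if (_closed) {
            throw std::runtime_error("gate closed");
        }
        ++_entered;
    }
    void leave() { --_entered; }
    // The real gate's close() also waits until _entered drops to zero.
    void close() { _closed = true; }
    bool is_closed() const { return _closed; }
};
```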
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Some messages are processed by the gossiper on shard 0 in a no-wait
manner. Add a generic helper for that to facilitate the next patches.
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
flat_reader_assertions::produces_range_tombstone() does not actually
check range tombstones beyond the fact that they are in fact range
tombstones (unless non-empty ck_ranges is passed).
Fixing the immediate problem reveals that:
* The assertion logic is not flexible enough to deal with
creatively-split or creatively-overlapping range tombstones.
* Some existing tests involving range tombstones are in fact wrong:
some assertions may (at least with some readers) refer to wrong
tombstones entirely, while others assert wrong things about right
tombstones.
* Range tombstones in pre-made sstables (such as those read by
sstable_3_x_test) have deletion time drift, and that now has to be
somehow dealt with.
This patch (which is not split into smaller ones because that would
either generate unreasonable amount of work towards ensuring
bisectability or entail "temporarily" disabling problematic tests,
which is cheating) contains the following changes:
* flat_reader_assertions check range tombstones more carefully, by
accumulating both expected and actually-read range tombstones into
lists and comparing those lists when a partition ends (or when the
assertion object is destroyed).
* flat_reader_assertions::may_produce_tombstones() can take
constraining ck_ranges.
* Both flat_reader_assertions and flat_reader_assertions_v2 can be
instructed to ignore tombstone deletion times, to help with tests that
read pre-made sstables.
* Affected tests are changed to reflect reality. Most changes to
tests make sense; the only one I am not completely sure about is in
test_uncompressed_filtering_and_forwarding_range_tombstones_read.
Fixes #9470
Signed-off-by: Michael Livshin <michael.livshin@scylladb.com>
"
System tables currently almost uniformly use a pattern like this to
create their schema:
return schema_builder(make_shared_schema(...))
// [...]
.with_version(...)
.build(...);
This pattern is very wasteful because it first creates a schema, then
dismantles it just to recreate it again. This series abolishes this
pattern without much churn by simply adding a constructor to schema
builder that takes identical parameters to `make_shared_schema()`,
then simply removing `make_shared_schema()` from these users, who now
build a schema builder object directly and build the schema only once.
Tests: unit(dev)
"
* 'schema-builder-make-shared-schema-ctor/v1' of https://github.com/denesb/scylla:
treewide: system tables: don't use make_shared_schema() for creating schemas
schema_builder: add a constructor providing make_shared_schema semantics
schema_builder: without_column(): don't assume column_specification exists
schema: add static variant of column_name_type()
Since cqlsh requires a UTF-8 locale, we should configure the default locale
correctly, both for a directly executed shell on docker and via SSH.
(A directly executed shell means "docker exec -ti <image> /bin/bash".)
For SSH, we need to set the correct parameter in /etc/default/locale, which
can be set by the update-locale command.
However, a directly executed shell won't load this parameter, because it is
configured via PAM and we skip login in this case.
To fix this issue, we also need to set the locale variables in the container
image configuration (ENV in Dockerfile, --env in buildah).
Fixes #9570
Closes #9587
This PR introduces 4 new virtual tables aimed at replacing nodetool commands, working towards the long-term goal of replacing nodetool completely, at least for cluster information retrieval purposes.
As you may have noticed, most of these replacements are not exact matches. This is on purpose. I feel that the nodetool commands are somewhat chaotic: there might have been a clear plan on what command prints what, but after years of organic development they are a mess of fields that feel like they don't belong. In addition to this, they are centered on C* terminology which often sounds strange or doesn't make any sense for scylla (off-heap memory, counter cache, etc.).
So in this PR I tried to do a few things:
* Drop all fields that don't make sense for scylla;
* Rename/reformat/rephrase fields that have a corresponding concept in scylla, so that it uses the scylla terminology;
* Group information in tables based on some common theme;
With these guidelines in mind, let's look at the virtual tables introduced in this PR:
* `system.snapshots` - replacement for `nodetool listsnapshots`;
* `system.protocol_servers` - replacement for `nodetool statusbinary` as well as `Thrift active` and `Native Transport active` from `nodetool info`;
* `system.runtime_info` - replacement for `nodetool info`, not an exact match: some fields were removed, some were refactored to make sense for scylla;
* `system.versions` - replacement for `nodetool version`, prints all versions, including build-id;
Closes #9517
* github.com:scylladb/scylla:
test/cql-pytest: add virtual_tables.py
test/cql-pytest: nodetool.py: add take_snapshot()
db/system_keyspace: add versions table
configure.py: move release.cc and build_id.cc to scylla_core
db/system_keyspace: add runtime_info table
db/system_keyspace: add protocol_servers table
service: storage_service: s/client_shutdown_hooks/protocol_servers/
service: storage_service: remove unused unregister_client_shutdown_hook
redis: redis_service: implement the protocol_server interface
alternator: controller: implement the protocol_server interface
transport: controller: implement the protocol_server interface
thrift: controller: implement the protocol_server interface
Add protocol_server interface
db/system_keyspace: add snapshots virtual table
db/virtual_table: remove _db member
db/system_keyspace: propagate distributed<> database and storage_service to register_virtual_tables()
docs/design-notes/system_keyspace.md: add listing of existing virtual tables
docs/guides: add virtual-tables.md
This helps packages built on different machines get the same datestamp
when the builds start at the same time.
* tools/java 05ec511bbb...fd10821045 (1):
> build: use utc for build datestamp
* tools/jmx 48d37f3...d6225c5 (1):
> build: use utc for build datestamp
* tools/python3 c51db54...8a77e76 (1):
> build: use utc for build datestamp
[avi: commit own patches as this one requires excessive coordination
across submodules, for something quite innocuous]
Ref #9563 (doesn't really fix it, but helps a little)
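The reproducibility point above is easy to illustrate: formatting the datestamp in UTC removes the build machine's timezone from the result. A minimal sketch (the build scripts themselves use shell `date -u`; this Python model is just for illustration):

```python
from datetime import datetime, timezone

def build_datestamp(now=None):
    # The same wall-clock instant yields the same stamp on every
    # machine, because we always format in UTC rather than local time.
    if now is None:
        now = datetime.now(timezone.utc)
    return now.astimezone(timezone.utc).strftime("%Y%m%d")

# An instant late on Oct 21 UTC would stamp as Oct 22 in e.g. Sydney
# local time; in UTC it is unambiguous.
ts = datetime(2021, 10, 21, 23, 30, tzinfo=timezone.utc)
stamp = build_datestamp(ts)  # '20211021' regardless of machine timezone
```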
Contains all version related information (`nodetool version` and more).
Example printout:
(cqlsh) select * from system.versions;
key | build_id | build_mode | version
-------+------------------------------------------+------------+-------------------------------
local | aaecce2f5068b0160efd04a09b0e28e100b9cd9e | dev | 4.6.dev-0.20211021.0d744fd3fa
These two files were only added to the scylla executable and some
specific unit tests. As we are about to use the symbols defined in these
files in some scylla_core code move them there.
Loosely contains the equivalent of the `nodetool info` command, with some
notable differences:
* Protocol server related information is in `system.protocol_servers`;
* Information about memory, memtable and cache is reformatted to be
tailored to scylla: C* specific terminology and metrics are dropped;
* Information that doesn't change and is already in `system.local` is
not included;
* Added trace-probability too (`nodetool gettraceprobability`);
TODO(follow-up): exceptions.
Lists all the client protocol servers and their status. Example output:
(cqlsh) select * from system.protocol_servers;
name | is_running | listen_addresses | protocol | protocol_version
------------------+------------+---------------------------------------+----------+------------------
native transport | True | ['127.0.0.1:9042', '127.0.0.1:19042'] | cql | 3.3.1
alternator | False | [] | dynamodb |
rpc | False | [] | thrift | 20.1.0
redis | False | [] | redis |
This prints the equivalent of `nodetool statusbinary` and the "Thrift
active" and "Native Transport active" fields from the `nodetool info`
output with some additional information:
* It contains alternator and redis status;
* It contains the protocol version;
* It contains the listen addresses (if respective server is running);
Replace the simple client shutdown hook registry mechanism with a more
powerful registry of the protocol servers themselves. This allows
enumerating the protocol servers at runtime, checking whether they are
running or not and starting/stopping them.
Nobody seems to unregister client shutdown hooks ever. We are about
to refactor the client shutdown hook machinery so remove this unused
code to make this easier.
We want to replace the current
`storage_service::register_client_shutdown_hook()` machinery with
something more powerful. We want to register all running client protocol
servers with the storage service, allowing enumerating these at runtime,
checking whether they are running or not and starting/stopping them.
As the first step towards this, we introduce an abstract interface that
we are going to implement at the controllers of the various protocol
servers we have. Then we will switch storage service to collect pointers
to this interface instead of simple stop functors.
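The shape of such an interface can be sketched as follows. This is an illustrative Python model with hypothetical method names, not the actual C++ `protocol_server` declaration:

```python
from abc import ABC, abstractmethod

class ProtocolServer(ABC):
    """Abstract interface that each protocol controller (CQL native
    transport, Thrift, alternator, redis) would implement."""
    @abstractmethod
    def name(self): ...
    @abstractmethod
    def is_running(self): ...
    @abstractmethod
    def listen_addresses(self): ...
    @abstractmethod
    def stop(self): ...

class CqlServer(ProtocolServer):
    """Toy implementation standing in for the CQL transport controller."""
    def __init__(self):
        self._running = True
    def name(self):
        return "native transport"
    def is_running(self):
        return self._running
    def listen_addresses(self):
        # Addresses are only reported while the server is running.
        return ["127.0.0.1:9042"] if self._running else []
    def stop(self):
        self._running = False

# storage_service keeps a registry of servers instead of bare stop
# functors, so servers can be enumerated at runtime.
registry = [CqlServer()]
status = [(s.name(), s.is_running()) for s in registry]
```

A virtual table like `system.protocol_servers` can then be populated by simply iterating the registry.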
This member is potentially dangerous as it only becomes non-null some
time after the virtual table object is constructed. This is asking
for a nullptr dereference.
Instead, remove this member and have virtual table implementations that
need a db ask for it in the constructor; it is available in
`register_virtual_tables()` now.
`make_shared_schema()` is a convenience method for creating a schema in
a single function call, however it doesn't have all the advanced
capabilities as `schema_builder`. So most users (which all happen to be
system tables) pass the schema created by it to schema builder
immediately to do some further tweaking, effectively building the schema
twice. This is wasteful.
This patch changes all these users to use the newly added
`schema_builder()` constructor which has the same signature (and
therefore ease-of-use) as `make_shared_schema()`.
make_shared_schema() is often used to create a schema that is then
passed to schema_builder to modify it further. This is wasteful as the
schema is built just to be disassembled and rebuilt again. To replace
this wasteful pattern we provide a schema_builder constructor that has
the same signature as `make_shared_schema()`, allowing follow-up
modifications on the schema before it is fully built.
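The builder-constructor idea can be illustrated generically. The Python below is a hypothetical model of the pattern (the real code is C++, and the names here are invented for illustration):

```python
class Schema:
    """Stands in for the immutable schema object."""
    def __init__(self, ks, cf, columns):
        self.ks, self.cf = ks, cf
        self.columns = tuple(columns)

class SchemaBuilder:
    # New-style constructor: takes the same parameters the old
    # make_shared_schema() took, so no throwaway Schema is built first.
    def __init__(self, ks, cf, columns=()):
        self._ks, self._cf = ks, cf
        self._columns = list(columns)

    def with_column(self, name):
        self._columns.append(name)
        return self

    def build(self):
        # The schema is constructed exactly once, right here.
        return Schema(self._ks, self._cf, self._columns)

# Before: make_shared_schema(...) built a Schema, which the builder
# then dismantled and rebuilt. Now the builder is constructed directly:
s = SchemaBuilder("system", "versions", ["key"]).with_column("build_id").build()
```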
It is only created for column definitions in the schema constructor, so
it will only exist for schema builders created from schema instances.
It is not guaranteed that `without_column()` will only be called on
such builder instances so ensure the implementation doesn't depend on
it.
There were a few places where term was still mentioned.
Removed/replaced term with expression.
search_and_replace is still done only on LHS of binary_operator
because the existing code would break otherwise.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
prepare_term now takes an expression and returns a prepared expression.
It should be renamed to prepare_expression.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
prepare_term is now the only function that uses terms.
Change it so that it returns expression instead of term
and remove all occurrences of expr::to_expression(prepare_term(...))
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
In the old code, sets::delayed_value::bind() contained a check that each serialized value is smaller than a certain size.
I missed this when implementing evaluate(), so it's brought back to ensure identical behaviour.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Add a function that checks whether there is a bind marker somewhere inside an expression.
It's important to note that even when there are no bind markers, there can be other things that prevent immediate evaluation of an expression.
For example, an expression can contain calls to non-pure functions.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
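A recursive check of this kind can be sketched as follows. This is an illustrative Python model of the expression tree; the class names are invented stand-ins, not the real C++ `expr` variants:

```python
from dataclasses import dataclass, field

@dataclass
class BindMarker:
    index: int

@dataclass
class FunctionCall:
    name: str
    args: list = field(default_factory=list)
    pure: bool = True

def contains_bind_marker(e):
    """Depth-first search for a bind marker anywhere in the tree."""
    if isinstance(e, BindMarker):
        return True
    if isinstance(e, FunctionCall):
        return any(contains_bind_marker(a) for a in e.args)
    return False  # plain constants

# No bind markers here, yet the expression is still not immediately
# evaluable, because now() is a non-pure function call.
no_markers = FunctionCall("now", pure=False)
with_marker = FunctionCall("f", args=[1, BindMarker(0)])
```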
Soon there will be other functions that
also search within expressions; find_atom would be confusing then.
find_binop is a more descriptive name.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
find_in_expression is a function that looks into the expression
and finds the given expression variant for which the predicate function returns true.
If nothing is found, it returns nullptr.
For example:
find_in_expression<binary_operator>(e, [](const binary_operator&) {return true;})
Will return the first binary operator found in the expression.
It is now used in find_atom, and soon will be used in other similar functions.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
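Its behavior can be modeled like this. The Python below is an illustrative sketch (the real function is a C++ template over the expression variant; these node classes are invented for the example):

```python
def find_in_expression(wanted_type, expr, pred):
    """Depth-first search: return the first sub-expression of the given
    type for which pred returns True, or None (nullptr in the C++)."""
    if isinstance(expr, wanted_type) and pred(expr):
        return expr
    for child in getattr(expr, "children", ()):
        found = find_in_expression(wanted_type, child, pred)
        if found is not None:
            return found
    return None

class BinaryOperator:
    def __init__(self, op, children=()):
        self.op, self.children = op, children

class Conjunction:
    def __init__(self, children):
        self.children = children

# A predicate that always returns True finds the first node of the
# requested type, mirroring the find_in_expression<binary_operator>
# example from the commit message.
e = Conjunction([BinaryOperator("="), BinaryOperator("<")])
first = find_in_expression(BinaryOperator, e, lambda b: True)
```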
term_slice is an interval from one term to the other: [term1, term2].
Replaced terms with expressions.
Because the name has 'term' in it, it was changed to bounds_slice.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Some structs inside the expression variant still contained terms.
Replace those terms with expressions.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
When a remote snapshot is applied the log is completely cleared, because
snapshot transfer happens only when a common log prefix cannot be found,
so we cannot be sure that existing entries in the log are correct. But
currently this only happens for the in-memory log, by calling apply_snapshot
with trailing set to zero; when the persistence module is called to
store the snapshot, _config.snapshot_trailing is used, which can be
non-zero. This may cause the log to contain incorrect entries after restart.
The patch fixes this by using zero trailing for non-local snapshots.
Fixes #9551
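The fix amounts to choosing the trailing value based on whether the snapshot is local. A minimal sketch of the logic (hypothetical helper name; the real code lives in the C++ raft module):

```python
def snapshot_trailing(is_local_snapshot, configured_trailing):
    """How many log entries to keep behind the snapshot.

    A remote snapshot means no common log prefix was found, so no
    existing entries can be trusted: truncate the whole log (trailing 0).
    A local snapshot may keep the configured number of trailing entries.
    """
    return configured_trailing if is_local_snapshot else 0

local = snapshot_trailing(True, 100)    # keeps 100 trailing entries
remote = snapshot_trailing(False, 100)  # keeps none: full truncation
```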
evaluate_IN_list was only defined for a term,
but now we are removing term, so it should also be defined for an expression.
The internal code is the same - this function used to convert the term to expression
and then did all operations on expression.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Replace all uses of term with expression in cql3/statements/update_statement
There was some trouble with extracting values from JSON.
The original code worked this way on a map example:
> There is a JSON string to parse: {'b': 1, 'a': 2, 'b': 3}
> The code parses the JSON and creates bytes where this map is serialized,
but without removing duplicates, sorting, etc.
> Then a maps::delayed_value is created from these bytes.
During creation map elements are extracted, sorted and duplicates are removed.
This map value is then used in the setter.
Now that maps::delayed_value is changed to expr::constant, the step where elements are sorted is lost.
Because of this we need to do it earlier; the best place is during the original JSON parsing.
Additionally, I suspect that removing duplicated elements used to work only on the first level; in the case of a map of maps it wouldn't work.
Now it will work no matter how many layers of maps there are.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
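The intended behavior (deduplicating keys, sorting, and recursing into nested maps) can be sketched like this. The parsing details are illustrative, and we assume the last occurrence of a duplicate key wins, as Python's `json` module does:

```python
import json

def normalize(value):
    # Recurse into nested maps so sorting and deduplication apply at
    # every level, not only the top one.
    if isinstance(value, dict):
        return {k: normalize(v) for k, v in sorted(value.items())}
    if isinstance(value, list):
        return [normalize(v) for v in value]
    return value

def parse_map(text):
    # json.loads already collapses duplicate keys, keeping the last
    # occurrence; normalize() then sorts keys recursively.
    return normalize(json.loads(text))

# The duplicate "b" collapses and the keys come out sorted.
top = parse_map('{"b": 1, "a": 2, "b": 3}')        # {'a': 2, 'b': 3}
nested = parse_map('{"m": {"b": 1, "a": 2}}')       # sorted inside too
```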
expr::constant is always serialized using the internal cql serialization format,
but currently the code keeps values in the cache in another format.
In preparation for moving from term to expression, change the code
so that values kept in the cache are serialized using
the internal format.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Adds two functions that take a range over pairs of serialized values
and return a serialized map value.
There are 2 functions - one operating on bytes and one operating on managed_bytes.
The version with managed_bytes is used in expression.cc, used to be a local static function.
The bytes version will be used in type_json.cc in the next commit.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Add a method that returns raw_value_view to expr::constant.
It's added for convenience - without it in many places
we would have to write my_value.value.to_view().
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Adds a new function - expr::fill_prepare_context.
This function has the same functionality as term::fill_prepare_context, which will be removed soon.
fill_prepare_context used to take its argument with a const qualifier, but it turns out that the argument is actually modified:
the function sets the cache ids of function calls corresponding to partition key restrictions.
The new function doesn't take const, to make this clear and avoid surprises.
Added expr::visit that takes an argument without the const qualifier.
There were some problems with cache_ids in function_call.
prepare_context used to collect ::shared_ptr<functions::function_call>
of some function call, and then this allowed it to clear
cache ids of all involved functions on demand.
To replicate this prepare_context now collects
shared pointers to expr::function_call cache ids.
It currently collects both, but functions::function_call will be removed soon.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Currently expr::visit can only take a const expression as an argument.
For cases where we want to visit the expression and modify it a new function is needed.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
bind_variable used to have only the type of the bound value.
Now this type is replaced with a receiver, which describes information about the column corresponding to this value.
A receiver contains the type, column name, etc.
The receiver is needed in order to implement fill_prepare_context in the next commit.
It's an argument of prepare_context::add_variable_specification.
Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
Fixes #9348
If we get exceptions in delete_segments, we can, and probably will, lose
track of footprint counters. We need to recompute the used disk footprint,
otherwise we will flush too often, and even block indefinitely on new_seg
iff using hard limits.
Tests that we can handle exception-in-alloc cleanup if the file actually
does not exist. This however uncovers another weakness (addressed in the next
patch): that we can lose track of the disk footprint here, and with hard limits
end up waiting for disk space that never comes. Thus the test does not use a hard
limit.
Fixes #9343
If we fail in allocate_segment_ex, we should push the file opened/created
to the delete set to ensure we reclaim the disk space. We should also
ensure that if we did not recycle a file in delete_segments, we still
wake up any recycle waiters iff we made a file delete instead.
Included a small unit test.
co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Experimental support is available if the 'alternator_ttl' experimental feature is enabled on all nodes.");
co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Experimental support is available if the 'alternator-ttl' experimental feature is enabled on all nodes.");
co_return api_error::unknown_operation("DescribeTimeToLive not yet supported. Experimental support is available if the 'alternator_ttl' experimental feature is enabled on all nodes.");
"description":"Comma seperated keyspaces name to snapshot",
"description":"Keyspace(s) to snapshot. Multiple keyspaces can be provided using a comma-separated list. If omitted, snapshot all keyspaces.",
"required":false,
"allowMultiple":false,
"type":"string",
@@ -632,7 +632,7 @@
},
{
"name":"cf",
"description":"the column family to snapshot",
"description":"Table(s) to snapshot. Multiple tables (in a single keyspace) can be provided using a comma-separated list. If omitted, snapshot all tables in the given keyspace(s).",