diff --git a/db/system_distributed_keyspace.cc b/db/system_distributed_keyspace.cc index de23c355f6..809213f287 100644 --- a/db/system_distributed_keyspace.cc +++ b/db/system_distributed_keyspace.cc @@ -96,6 +96,20 @@ schema_ptr cdc_timestamps() { static const sstring CDC_TIMESTAMPS_KEY = "timestamps"; +schema_ptr service_levels() { /* Returns the schema of the service_levels table in the system_distributed keyspace; it is built once per thread by the lambda below and cached in a thread_local. */ + static thread_local auto schema = [] { + auto id = generate_legacy_id(system_distributed_keyspace::NAME, system_distributed_keyspace::SERVICE_LEVELS); /* the id handed to schema_builder comes from generate_legacy_id(keyspace name, table name) */ + return schema_builder(system_distributed_keyspace::NAME, system_distributed_keyspace::SERVICE_LEVELS, std::make_optional(id)) + .with_column("service_level", utf8_type, column_kind::partition_key) + .with_column("timeout", duration_type) + .with_column("workload_type", utf8_type) + .with_column("shares", int32_type) + .with_hash_version() + .build(); + }(); + return schema; +} + schema_ptr snapshot_sstables() { static thread_local auto schema = [] { auto id = generate_legacy_id(system_distributed_keyspace::NAME, system_distributed_keyspace::SNAPSHOT_SSTABLES); @@ -143,12 +157,13 @@ static std::vector ensured_tables() { view_build_status(), cdc_desc(), cdc_timestamps(), + service_levels(), snapshot_sstables(), }; } std::vector system_distributed_keyspace::all_distributed_tables() { - return {view_build_status(), cdc_desc(), cdc_timestamps(), snapshot_sstables()}; + return {view_build_status(), cdc_desc(), cdc_timestamps(), service_levels(), snapshot_sstables()}; } system_distributed_keyspace::system_distributed_keyspace(cql3::query_processor& qp, service::migration_manager& mm, service::storage_proxy& sp) diff --git a/db/system_distributed_keyspace.hh b/db/system_distributed_keyspace.hh index 5b30d5ec9a..fa697d9feb 100644 --- a/db/system_distributed_keyspace.hh +++ b/db/system_distributed_keyspace.hh @@ -55,6 +55,7 @@ public: static constexpr auto NAME = "system_distributed"; static constexpr auto VIEW_BUILD_STATUS = "view_build_status"; + static constexpr auto SERVICE_LEVELS = 
"service_levels"; /* This table is used by CDC clients to learn about available CDC streams. */ static constexpr auto CDC_DESC_V2 = "cdc_streams_descriptions_v2"; diff --git a/main.cc b/main.cc index 7b5124a04d..4555b3db46 100644 --- a/main.cc +++ b/main.cc @@ -68,6 +68,7 @@ #include "vector_search/vector_store_client.hh" #include #include +#include #include #include #include @@ -223,6 +224,49 @@ read_config(bpo::variables_map& opts, db::config& cfg) { } }
+// Brings the persisted service levels version marker up to v2 once group0 is running.
+//
+// Each attempt reads the marker while holding a group0 operation guard and releases the guard
+// again. If the marker is not 2, it runs qos::service_level_controller::migrate_to_v2() and then
+// sends a group0 read barrier to the live members. A failed attempt is retried, for at most
+// max_attempts attempts in total.
+//
+// Blocks on futures with .get() - presumably must be called from a seastar thread; confirm at the caller.
+//
+// Throws the pending exception unchanged when `as` already has an abort requested; otherwise throws
+// std::runtime_error (nesting the last failure) once every attempt has failed.
+static void
+self_heal_service_levels_version(db::system_keyspace& sys_ks, cql3::query_processor& qp, service::raft_group0_client& group0_client, abort_source& as) {
+    static constexpr unsigned max_attempts = 10;
+    for (unsigned attempt = 1; attempt <= max_attempts; ++attempt) {
+        try {
+            auto guard = group0_client.start_operation(as).get();
+            auto service_levels_version = sys_ks.get_service_levels_version().get();
+            service::release_guard(std::move(guard));
+            if (service_levels_version && *service_levels_version == 2) {
+                startlog.info("Service levels version marker was already self-healed to v2.");
+                return;
+            }
+
+            auto nodes_count = qp.db().real_database().get_token_metadata().get_normal_token_owners().size();
+            qos::service_level_controller::migrate_to_v2(nodes_count, sys_ks, qp, group0_client, as).get();
+            group0_client.send_group0_read_barrier_to_live_members().get();
+            startlog.info("Self-healed service levels version marker to v2.");
+            return;
+        } catch (...) {
+            // Once an abort has been requested a retry only delays it, so propagate the failure as-is.
+            if (as.abort_requested()) {
+                throw;
+            }
+            if (attempt == max_attempts) {
+                std::throw_with_nested(std::runtime_error(format("Failed to self-heal service levels version marker after {} attempts", max_attempts)));
+            }
+            // Report the real cause: this handler catches every failure, not only concurrent group0 operations.
+            startlog.warn("Failed to self-heal service levels version marker: {}, retrying ({}/{}).", std::current_exception(), attempt, max_attempts);
+        }
+    }
+}
+
#ifdef SCYLLA_ENABLE_ERROR_INJECTION static future<> enable_initial_error_injections(const db::config& cfg) { @@ -1525,6 +1569,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl sys_ks.local().build_bootstrap_info().get(); + bool should_self_heal_service_levels_version = false; if (sys_ks.local().bootstrap_complete()) { // Check as early as possible if the cluster is fully upgraded to use Raft, since if it's not, then this node cannot be started with the current version. if (sys_ks.local().load_group0_upgrade_state().get() != "use_post_raft_procedures") { @@ -1532,7 +1577,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl " a node of a cluster that is not using Raft yet. This is no longer supported. Please first complete the upgrade of the cluster to use Raft"); } - if (sys_ks.local().load_topology_upgrade_state().get() != "done") { + const bool raft_topology_done = sys_ks.local().load_topology_upgrade_state().get() == "done"; + if (!raft_topology_done) { throw std::runtime_error( "Cannot start - cluster is not yet upgraded to use raft topology and this version does not support legacy topology operations. " "If you are trying to upgrade the node then first upgrade the cluster to use raft topology."); @@ -1543,10 +1589,13 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl "Cannot start - cluster is not yet upgraded to use auth v2 and this version does not support legacy auth. 
" "If you are trying to upgrade the node then first upgrade the cluster to use auth v2."); } - if (sys_ks.local().get_service_levels_version().get() != 2) { - throw std::runtime_error( - "Cannot start - cluster is not yet upgraded to use service levels v2 and this version does not support legacy service levels. " - "If you are trying to upgrade the node then first upgrade the cluster to use service levels v2."); + auto service_levels_version = sys_ks.local().get_service_levels_version().get(); + if (raft_topology_done && (!service_levels_version || *service_levels_version != 2)) { + should_self_heal_service_levels_version = true; + startlog.warn( + "Cluster is using raft topology but service levels are still marked as version {}. " + "Startup will continue and the service levels version marker will be self-healed after group0 starts.", + service_levels_version ? format("{}", *service_levels_version) : "unset"); } if (sys_ks.local().get_view_builder_version().get() != db::system_keyspace::view_builder_version_t::v2) { throw std::runtime_error( @@ -2368,6 +2417,11 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl }).get(); stop_signal.ready(false); + if (should_self_heal_service_levels_version) { + checkpoint(stop_signal, "self-healing service levels version"); + self_heal_service_levels_version(sys_ks.local(), qp.local(), group0_client, stop_signal.as_local_abort_source()); + } + // At this point, `locator::topology` should be stable, i.e. we should have complete information // about the layout of the cluster (= list of nodes along with the racks/DCs).
startlog.info("Verifying that all of the keyspaces are RF-rack-valid"); diff --git a/service/qos/service_level_controller.cc b/service/qos/service_level_controller.cc index c79364ced8..c4731c51ad 100644 --- a/service/qos/service_level_controller.cc +++ b/service/qos/service_level_controller.cc @@ -26,6 +26,7 @@ #include #include "service/qos/raft_service_level_distributed_data_accessor.hh" #include "service_level_controller.hh" +#include "db/system_distributed_keyspace.hh" #include "cql3/query_processor.hh" #include "service/storage_service.hh" #include "service/topology_state_machine.hh"