code: Add maintenance/maintenance group

And move some activities from streaming group into it, namely

- tablet_allocator background group
- sstables_manager's components reclaimer
- tablet storage group manager merge completion fiber
- prometheus

All other activity that was in the streaming group remains there, but it can be
moved into this group (or into a new maintenance subgroup) later.

All but prometheus are patched here; prometheus still uses the
maintenance_sched_group variable in main.cc, so it transparently
moves into the new group.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This commit is contained in:
Pavel Emelyanov
2026-02-04 18:25:46 +03:00
parent de9bfe0f1d
commit cb329b10bf
6 changed files with 16 additions and 9 deletions

View File

@@ -96,7 +96,7 @@ else()
set(Seastar_EXCLUDE_APPS_FROM_ALL ON CACHE BOOL "" FORCE)
set(Seastar_EXCLUDE_TESTS_FROM_ALL ON CACHE BOOL "" FORCE)
set(Seastar_IO_URING ON CACHE BOOL "" FORCE)
set(Seastar_SCHEDULING_GROUPS_COUNT 23 CACHE STRING "" FORCE)
set(Seastar_SCHEDULING_GROUPS_COUNT 24 CACHE STRING "" FORCE)
set(Seastar_UNUSED_RESULT_ERROR ON CACHE BOOL "" FORCE)
add_subdirectory(seastar)
target_compile_definitions (seastar

View File

@@ -2148,7 +2148,7 @@ def configure_seastar(build_dir, mode, mode_config, compiler_cache=None):
'-DSeastar_DEPRECATED_OSTREAM_FORMATTERS=OFF',
'-DSeastar_UNUSED_RESULT_ERROR=ON',
'-DCMAKE_EXPORT_COMPILE_COMMANDS=ON',
'-DSeastar_SCHEDULING_GROUPS_COUNT=23',
'-DSeastar_SCHEDULING_GROUPS_COUNT=24',
'-DSeastar_IO_URING=ON',
]

View File

@@ -945,8 +945,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
auto maintenance_supergroup = create_scheduling_supergroup(200).get();
auto bandwidth_updater = io_throughput_updater("maintenance supergroup", maintenance_supergroup,
cfg->maintenance_io_throughput_mb_per_sec.is_set() ? cfg->maintenance_io_throughput_mb_per_sec : cfg->stream_io_throughput_mb_per_sec);
auto maintenance_scheduling_group = create_scheduling_group("streaming", "strm", 200, maintenance_supergroup).get();
debug::streaming_scheduling_group = maintenance_scheduling_group;
auto maintenance_scheduling_group = create_scheduling_group("maintenance", "mant", 200, maintenance_supergroup).get();
smp::invoke_on_all([&cfg, background_reclaim_scheduling_group] {
logalloc::tracker::config st_cfg;
@@ -1186,7 +1185,9 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
dbcfg.compaction_scheduling_group = create_scheduling_group("compaction", "comp", 1000).get();
dbcfg.maintenance_compaction_scheduling_group = create_scheduling_group("maintenance_compaction", "manc", 200, maintenance_supergroup).get();
dbcfg.memory_compaction_scheduling_group = create_scheduling_group("mem_compaction", "mcmp", 1000).get();
dbcfg.streaming_scheduling_group = maintenance_scheduling_group;
dbcfg.streaming_scheduling_group = create_scheduling_group("streaming", "strm", 200, maintenance_supergroup).get();
debug::streaming_scheduling_group = dbcfg.streaming_scheduling_group;
dbcfg.maintenance_scheduling_group = maintenance_scheduling_group;
dbcfg.statement_scheduling_group = create_scheduling_group("statement", "stmt", 1000, user_ssg).get();
dbcfg.memtable_scheduling_group = create_scheduling_group("memtable", "mt", 1000).get();
dbcfg.memtable_to_cache_scheduling_group = create_scheduling_group("memtable_to_cache", "mt2c", 200).get();
@@ -1761,7 +1762,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
checkpoint(stop_signal, "starting tablet allocator");
service::tablet_allocator::config tacfg {
.background_sg = dbcfg.streaming_scheduling_group,
.background_sg = dbcfg.maintenance_scheduling_group,
};
sharded<service::tablet_allocator> tablet_allocator;
tablet_allocator.start(tacfg, std::ref(mm_notifier), std::ref(db)).get();

View File

@@ -460,8 +460,8 @@ database::database(const db::config& cfg, database_config dbcfg, service::migrat
, _nop_large_data_handler(std::make_unique<db::nop_large_data_handler>())
, _corrupt_data_handler(std::make_unique<db::system_table_corrupt_data_handler>(db::system_table_corrupt_data_handler::config{.entry_ttl = std::chrono::days(10)}, db::corrupt_data_handler::register_metrics::yes))
, _nop_corrupt_data_handler(std::make_unique<db::nop_corrupt_data_handler>(db::corrupt_data_handler::register_metrics::no))
, _user_sstables_manager(std::make_unique<sstables::sstables_manager>("user", *_large_data_handler, *_corrupt_data_handler, configure_sstables_manager(_cfg, dbcfg), feat, _row_cache_tracker, sst_dir_sem, [&stm]{ return stm.get()->get_my_id(); }, scf, abort, _cfg.extensions().sstable_file_io_extensions(), dbcfg.streaming_scheduling_group, &sstm))
, _system_sstables_manager(std::make_unique<sstables::sstables_manager>("system", *_nop_large_data_handler, *_nop_corrupt_data_handler, configure_sstables_manager(_cfg, dbcfg), feat, _row_cache_tracker, sst_dir_sem, [&stm]{ return stm.get()->get_my_id(); }, scf, abort, _cfg.extensions().sstable_file_io_extensions(), dbcfg.streaming_scheduling_group))
, _user_sstables_manager(std::make_unique<sstables::sstables_manager>("user", *_large_data_handler, *_corrupt_data_handler, configure_sstables_manager(_cfg, dbcfg), feat, _row_cache_tracker, sst_dir_sem, [&stm]{ return stm.get()->get_my_id(); }, scf, abort, _cfg.extensions().sstable_file_io_extensions(), dbcfg.maintenance_scheduling_group, &sstm))
, _system_sstables_manager(std::make_unique<sstables::sstables_manager>("system", *_nop_large_data_handler, *_nop_corrupt_data_handler, configure_sstables_manager(_cfg, dbcfg), feat, _row_cache_tracker, sst_dir_sem, [&stm]{ return stm.get()->get_my_id(); }, scf, abort, _cfg.extensions().sstable_file_io_extensions(), dbcfg.maintenance_scheduling_group))
, _result_memory_limiter(dbcfg.available_memory / 10)
, _data_listeners(std::make_unique<db::data_listeners>())
, _mnotifier(mn)
@@ -1570,6 +1570,7 @@ keyspace::make_column_family_config(const schema& s, const database& db) const {
cfg.memtable_scheduling_group = _config.memtable_scheduling_group;
cfg.memtable_to_cache_scheduling_group = _config.memtable_to_cache_scheduling_group;
cfg.streaming_scheduling_group = _config.streaming_scheduling_group;
cfg.maintenance_scheduling_group = _config.maintenance_scheduling_group;
cfg.enable_metrics_reporting = db_config.enable_keyspace_column_family_metrics();
cfg.enable_node_aggregated_table_metrics = db_config.enable_node_aggregated_table_metrics();
cfg.tombstone_warn_threshold = db_config.tombstone_warn_threshold();
@@ -1712,6 +1713,7 @@ request_class classify_request(const database_config& _dbcfg) {
// Requests done on behalf of view update generation run in the streaming group
} else if (current_group == _dbcfg.streaming_scheduling_group
|| current_group == _dbcfg.backup_scheduling_group
|| current_group == _dbcfg.maintenance_scheduling_group
|| current_group == _dbcfg.maintenance_compaction_scheduling_group) {
return request_class::maintenance;
// Everything else is considered a user request
@@ -2521,6 +2523,7 @@ database::make_keyspace_config(const keyspace_metadata& ksm, system_keyspace is_
cfg.memtable_scheduling_group = _dbcfg.memtable_scheduling_group;
cfg.memtable_to_cache_scheduling_group = _dbcfg.memtable_to_cache_scheduling_group;
cfg.streaming_scheduling_group = _dbcfg.streaming_scheduling_group;
cfg.maintenance_scheduling_group = _dbcfg.maintenance_scheduling_group;
cfg.enable_metrics_reporting = _cfg.enable_keyspace_column_family_metrics();
cfg.view_update_memory_semaphore_limit = max_memory_pending_view_updates();

View File

@@ -470,6 +470,7 @@ public:
seastar::scheduling_group memtable_to_cache_scheduling_group;
seastar::scheduling_group memory_compaction_scheduling_group;
seastar::scheduling_group streaming_scheduling_group;
seastar::scheduling_group maintenance_scheduling_group;
bool enable_metrics_reporting = false;
bool enable_node_aggregated_table_metrics = true;
size_t view_update_memory_semaphore_limit;
@@ -1456,6 +1457,7 @@ public:
seastar::scheduling_group memtable_to_cache_scheduling_group;
seastar::scheduling_group memory_compaction_scheduling_group;
seastar::scheduling_group streaming_scheduling_group;
seastar::scheduling_group maintenance_scheduling_group;
bool enable_metrics_reporting = false;
size_t view_update_memory_semaphore_limit;
};
@@ -1536,6 +1538,7 @@ struct database_config {
seastar::scheduling_group memory_compaction_scheduling_group;
seastar::scheduling_group statement_scheduling_group;
seastar::scheduling_group streaming_scheduling_group;
seastar::scheduling_group maintenance_scheduling_group;
seastar::scheduling_group gossip_scheduling_group;
seastar::scheduling_group commitlog_scheduling_group;
seastar::scheduling_group schema_commitlog_scheduling_group;

View File

@@ -3313,7 +3313,7 @@ void tablet_storage_group_manager::handle_tablet_split_completion(const locator:
}
future<> tablet_storage_group_manager::merge_completion_fiber() {
co_await coroutine::switch_to(_t.get_config().streaming_scheduling_group);
co_await coroutine::switch_to(_t.get_config().maintenance_scheduling_group);
while (!_t.async_gate().is_closed()) {
try {