From dfd7981fa7db47bbcd18d29fb74f5f6722f02208 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Botond=20D=C3=A9nes?=
Date: Thu, 9 Nov 2023 09:52:08 -0500
Subject: [PATCH] api/storage_service: start/stop native transport in the
 statement sg

Currently, it is started/stopped in the streaming/maintenance sg, which is
what the API itself runs in. Starting the native transport in the streaming
sg will lead to severely degraded performance, as the streaming sg has
significantly fewer CPU/disk shares and reader concurrency semaphore
resources. Furthermore, it will lead to multi-paged reads possibly switching
between scheduling groups mid-way, triggering an internal error.

To fix, use `with_scheduling_group()` for both starting and stopping the
native transport. Technically, it is only strictly necessary for starting,
but I added it for stop as well, for consistency.

Also apply the same treatment to RPC (Thrift). Although no one uses it, it is
best to fix it too, just to be on the safe side.

I think we need a more systematic approach for solving this once and for
all, like passing the scheduling group to the protocol server and having it
switch to it internally. This would allow the server to always run on the
correct scheduling group, without depending on the caller to remember to use
it. However, I think this is best done in a follow-up, to keep this critical
patch small and easily backportable.
Fixes: #15485

Closes scylladb/scylladb#16019

---
 api/storage_service.cc | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/api/storage_service.cc b/api/storage_service.cc
index 8dbdcee9cc..de835a0b6c 100644
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -250,17 +250,21 @@ future<json::json_return_type> set_tables_tombstone_gc(http_context& ctx, const
 }
 
 void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl) {
-    ss::start_native_transport.set(r, [&ctl](std::unique_ptr<http::request> req) {
+    ss::start_native_transport.set(r, [&ctx, &ctl](std::unique_ptr<http::request> req) {
         return smp::submit_to(0, [&] {
-            return ctl.start_server();
+            return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
+                return ctl.start_server();
+            });
         }).then([] {
             return make_ready_future<json::json_return_type>(json_void());
         });
     });
-    ss::stop_native_transport.set(r, [&ctl](std::unique_ptr<http::request> req) {
+    ss::stop_native_transport.set(r, [&ctx, &ctl](std::unique_ptr<http::request> req) {
         return smp::submit_to(0, [&] {
-            return ctl.request_stop_server();
+            return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
+                return ctl.request_stop_server();
+            });
         }).then([] {
             return make_ready_future<json::json_return_type>(json_void());
         });
@@ -282,17 +286,21 @@ void unset_transport_controller(http_context& ctx, routes& r) {
 }
 
 void set_rpc_controller(http_context& ctx, routes& r, thrift_controller& ctl) {
-    ss::stop_rpc_server.set(r, [&ctl](std::unique_ptr<http::request> req) {
+    ss::stop_rpc_server.set(r, [&ctx, &ctl](std::unique_ptr<http::request> req) {
         return smp::submit_to(0, [&] {
-            return ctl.request_stop_server();
+            return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
+                return ctl.request_stop_server();
+            });
         }).then([] {
             return make_ready_future<json::json_return_type>(json_void());
         });
     });
-    ss::start_rpc_server.set(r, [&ctl](std::unique_ptr<http::request> req) {
+    ss::start_rpc_server.set(r, [&ctx, &ctl](std::unique_ptr<http::request> req) {
         return smp::submit_to(0, [&] {
-            return ctl.start_server();
+            return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
+                return ctl.start_server();
+            });
         }).then([] {
             return make_ready_future<json::json_return_type>(json_void());
         });