mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-24 00:32:15 +00:00
Merge 'Sanitize start-stop of protocol servers' from Pavel Emelyanov
Protocol servers are started last, and are registered in storage_service, which stops them. Also there are deferred actions scheduled to stop protocol servers on aborted start and a FIXME asking to make even this case rely on storage_service. Also, there's a (rather rare) aborted-start bug in alternator and redis. Yet, thrift can be left started in some weird circumstances. This patch fixes it all. As a side effect, the start-stop code becomes shorter and a bit better structured. refs: #2737 Closes scylladb/scylladb#19042 * github.com:scylladb/scylladb: main: Start alternator expiration service earlier main: Start redis transparently main: Start alternator transparently main: Start thrift transparently main: Start native transport transparently storage_service: Make register_protocol_server() start the server storage_service: Turn register_protocol_server() async method storage_service: Outline register_protocol_server() main: Schedule deferred drain_on_shutdown() prior to protocol servers main: Move some trailing startup earlier
This commit is contained in:
78
main.cc
78
main.cc
@@ -1961,49 +1961,10 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
api::unset_server_view_builder(ctx).get();
|
||||
});
|
||||
|
||||
db.invoke_on_all(&replica::database::revert_initial_system_read_concurrency_boost).get();
|
||||
|
||||
notify_set.notify_all(configurable::system_state::started).get();
|
||||
|
||||
cql_transport::controller cql_server_ctl(auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, cql_sg_stats_key, maintenance_socket_enabled::no, dbcfg.statement_scheduling_group);
|
||||
|
||||
ss.local().register_protocol_server(cql_server_ctl);
|
||||
|
||||
std::any stop_cql;
|
||||
|
||||
if (cfg->start_native_transport()) {
|
||||
start_cql(cql_server_ctl, stop_cql, "native transport");
|
||||
}
|
||||
|
||||
api::set_transport_controller(ctx, cql_server_ctl).get();
|
||||
auto stop_transport_controller = defer_verbose_shutdown("transport controller API", [&ctx] {
|
||||
api::unset_transport_controller(ctx).get();
|
||||
});
|
||||
|
||||
::thrift_controller thrift_ctl(db, auth_service, qp, service_memory_limiter, ss, proxy, dbcfg.statement_scheduling_group);
|
||||
|
||||
ss.local().register_protocol_server(thrift_ctl);
|
||||
|
||||
std::any stop_rpc;
|
||||
if (cfg->start_rpc()) {
|
||||
thrift_ctl.start_server().get();
|
||||
// FIXME -- this should be done via client hooks instead
|
||||
stop_rpc = defer_verbose_shutdown("rpc server", [&thrift_ctl] {
|
||||
thrift_ctl.stop_server().get();
|
||||
});
|
||||
}
|
||||
|
||||
api::set_rpc_controller(ctx, thrift_ctl).get();
|
||||
auto stop_rpc_controller = defer_verbose_shutdown("rpc controller API", [&ctx] {
|
||||
api::unset_rpc_controller(ctx).get();
|
||||
});
|
||||
|
||||
alternator::controller alternator_ctl(gossiper, proxy, mm, sys_dist_ks, cdc_generation_service, service_memory_limiter, auth_service, sl_controller, *cfg, dbcfg.statement_scheduling_group);
|
||||
sharded<alternator::expiration_service> es;
|
||||
std::any stop_expiration_service;
|
||||
|
||||
if (cfg->alternator_port() || cfg->alternator_https_port()) {
|
||||
alternator_ctl.start_server().get();
|
||||
// Start the expiration service on all shards.
|
||||
// Currently we only run it if Alternator is enabled, because
|
||||
// only Alternator uses it for its TTL feature. But in the
|
||||
@@ -2019,23 +1980,44 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
return es.invoke_on_all(&alternator::expiration_service::start);
|
||||
}).get();
|
||||
}
|
||||
ss.local().register_protocol_server(alternator_ctl);
|
||||
|
||||
redis::controller redis_ctl(proxy, auth_service, mm, *cfg, gossiper, dbcfg.statement_scheduling_group);
|
||||
if (cfg->redis_port() || cfg->redis_ssl_port()) {
|
||||
redis_ctl.start_server().get();
|
||||
}
|
||||
ss.local().register_protocol_server(redis_ctl);
|
||||
|
||||
db.invoke_on_all(&replica::database::revert_initial_system_read_concurrency_boost).get();
|
||||
notify_set.notify_all(configurable::system_state::started).get();
|
||||
seastar::set_abort_on_ebadf(cfg->abort_on_ebadf());
|
||||
api::set_server_done(ctx).get();
|
||||
supervisor::notify("serving");
|
||||
// Register at_exit last, so that storage_service::drain_on_shutdown will be called first
|
||||
|
||||
// Create controllers before drain_on_shutdown() below, so that it destructs
|
||||
// after drain stops them in stop_transport()
|
||||
// Register controllers after drain_on_shutdown() below, so that even on start
|
||||
// failure drain is called and stops controllers
|
||||
cql_transport::controller cql_server_ctl(auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, cql_sg_stats_key, maintenance_socket_enabled::no, dbcfg.statement_scheduling_group);
|
||||
::thrift_controller thrift_ctl(db, auth_service, qp, service_memory_limiter, ss, proxy, dbcfg.statement_scheduling_group);
|
||||
alternator::controller alternator_ctl(gossiper, proxy, mm, sys_dist_ks, cdc_generation_service, service_memory_limiter, auth_service, sl_controller, *cfg, dbcfg.statement_scheduling_group);
|
||||
redis::controller redis_ctl(proxy, auth_service, mm, *cfg, gossiper, dbcfg.statement_scheduling_group);
|
||||
|
||||
// Register at_exit last, so that storage_service::drain_on_shutdown will be called first
|
||||
auto do_drain = defer_verbose_shutdown("local storage", [&ss] {
|
||||
ss.local().drain_on_shutdown().get();
|
||||
});
|
||||
|
||||
ss.local().register_protocol_server(cql_server_ctl, cfg->start_native_transport()).get();
|
||||
api::set_transport_controller(ctx, cql_server_ctl).get();
|
||||
auto stop_transport_controller = defer_verbose_shutdown("transport controller API", [&ctx] {
|
||||
api::unset_transport_controller(ctx).get();
|
||||
});
|
||||
|
||||
ss.local().register_protocol_server(thrift_ctl, cfg->start_rpc()).get();
|
||||
api::set_rpc_controller(ctx, thrift_ctl).get();
|
||||
auto stop_rpc_controller = defer_verbose_shutdown("rpc controller API", [&ctx] {
|
||||
api::unset_rpc_controller(ctx).get();
|
||||
});
|
||||
|
||||
ss.local().register_protocol_server(alternator_ctl, cfg->alternator_port() || cfg->alternator_https_port()).get();
|
||||
|
||||
ss.local().register_protocol_server(redis_ctl, cfg->redis_port() || cfg->redis_ssl_port()).get();
|
||||
|
||||
supervisor::notify("serving");
|
||||
|
||||
startlog.info("Scylla version {} initialization completed.", scylla_version());
|
||||
if (after_init_func) {
|
||||
after_init_func(cfg);
|
||||
|
||||
@@ -7221,5 +7221,12 @@ void storage_service::set_topology_change_kind(topology_change_kind kind) {
|
||||
_gossiper.set_topology_state_machine(kind == topology_change_kind::raft ? & _topology_state_machine : nullptr);
|
||||
}
|
||||
|
||||
future<> storage_service::register_protocol_server(protocol_server& server, bool start_instantly) {
|
||||
_protocol_servers.push_back(&server);
|
||||
if (start_instantly) {
|
||||
co_await server.start_server();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace service
|
||||
|
||||
|
||||
@@ -329,9 +329,7 @@ public:
|
||||
// should only be called via JMX
|
||||
future<bool> is_gossip_running();
|
||||
|
||||
void register_protocol_server(protocol_server& server) {
|
||||
_protocol_servers.push_back(&server);
|
||||
}
|
||||
future<> register_protocol_server(protocol_server& server, bool start_instantly = false);
|
||||
|
||||
// All pointers are valid.
|
||||
const std::vector<protocol_server*>& protocol_servers() const {
|
||||
|
||||
Reference in New Issue
Block a user