Files
scylladb/service/qos/service_level_controller.hh
Piotr Dulikowski ecd53db3b0 service/qos: remove the marked_for_deletion parameter
It is always set to false and it doesn't seem to serve any function now.
2024-09-04 21:52:34 +02:00

350 lines
14 KiB
C++

/*
* Copyright (C) 2020-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include <seastar/core/timer.hh>
#include "seastar/util/bool_class.hh"
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include <seastar/core/distributed.hh>
#include <seastar/core/abort_source.hh>
#include "seastarx.hh"
#include "auth/role_manager.hh"
#include "auth/service.hh"
#include <map>
#include "qos_common.hh"
#include "service/endpoint_lifecycle_subscriber.hh"
#include "qos_configuration_change_subscriber.hh"
#include "service/raft/raft_group0_client.hh"
#include "service/raft/raft_group_registry.hh"
namespace db {
class system_keyspace;
class system_distributed_keyspace;
}
namespace cql3 {
class query_processor;
}
namespace service {
class storage_service;
}
namespace qos {
/**
* a structure to hold a service level
* data and configuration.
*/
struct service_level {
service_level_options slo;
bool is_static = false;
service_level() = default;
service_level(service_level_options slo, bool is_static)
: slo(std::move(slo))
, is_static(is_static)
{}
};
using update_both_cache_levels = bool_class<class update_both_cache_levels_tag>;
/**
* The service_level_controller class is an implementation of the service level
* controller design.
* It is logically divided into 2 parts:
* 1. Global controller which is responsible for all of the data and plumbing
* manipulation.
* 2. Local controllers that act upon the data and facilitates execution in
* the service level context
*
* Definitions:
* service level - User creates service level with some parameters (timeout/workload type).
* Then it can be attached to some role, but it's not guaranteed the role
* will effectively get exactly the same parameter values
* as its assigned service level.
*
* effective service level - Represents exact values of role's parameters. It's created by
* merging all service levels attached to all roles which are
* granted to the user.
*
* Example:
* Roles: user_role, r1, r2
* GRANT ROLE r1 TO user_role
* GRANT ROLE r2 TO r1
* Service levels:
* - sl1 (timeout: 1h, workload type: batch)
* - sl2 (timeout: 30 min)
* - ATTACH SERVICE LEVEL sl1 TO user_role
* - ATTACH SERVICE LEVEL sl2 TO r2
*
* Effective service level of user_role is (timeout: 30 min, workload_type: batch).
* It gets created by merging all service levels (sl1, sl2) which are attached to
* all roles granted to the user (user_role, r1, r2).
*
* For more detail check qos_common.hh (service_level_options::merge_with()) and
* docs/dev/service_levels.md
*/
class service_level_controller : public peering_sharded_service<service_level_controller>, public service::endpoint_lifecycle_subscriber {
public:
class service_level_distributed_data_accessor {
public:
virtual future<qos::service_levels_info> get_service_levels() const = 0;
virtual future<qos::service_levels_info> get_service_level(sstring service_level_name) const = 0;
virtual future<> set_service_level(sstring service_level_name, qos::service_level_options slo, service::group0_batch& mc) const = 0;
virtual future<> drop_service_level(sstring service_level_name, service::group0_batch& mc) const = 0;
virtual future<> commit_mutations(service::group0_batch&& mc, abort_source& as) const = 0;
virtual bool is_v2() const = 0;
// Returns v2(raft) data accessor. If data accessor is already a raft one, returns nullptr.
virtual ::shared_ptr<service_level_distributed_data_accessor> upgrade_to_v2(cql3::query_processor& qp, service::raft_group0_client& group0_client) const = 0;
};
using service_level_distributed_data_accessor_ptr = ::shared_ptr<service_level_distributed_data_accessor>;
private:
struct global_controller_data {
service_levels_info static_configurations{};
int schedg_group_cnt = 0;
int io_priority_cnt = 0;
service_level_options default_service_level_config;
// The below future is used to serialize work so no reordering can occur.
// This is needed so for example: delete(x), add(x) will not reverse yielding
// a completely different result than the one intended.
future<> work_future = make_ready_future();
semaphore notifications_serializer = semaphore(1);
future<> distributed_data_update = make_ready_future();
abort_source dist_data_update_aborter;
abort_source group0_aborter;
};
std::unique_ptr<global_controller_data> _global_controller_db;
static constexpr shard_id global_controller = 0;
// service level name -> service_level object
std::map<sstring, service_level> _service_levels_db;
// role name -> effective service_level_options
std::map<sstring, service_level_options> _effective_service_levels_db;
service_level _default_service_level;
service_level_distributed_data_accessor_ptr _sl_data_accessor;
sharded<auth::service>& _auth_service;
locator::shared_token_metadata& _token_metadata;
std::chrono::time_point<seastar::lowres_clock> _last_successful_config_update;
unsigned _logged_intervals;
atomic_vector<qos_configuration_change_subscriber*> _subscribers;
optimized_optional<abort_source::subscription> _early_abort_subscription;
void do_abort() noexcept;
public:
service_level_controller(sharded<auth::service>& auth_service, locator::shared_token_metadata& tm, abort_source& as, service_level_options default_service_level_config);
/**
* this function must be called *once* from any shard before any other functions are called.
* No other function should be called before the future returned by the function is resolved.
* @return a future that resolves when the initialization is over.
*/
future<> start();
void set_distributed_data_accessor(service_level_distributed_data_accessor_ptr sl_data_accessor);
/**
* Reloads data accessor, this is used to align it with service level version
* stored in scylla_local table.
*/
future<> reload_distributed_data_accessor(cql3::query_processor&, service::raft_group0_client&, db::system_keyspace&, db::system_distributed_keyspace&);
/**
* Adds a service level configuration if it doesn't exists, and updates
* an the existing one if it does exist.
* Handles both, static and non static service level configurations.
* @param name - the service level name.
* @param slo - the service level configuration
* @param is_static - is this configuration static or not
*/
future<> add_service_level(sstring name, service_level_options slo, bool is_static = false);
/**
* Removes a service level configuration if it exists.
* Handles both, static and non static service level configurations.
* @param name - the service level name.
* @param remove_static - indicates if it is a removal of a static configuration
* or not.
*/
future<> remove_service_level(sstring name, bool remove_static);
/**
* stops all ongoing operations if exists
* @return a future that is resolved when all operations has stopped
*/
future<> stop();
void abort_group0_operations();
/**
* Start legacy update loop if RAFT_SERVICE_LEVELS_CHANGE feature is not enabled yet
* or the cluster is in recovery mode
* or the cluster hasn't been migrated to raft topology.
*
* The update loop check the distributed data
* for changes in a constant interval and updates
* the service_levels configuration in accordance (adds, removes, or updates
* service levels as necessary).
* @param interval_f - lambda function which returns a interval in milliseconds.
The interval is time to check the distributed data.
* @return a future that is resolved when the update loop stops.
*/
void maybe_start_legacy_update_from_distributed_data(std::function<steady_clock_type::duration()> interval_f, service::storage_service& storage_service, service::raft_group0_client& group0_client);
/**
* Request abort of update loop.
* Must be called on shard 0.
*/
void stop_legacy_update_from_distributed_data();
/**
* Updates the service level cache from the distributed data store.
* Must be executed on shard 0.
* @return a future that is resolved when the update is done
*/
future<> update_service_levels_cache();
/**
* Updates effective service levels cache.
* The method uses service levels cache (_service_levels_db)
* and data from auth tables.
* Must be executed on shard 0.
* @return a future that is resolved when the update is done
*/
future<> update_effective_service_levels_cache();
/**
* Service levels cache consists of two levels: service levels cache and effective service levels cache
* The second one is dependent on the first one.
*
* update_both_cache_levels::yes - updates both levels of the cache
* update_both_cache_levels::no - update only effective service levels cache
*/
future<> update_cache(update_both_cache_levels update_both_cache_levels = update_both_cache_levels::yes);
future<> add_distributed_service_level(sstring name, service_level_options slo, bool if_not_exsists, service::group0_batch& mc);
future<> alter_distributed_service_level(sstring name, service_level_options slo, service::group0_batch& mc);
future<> drop_distributed_service_level(sstring name, bool if_exists, service::group0_batch& mc);
future<service_levels_info> get_distributed_service_levels();
future<service_levels_info> get_distributed_service_level(sstring service_level_name);
/**
* Returns the service level options **in effect** for a user having the given
* collection of roles.
* @param roles - the collection of roles to consider
* @return the effective service level options - they may in particular be a combination
* of options from multiple service levels
*/
future<std::optional<service_level_options>> find_effective_service_level(const sstring& role_name);
/**
* Gets the service level data by name.
* @param service_level_name - the name of the requested service level
* @return the service level data if it exists (in the local controller) or
* get_service_level("default") otherwise.
*/
service_level& get_service_level(sstring service_level_name) {
auto sl_it = _service_levels_db.find(service_level_name);
if (sl_it == _service_levels_db.end()) {
sl_it = _service_levels_db.find(default_service_level_name);
}
return sl_it->second;
}
future<> commit_mutations(::service::group0_batch&& mc) {
if (_sl_data_accessor->is_v2()) {
return _sl_data_accessor->commit_mutations(std::move(mc), _global_controller_db->group0_aborter);
}
return make_ready_future();
}
/**
* Returns true if service levels module is running under raft
*/
bool is_v2() const;
void upgrade_to_v2(cql3::query_processor& qp, service::raft_group0_client& group0_client);
/**
* Migrate data from `system_distributed.service_levels` to `system.service_levels_v2`
*/
static future<> migrate_to_v2(size_t nodes_count, db::system_keyspace& sys_ks, cql3::query_processor& qp, service::raft_group0_client& group0_client, abort_source& as);
private:
/**
* Adds a service level configuration if it doesn't exists, and updates
* an the existing one if it does exist.
* Handles both, static and non static service level configurations.
* @param name - the service level name.
* @param slo - the service level configuration
* @param is_static - is this configuration static or not
*/
future<> do_add_service_level(sstring name, service_level_options slo, bool is_static = false);
/**
* Removes a service level configuration if it exists.
* Handles both, static and non static service level configurations.
* @param name - the service level name.
* @param remove_static - indicates if it is a removal of a static configuration
* or not.
*/
future<> do_remove_service_level(sstring name, bool remove_static);
/**
* The notify functions are used by the global service level controller
* to propagate configuration changes to the local controllers.
* the returned future is resolved when the local controller is done acting
* on the notification. updates are done in sequence so their meaning will not
* change due to execution reordering.
*/
future<> notify_service_level_added(sstring name, service_level sl_data);
future<> notify_service_level_updated(sstring name, service_level_options slo);
future<> notify_service_level_removed(sstring name);
future<> notify_effective_service_levels_cache_reloaded();
enum class set_service_level_op_type {
add_if_not_exists,
add,
alter
};
future<> set_distributed_service_level(sstring name, service_level_options slo, set_service_level_op_type op_type, service::group0_batch& mc);
public:
/**
* Register a subscriber for service level configuration changes
* notifications
* @param subscriber - a pointer to the subscriber.
*
* Note: the caller is responsible to keep the pointed to object alive until performing
* a call to service_level_controller::unregister_subscriber()).
*
* Note 2: the subscription is per shard.
*/
void register_subscriber(qos_configuration_change_subscriber* subscriber);
future<> unregister_subscriber(qos_configuration_change_subscriber* subscriber);
static sstring default_service_level_name;
virtual void on_join_cluster(const gms::inet_address& endpoint) override;
virtual void on_leave_cluster(const gms::inet_address& endpoint, const locator::host_id& hid) override;
virtual void on_up(const gms::inet_address& endpoint) override;
virtual void on_down(const gms::inet_address& endpoint) override;
};
future<shared_ptr<service_level_controller::service_level_distributed_data_accessor>>
get_service_level_distributed_data_accessor_for_current_version(
db::system_keyspace& sys_ks,
db::system_distributed_keyspace& sys_dist_ks,
cql3::query_processor& qp, service::raft_group0_client& group0_client
);
}