Merge 'LIST EFFECTIVE SERVICE LEVEL statement' from Michał Jadwiszczak

Add `LIST EFFECTIVE SERVICE LEVEL` statement to be able to display from which service level come which service level options.

Example:
There are 2 roles: role1 and role2. Role1 is assigned with sl1 (timeout = 2s, workload_type = interactive) and role2 is assigned with sl2 (timeout = 10s, workload_type = batch).
Then, if we grant role1 to role2, the user with role2 will have 2s timeout (from sl1) and batch workload type (from sl2).

```
> LIST EFFECTIVE SERVICE LEVEL OF role2;

 service_level_option | effective_service_level | value
----------------------+-------------------------+-------------
        workload_type |                     sl2 |       batch
              timeout |                     sl1 |          2s
```

Fixes: https://github.com/scylladb/scylladb/issues/15604

Closes scylladb/scylladb#14431

* github.com:scylladb/scylladb:
  cql-pytest: add `LIST EFFECTIVE SERVICE LEVEL OF` test
  docs: add `LIST EFFECTIVE SERVICE LEVEL` statement docs
  cql3:statements: add `LIST EFFECTIVE SERVICE LEVEL` statement
  service:qos: add option to include effective names to SLO
This commit is contained in:
Nadav Har'El
2023-11-30 18:12:52 +02:00
14 changed files with 327 additions and 34 deletions

View File

@@ -962,6 +962,7 @@ scylla_core = (['message/messaging_service.cc',
'cql3/statements/detach_service_level_statement.cc',
'cql3/statements/list_service_level_statement.cc',
'cql3/statements/list_service_level_attachments_statement.cc',
'cql3/statements/list_effective_service_level_statement.cc',
'cql3/statements/describe_statement.cc',
'cql3/update_parameters.cc',
'cql3/util.cc',

View File

@@ -102,6 +102,7 @@ target_sources(cql3
statements/detach_service_level_statement.cc
statements/list_service_level_statement.cc
statements/list_service_level_attachments_statement.cc
statements/list_effective_service_level_statement.cc
statements/describe_statement.cc
update_parameters.cc
util.cc

View File

@@ -59,6 +59,7 @@ options {
#include "cql3/statements/list_roles_statement.hh"
#include "cql3/statements/list_service_level_statement.hh"
#include "cql3/statements/list_service_level_attachments_statement.hh"
#include "cql3/statements/list_effective_service_level_statement.hh"
#include "cql3/statements/grant_role_statement.hh"
#include "cql3/statements/revoke_role_statement.hh"
#include "cql3/statements/drop_role_statement.hh"
@@ -376,6 +377,7 @@ cqlStatement returns [std::unique_ptr<raw::parsed_statement> stmt]
| st47=listServiceLevelAttachStatement { $stmt = std::move(st47); }
| st48=pruneMaterializedViewStatement { $stmt = std::move(st48); }
| st49=describeStatement { $stmt = std::move(st49); }
| st50=listEffectiveServiceLevelStatement { $stmt = std::move(st50); }
;
/*
@@ -1410,6 +1412,16 @@ listServiceLevelAttachStatement returns [std::unique_ptr<list_service_level_atta
{ $stmt = std::make_unique<list_service_level_attachments_statement>(); }
;
/**
* LIST EFFECTIVE SERVICE_LEVEL OF <role_name>
*/
listEffectiveServiceLevelStatement returns [std::unique_ptr<list_effective_service_level_statement stmt>]
@init {
}
: K_LIST K_EFFECTIVE serviceLevel K_OF role_name=serviceLevelOrRoleName
{ $stmt = std::make_unique<list_effective_service_level_statement>(role_name); }
;
/**
* (DESCRIBE | DESC) (
* CLUSTER
@@ -2081,6 +2093,7 @@ basic_unreserved_keyword returns [sstring str]
| K_DESC
| K_EXECUTE
| K_MUTATION_FRAGMENTS
| K_EFFECTIVE
) { $str = $k.text; }
;
@@ -2274,6 +2287,7 @@ K_FOR: F O R;
K_SERVICE: S E R V I C E;
K_LEVEL: L E V E L;
K_LEVELS: L E V E L S;
K_EFFECTIVE: E F F E C T I V E;
K_SCYLLA_TIMEUUID_LIST_INDEX: S C Y L L A '_' T I M E U U I D '_' L I S T '_' I N D E X;
K_SCYLLA_COUNTER_SHARD_LIST: S C Y L L A '_' C O U N T E R '_' S H A R D '_' L I S T;

View File

@@ -0,0 +1,95 @@
/*
* Copyright (C) 2023-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include "list_effective_service_level_statement.hh"
#include "auth/role_manager.hh"
#include "cql3/statements/prepared_statement.hh"
#include <memory>
#include "exceptions/exceptions.hh"
#include "service/qos/qos_common.hh"
#include "service/query_state.hh"
#include "cql3/result_set.hh"
#include "types/types.hh"
#include "duration.hh"
#include "transport/messages/result_message.hh"
namespace cql3 {
namespace statements {
list_effective_service_level_statement::list_effective_service_level_statement(sstring role_name)
: _role_name(std::move(role_name)) {}
std::unique_ptr<prepared_statement>
list_effective_service_level_statement::prepare(data_dictionary::database db, cql_stats& stats) {
return std::make_unique<prepared_statement>(::make_shared<list_effective_service_level_statement>(*this));
}
static auto make_column(sstring name, const shared_ptr<const abstract_type> type) {
return make_lw_shared<column_specification>(
"QOS",
"effective_service_level",
::make_shared<column_identifier>(std::move(name), true),
type);
};
static bytes_opt decompose_timeout (const qos::service_level_options::timeout_type& duration) {
return std::visit(overloaded_functor{
[&] (const qos::service_level_options::unset_marker&) {
return bytes_opt();
},
[&] (const qos::service_level_options::delete_marker&) {
return bytes_opt();
},
[&] (const lowres_clock::duration& d) -> bytes_opt {
auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(d).count();
return utf8_type->decompose(to_string(cql_duration(months_counter{0}, days_counter{0}, nanoseconds_counter{nanos})));
},
}, duration);
};
future<::shared_ptr<cql_transport::messages::result_message>>
list_effective_service_level_statement::execute(query_processor& qp, service::query_state& state, const query_options&, std::optional<service::group0_guard>) const {
static thread_local const std::vector<lw_shared_ptr<column_specification>> metadata({
make_column("service_level_option", utf8_type),
make_column("effective_service_level", utf8_type),
make_column("value", utf8_type)
});
auto& role_manager = state.get_client_state().get_auth_service()->underlying_role_manager();
if (!co_await role_manager.exists(_role_name)) {
throw auth::nonexistant_role(_role_name);
}
auto role_set = co_await role_manager.query_granted(_role_name, auth::recursive_role_query::yes);
auto& sl_controller = state.get_service_level_controller();
auto slo = co_await sl_controller.find_service_level(role_set, qos::include_effective_names::yes);
if (!slo) {
throw exceptions::invalid_request_exception(format("Role {} doesn't have assigned any service level", _role_name));
}
auto rs = std::make_unique<result_set>(metadata);
rs->add_row({
utf8_type->decompose("workload_type"),
utf8_type->decompose(slo->effective_names->workload),
utf8_type->decompose(qos::service_level_options::to_string(slo->workload))
});
rs->add_row({
utf8_type->decompose("timeout"),
utf8_type->decompose(slo->effective_names->timeout),
decompose_timeout(slo->timeout)
});
auto rows = ::make_shared<cql_transport::messages::result_message::rows>(result(std::move(std::move(rs))));
co_return ::static_pointer_cast<cql_transport::messages::result_message>(rows);
}
}
}

View File

@@ -0,0 +1,29 @@
/*
* Copyright (C) 2023-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include "cql3/statements/service_level_statement.hh"
namespace cql3 {
namespace statements {
class list_effective_service_level_statement final : public service_level_statement {
sstring _role_name;
public:
list_effective_service_level_statement(sstring role_name);
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
virtual future<::shared_ptr<cql_transport::messages::result_message>>
execute(query_processor&, service::query_state&, const query_options&, std::optional<service::group0_guard>) const override;
};
}
}

View File

@@ -422,3 +422,19 @@ For more details, see:
- Detailed [design notes](https://github.com/scylladb/scylla/blob/master/docs/dev/per-partition-rate-limit.md)
- Description of the [rate limit exceeded](https://github.com/scylladb/scylla/blob/master/docs/dev/protocol-extensions.md#rate-limit-error) error
## Effective service level
Actual values of service level's options may come from different service levels, not only from the one user is assigned with.
To facilitate insight into which values come from which service level, there is ``LIST EFFECTIVE SERVICE LEVEL OF <role_name>`` command.
```cql
> LIST EFFECTIVE SERVICE LEVEL OF role2;
service_level_option | effective_service_level | value
----------------------+-------------------------+-------------
workload_type | sl2 | batch
timeout | sl1 | 2s
```
For more details, check [Service Levels docs](https://github.com/scylladb/scylla/blob/master/docs/cql/service-levels.rst)

View File

@@ -22,6 +22,7 @@ CQL Reference
mv
non-reserved-keywords
reserved-keywords
service-levels
cql-extensions.md
cqlsh - The CQL Shell

View File

@@ -0,0 +1,29 @@
==============
Service Levels
==============
Service Levels CQL commands
===========================
``LIST EFFECTIVE SERVICE LEVEL OF <role>``
-----------------------------------------------------------
Actual values of service level's options may come from different service levels, not only from the one user is assigned with. This can be achived by assigning one role to another.
For instance:
There are 2 roles: role1 and role2. Role1 is assigned with sl1 (timeout = 2s, workload_type = interactive) and role2 is assigned with sl2 (timeout = 10s, workload_type = batch).
Then, if we grant role1 to role2, the user with role2 will have 2s timeout (from sl1 because merging rule says to take lower timeout) and batch workload type (from sl2).
To facilitate insight into which values come from which service level, there is ``LIST EFFECTIVE SERVICE LEVEL OF <role_name>`` command.
The command displays a table with: option name, effective service level the value comes from and the option value.
.. code-block:: cql
> LIST EFFECTIVE SERVICE LEVEL OF role2;
service_level_option | effective_service_level | value
----------------------+-------------------------+-------------
workload_type | sl2 | batch
timeout | sl1 | 2s

View File

@@ -93,6 +93,7 @@ role4: `timeout = 10ms`
The granting hierarchy is as follows, with role1 inheriting from role2, which in turn
inherits from role3 and role4:
role4 role3
\ /
role2
@@ -134,3 +135,26 @@ Otherwise, e.g. if a role has multiple workload types declared,
the conflicts are resolved as follows:
- `X` vs `unspecified` -> `X`
- `batch` vs `interactive` -> `batch` - under the assumption that `batch` is safer, because it would not trigger load shedding as eagerly as `interactive`
### Effective service level
Actual values of service level's options may come from different service levels, not only from the one user is assigned with. This can be achived by assigning one role to another.
For instance:
There are 2 roles: role1 and role2. Role1 is assigned with sl1 (timeout = 2s, workload_type = interactive) and role2 is assigned with sl2 (timeout = 10s, workload_type = batch).
Then, if we grant role1 to role2, the user with role2 will have 2s timeout (from sl1) and batch workload type (from sl2).
To see detail how the options are merged, check [combining service levels section](#combining-service-level-timeouts-from-multiple-roles).
To facilitate insight into which values come from which service level, there is `LIST EFFECTIVE SERVICE LEVEL OF <role_name>` command.
The command displays a table with: option name, effective service level the value comes from and the option value.
```
> LIST EFFECTIVE SERVICE LEVEL OF role2;
service_level_option | effective_service_level | value
----------------------+-------------------------+-------------
workload_type | sl2 | batch
timeout | sl1 | 2s
```

View File

@@ -41,26 +41,50 @@ service_level_options service_level_options::replace_defaults(const service_leve
}
service_level_options service_level_options::merge_with(const service_level_options& other) const {
auto maybe_update_timeout_name = [] (service_level_options& slo, const service_level_options& other) {
if (slo.effective_names && other.effective_names) {
slo.effective_names->timeout = other.effective_names->timeout;
}
};
auto maybe_update_workload_name = [] (service_level_options& slo, const service_level_options& other) {
if (slo.effective_names && other.effective_names) {
slo.effective_names->workload = other.effective_names->workload;
}
};
service_level_options ret = *this;
std::visit(overloaded_functor {
[&] (const unset_marker& um) {
ret.timeout = other.timeout;
maybe_update_timeout_name(ret, other);
},
[&] (const delete_marker& dm) {
ret.timeout = other.timeout;
maybe_update_timeout_name(ret, other);
},
[&] (const lowres_clock::duration& d) {
if (auto* other_timeout = std::get_if<lowres_clock::duration>(&other.timeout)) {
auto prev_timeout = ret.timeout;
ret.timeout = std::min(d, *other_timeout);
if (prev_timeout != ret.timeout) {
maybe_update_timeout_name(ret, other);
}
}
},
}, ret.timeout);
// Specified workloads should be preferred over unspecified ones
auto prev_workload = ret.workload;
if (ret.workload == workload_type::unspecified || other.workload == workload_type::unspecified) {
ret.workload = std::max(ret.workload, other.workload);
} else {
ret.workload = std::min(ret.workload, other.workload);
}
if (prev_workload != ret.workload) {
maybe_update_workload_name(ret, other);
}
return ret;
}
@@ -89,4 +113,11 @@ std::optional<service_level_options::workload_type> service_level_options::parse
return std::nullopt;
}
void service_level_options::init_effective_names(sstring& service_level_name) {
effective_names = service_level_options::slo_effective_names {
.timeout = service_level_name,
.workload = service_level_name
};
}
}

View File

@@ -18,6 +18,8 @@
namespace qos {
enum class include_effective_names { yes, no };
/**
* a structure that holds the configuration for
* a service level.
@@ -47,6 +49,17 @@ struct service_level_options {
static std::string_view to_string(const workload_type& wt);
static std::optional<workload_type> parse_workload_type(std::string_view sv);
struct slo_effective_names {
sstring timeout;
sstring workload;
bool operator==(const slo_effective_names& other) const = default;
bool operator!=(const slo_effective_names& other) const = default;
};
std::optional<slo_effective_names> effective_names = std::nullopt;
void init_effective_names(sstring& service_level_name);
};
std::ostream& operator<<(std::ostream& os, const service_level_options::workload_type&);

View File

@@ -184,12 +184,12 @@ future<> service_level_controller::update_service_levels_from_distributed_data()
});
}
future<std::optional<service_level_options>> service_level_controller::find_service_level(auth::role_set roles) {
future<std::optional<service_level_options>> service_level_controller::find_service_level(auth::role_set roles, include_effective_names include_names) {
auto& role_manager = _auth_service.local().underlying_role_manager();
// converts a list of roles into the chosen service level.
return ::map_reduce(roles.begin(), roles.end(), [&role_manager, this] (const sstring& role) {
return role_manager.get_attribute(role, "service_level").then_wrapped([this, role] (future<std::optional<sstring>> sl_name_fut) -> std::optional<service_level_options> {
return ::map_reduce(roles.begin(), roles.end(), [&role_manager, include_names, this] (const sstring& role) {
return role_manager.get_attribute(role, "service_level").then_wrapped([include_names, this, role] (future<std::optional<sstring>> sl_name_fut) -> std::optional<service_level_options> {
try {
std::optional<sstring> sl_name = sl_name_fut.get0();
if (!sl_name) {
@@ -199,6 +199,10 @@ future<std::optional<service_level_options>> service_level_controller::find_serv
if ( sl_it == _service_levels_db.end()) {
return std::nullopt;
}
if (include_names == include_effective_names::yes) {
sl_it->second.slo.init_effective_names(*sl_name);
}
return sl_it->second.slo;
} catch (...) { // when we fail, we act as if the attribute does not exist so the node
// will not be brought down.

View File

@@ -155,7 +155,7 @@ public:
* @return the effective service level options - they may in particular be a combination
* of options from multiple service levels
*/
future<std::optional<service_level_options>> find_service_level(auth::role_set roles);
future<std::optional<service_level_options>> find_service_level(auth::role_set roles, include_effective_names include_names = include_effective_names::no);
/**
* Gets the service level data by name.

View File

@@ -9,24 +9,54 @@
#############################################################################
from contextlib import contextmanager
from util import unique_name, new_test_table
from util import unique_name, new_test_table, new_user
from cassandra.protocol import InvalidRequest, ReadTimeout
from cassandra.util import Duration
import pytest
import time
@contextmanager
def new_service_level(cql):
def new_service_level(cql, timeout=None, workload_type=None, role=None):
params = ""
if timeout and workload_type:
params = f"WITH timeout = {timeout} AND workload_type = '{workload_type}'"
elif timeout:
params = f"WITH timeout = {timeout}"
elif workload_type:
params = f"WITH workload_type = '{workload_type}'"
attach_to = role if role else cql.cluster.auth_provider.username
try:
sl = f"sl_{unique_name()}"
cql.execute(f"CREATE SERVICE LEVEL {sl}")
cql.execute(f"ATTACH SERVICE LEVEL {sl} TO {cql.cluster.auth_provider.username}")
cql.execute(f"CREATE SERVICE LEVEL {sl} {params}")
cql.execute(f"ATTACH SERVICE LEVEL {sl} TO {attach_to}")
yield sl
finally:
cql.execute(f"DETACH SERVICE LEVEL FROM {cql.cluster.auth_provider.username}")
cql.execute(f"DETACH SERVICE LEVEL FROM {attach_to}")
cql.execute(f"DROP SERVICE LEVEL IF EXISTS {sl}")
# Some of the service levels operations depends on controller's update
# which currently are done in 10s intervals.
# To not do plain 10s sleeps in tests, you should use this function
# to wait as little as possible.
def try_until_success(f, timeout, step_sleep = 0.5):
start_time = time.time()
last_exception = None
while time.time() - start_time < timeout:
try:
f()
return
except Exception as e:
last_exception = e
time.sleep(step_sleep)
if last_exception is not None:
raise last_exception
# Test that setting service level timeouts correctly sets the timeout parameter
def test_set_service_level_timeouts(scylla_only, cql):
with new_service_level(cql) as sl:
@@ -56,30 +86,35 @@ def test_attached_service_level(scylla_only, cql):
res_one = cql.execute(f"LIST ALL ATTACHED SERVICE LEVELS").one()
assert res_one.role == cql.cluster.auth_provider.username and res_one.service_level == sl
# Test that declaring service level workload types is possible
def test_set_workload_type(scylla_only, cql):
with new_service_level(cql) as sl:
res = cql.execute(f"LIST SERVICE LEVEL {sl}")
assert not res.one().workload_type
for wt in ['interactive', 'batch']:
cql.execute(f"ALTER SERVICE LEVEL {sl} WITH workload_type = '{wt}'")
res = cql.execute(f"LIST SERVICE LEVEL {sl}")
assert res.one().workload_type == wt
def test_list_effective_service_level(scylla_only, cql):
sl1 = "sl1"
sl2 = "sl2"
timeout = "10s"
workload_type = "batch"
# Test that workload type input is validated
def test_set_invalid_workload_types(scylla_only, cql):
with new_service_level(cql) as sl:
for incorrect in ['', 'i', 'b', 'dog', 'x'*256]:
print(f"Checking {incorrect}")
with pytest.raises(Exception):
cql.execute(f"ALTER SERVICE LEVEL {sl} WITH workload_type = '{incorrect}'")
with new_user(cql, "r1") as r1:
with new_user(cql, "r2") as r2:
with new_service_level(cql, timeout=timeout, role=r1) as sl1:
with new_service_level(cql, workload_type=workload_type, role=r2) as sl2:
cql.execute(f"GRANT {r2} TO {r1}")
# Test that resetting an already set workload type by assigning NULL to it works fine
def test_reset_workload_type(scylla_only, cql):
with new_service_level(cql) as sl:
cql.execute(f"ALTER SERVICE LEVEL {sl} WITH workload_type = 'interactive'")
res = cql.execute(f"LIST SERVICE LEVEL {sl}")
assert res.one().workload_type == 'interactive'
cql.execute(f"ALTER SERVICE LEVEL {sl} WITH workload_type = null")
res = cql.execute(f"LIST SERVICE LEVEL {sl}")
assert not res.one().workload_type
def check_list_effective_statament():
list_r1 = cql.execute(f"LIST EFFECTIVE SERVICE LEVEL OF {r1}")
for row in list_r1:
if row.service_level_option == "timeout":
assert row.effective_service_level == sl1
assert row.value == "10s"
if row.service_level_option == "workload_type":
assert row.effective_service_level == sl2
assert row.value == "batch"
list_r2 = cql.execute(f"LIST EFFECTIVE SERVICE LEVEL OF {r2}")
for row in list_r2:
if row.service_level_option == "timeout":
assert row.effective_service_level == sl2
assert row.value == None
if row.service_level_option == "workload_type":
assert row.effective_service_level == sl2
assert row.value == "batch"
try_until_success(check_list_effective_statament, 11)