mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-23 01:50:35 +00:00
Tests in `test_service_level_api` were written before scylladb/scylladb#16585 and they were doing 10s sleeps to wait for service level controller to update its configuration. Now performing a read barrier is sufficient to ensure SL configuration is up-to-date, which significantly reduces tests time (from ~60s to ~2-3s). Moreover, there was flakiness in the `test_switch_tenants` test. Until now, the test waited up to 60s for the connections to update their scheduling groups. However, it is difficult to determine how long the process might take because a connection may be blocked while waiting for the next request to be processed, and the scheduling group will be updated only after a request is processed (see `generic_server::connection::process_until_tenant_switch()`). To address this issue, 100 simple queries are executed so that connections on all shards process at least one request and update their scheduling groups. Fixes scylladb/scylladb#22768 Closes scylladb/scylladb#23381
141 lines
5.7 KiB
Python
141 lines
5.7 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright 2024-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
|
|
########################################
|
|
# Tests for the service levels HTTP API.
|
|
########################################
|
|
|
|
import pytest
|
|
from .rest_api import get_request, post_request
|
|
from .util import new_session, unique_name
|
|
import time
|
|
|
|
# All tests in this file check the Scylla-only service levels feature,
|
|
# so let's mark them all scylla_only with an autouse fixture:
|
|
@pytest.fixture(scope="function", autouse=True)
|
|
def all_tests_are_scylla_only(scylla_only):
|
|
pass
|
|
|
|
def get_shard_count(cql):
|
|
return cql.execute("SELECT shard_count FROM system.topology").one().shard_count
|
|
|
|
def read_barrier(cql):
|
|
cql.execute("DROP TABLE IF EXISTS nosuchkeyspace.nosuchtable")
|
|
|
|
def count_opened_connections(cql, retry_unauthenticated=True):
|
|
response = get_request(cql, "service_levels/count_connections")
|
|
return response
|
|
|
|
def switch_tenants(cql):
|
|
return post_request(cql, "service_levels/switch_tenants")
|
|
|
|
def count_opened_connections_from_table(cql):
|
|
connections = cql.execute("SELECT username, scheduling_group FROM system.clients WHERE client_type='cql' ALLOW FILTERING")
|
|
result = {}
|
|
for row in connections:
|
|
user = row[0]
|
|
shg = row[1]
|
|
|
|
if shg in result:
|
|
if user in result[shg]:
|
|
result[shg][user] += 1
|
|
else:
|
|
result[shg][user] = 1
|
|
else:
|
|
result[shg] = {user: 1}
|
|
|
|
return result
|
|
|
|
def wait_until_all_connections_authenticated(cql, wait_s = 1, timeout_s = 30):
|
|
start_time = time.time()
|
|
while time.time() - start_time < timeout_s:
|
|
result = cql.execute("SELECT COUNT(*) FROM system.clients WHERE username='anonymous' ALLOW FILTERING")
|
|
if result.one()[0] == 0:
|
|
return
|
|
else:
|
|
time.sleep(wait_s)
|
|
|
|
raise RuntimeError(f"Awaiting for connections authentication timed out.")
|
|
|
|
# The driver creates 1 connection per shard plus 1 control connection.
|
|
# This function validates that all connections execept the control one use correct scheduling group.
|
|
def verify_scheduling_group_assignment(cql, user, target_scheduling_group, shard_count):
|
|
shards_with_correct_sg = set()
|
|
connections = cql.execute(f"SELECT username, scheduling_group, shard_id FROM system.clients WHERE client_type='cql' AND username='{user}' ALLOW FILTERING")
|
|
|
|
for conn in connections:
|
|
if target_scheduling_group in conn.scheduling_group:
|
|
shards_with_correct_sg.add(conn.shard_id)
|
|
|
|
assert len(shards_with_correct_sg) == shard_count, (f"Not all user '{user}' connections are working under target scheduling group '{target_scheduling_group}'."
|
|
f"Shards with correct scehduling group: {shards_with_correct_sg}, shard")
|
|
|
|
# Test if `/service_levels/count_connections` prints counted CQL connections
|
|
# per scheduling group per user.
|
|
def test_count_opened_cql_connections(cql):
|
|
user = f"test_user_{unique_name()}"
|
|
sl = f"sl_{unique_name()}"
|
|
|
|
cql.execute(f"CREATE ROLE {user} WITH login = true AND password='{user}'")
|
|
cql.execute(f"CREATE SERVICE LEVEL {sl} WITH shares = 100")
|
|
cql.execute(f"ATTACH SERVICE LEVEL {sl} TO {user}")
|
|
read_barrier(cql)
|
|
|
|
try:
|
|
with new_session(cql, user):
|
|
wait_until_all_connections_authenticated(cql)
|
|
verify_scheduling_group_assignment(cql, user, sl, get_shard_count(cql))
|
|
|
|
api_response = count_opened_connections(cql)
|
|
assert f"sl:{sl}" in api_response
|
|
assert user in api_response[f"sl:{sl}"]
|
|
|
|
table_response = count_opened_connections_from_table(cql)
|
|
assert api_response == table_response
|
|
finally:
|
|
cql.execute(f"DETACH SERVICE LEVEL FROM {user}")
|
|
cql.execute(f"DROP ROLE {user}")
|
|
cql.execute(f"DROP SERVICE LEVEL {sl}")
|
|
|
|
# Test if `/service_levels/switch_tenants` updates scheduling group
|
|
# of CQL connections without restarting them.
|
|
#
|
|
# This test creates a `test_user` and 2 service levels `sl1` and `sl2`.
|
|
# Firstly the user is assigned to `sl1` and his connections is created.
|
|
# Then the test changes user's service level to `sl2` and
|
|
# `/service_levels/switch_tenants` endpoint is called.
|
|
def test_switch_tenants(cql):
|
|
user = f"test_user_{unique_name()}"
|
|
sl1 = f"sl1_{unique_name()}"
|
|
sl2 = f"sl2_{unique_name()}"
|
|
shard_count = get_shard_count(cql)
|
|
|
|
cql.execute(f"CREATE ROLE {user} WITH login = true AND password='{user}' AND superuser = true")
|
|
cql.execute(f"CREATE SERVICE LEVEL {sl1} WITH shares = 100")
|
|
cql.execute(f"CREATE SERVICE LEVEL {sl2} WITH shares = 200")
|
|
cql.execute(f"ATTACH SERVICE LEVEL {sl1} TO {user}")
|
|
read_barrier(cql)
|
|
|
|
try:
|
|
with new_session(cql, user) as user_session:
|
|
wait_until_all_connections_authenticated(cql)
|
|
verify_scheduling_group_assignment(cql, user, sl1, shard_count)
|
|
|
|
cql.execute(f"DETACH SERVICE LEVEL FROM {user}")
|
|
cql.execute(f"ATTACH SERVICE LEVEL {sl2} TO {user}")
|
|
read_barrier(cql)
|
|
|
|
switch_tenants(cql)
|
|
# Switching tenants may be blocked if a connection is waiting for a request (see 'generic_server::connection::process_until_tenant_switch()').
|
|
# Execute enough cheap statements, so that connection on each shard will process at one statement and update its tenant.
|
|
for _ in range(100):
|
|
read_barrier(user_session)
|
|
verify_scheduling_group_assignment(cql, user, sl2, shard_count)
|
|
finally:
|
|
cql.execute(f"DETACH SERVICE LEVEL FROM {user}")
|
|
cql.execute(f"DROP ROLE {user}")
|
|
cql.execute(f"DROP SERVICE LEVEL {sl1}")
|
|
cql.execute(f"DROP SERVICE LEVEL {sl2}")
|