mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-19 16:15:07 +00:00
Compare commits
68 Commits
debug_form
...
ykaul/skip
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
65b774309e | ||
|
|
b4f652b7c1 | ||
|
|
ab4a2cdde2 | ||
|
|
b97cf0083c | ||
|
|
6d50e67bd2 | ||
|
|
74542be5aa | ||
|
|
8c0920202b | ||
|
|
2c22d69793 | ||
|
|
b6ebbbf036 | ||
|
|
6526a78334 | ||
|
|
eb78498e07 | ||
|
|
e7527392c4 | ||
|
|
2ec47a8a21 | ||
|
|
4d4ce074bb | ||
|
|
0351756b15 | ||
|
|
18f41dcd71 | ||
|
|
d438e35cdd | ||
|
|
2d2ff4fbda | ||
|
|
2ce409dca0 | ||
|
|
df2924b2a3 | ||
|
|
6d8ec8a0c0 | ||
|
|
f040f1b703 | ||
|
|
216d39883a | ||
|
|
b355bb70c2 | ||
|
|
b9f82f6f23 | ||
|
|
0cb63fb669 | ||
|
|
1d623196eb | ||
|
|
ba54b2272b | ||
|
|
4913acd742 | ||
|
|
ab43420d30 | ||
|
|
068a7894aa | ||
|
|
d32fe72252 | ||
|
|
b8e3ef0c64 | ||
|
|
a86928caa1 | ||
|
|
9dad68e58d | ||
|
|
181ad9f476 | ||
|
|
854c374ebf | ||
|
|
b708e5d7c9 | ||
|
|
c38e312321 | ||
|
|
627a8294ed | ||
|
|
5a086ae9b7 | ||
|
|
c575bbf1e8 | ||
|
|
7fdd650009 | ||
|
|
552a2d0995 | ||
|
|
73de865ca3 | ||
|
|
f988ec18cb | ||
|
|
cd1679934c | ||
|
|
d52fbf7ada | ||
|
|
141aa2d696 | ||
|
|
c670183be8 | ||
|
|
e639dcda0b | ||
|
|
503a6e2d7e | ||
|
|
0f02c0d6fa | ||
|
|
4fead4baae | ||
|
|
ffd58ca1f0 | ||
|
|
f1b3bff4a5 | ||
|
|
38aaed1ed4 | ||
|
|
b382f3593c | ||
|
|
f6fd3bbea0 | ||
|
|
148217bed6 | ||
|
|
2b472fe7fd | ||
|
|
ae12c712ce | ||
|
|
dd446aa442 | ||
|
|
dea79b09a9 | ||
|
|
3d04fd1d13 | ||
|
|
f5438e0587 | ||
|
|
f6ab576ed9 | ||
|
|
b36dc80835 |
@@ -699,6 +699,17 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
||||
// for such a size.
|
||||
co_return api_error::payload_too_large(fmt::format("Request content length limit of {} bytes exceeded", request_content_length_limit));
|
||||
}
|
||||
// Check the concurrency limit early, before acquiring memory and
|
||||
// reading the request body, to avoid piling up memory from excess
|
||||
// requests that will be rejected anyway. This mirrors the CQL
|
||||
// transport which also checks concurrency before memory acquisition
|
||||
// (transport/server.cc).
|
||||
if (_pending_requests.get_count() >= _max_concurrent_requests) {
|
||||
_executor._stats.requests_shed++;
|
||||
co_return api_error::request_limit_exceeded(format("too many in-flight requests (configured via max_concurrent_requests_per_shard): {}", _pending_requests.get_count()));
|
||||
}
|
||||
_pending_requests.enter();
|
||||
auto leave = defer([this] () noexcept { _pending_requests.leave(); });
|
||||
// JSON parsing can allocate up to roughly 2x the size of the raw
|
||||
// document, + a couple of bytes for maintenance.
|
||||
// If the Content-Length of the request is not available, we assume
|
||||
@@ -760,12 +771,6 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
||||
_executor._stats.unsupported_operations++;
|
||||
co_return api_error::unknown_operation(fmt::format("Unsupported operation {}", op));
|
||||
}
|
||||
if (_pending_requests.get_count() >= _max_concurrent_requests) {
|
||||
_executor._stats.requests_shed++;
|
||||
co_return api_error::request_limit_exceeded(format("too many in-flight requests (configured via max_concurrent_requests_per_shard): {}", _pending_requests.get_count()));
|
||||
}
|
||||
_pending_requests.enter();
|
||||
auto leave = defer([this] () noexcept { _pending_requests.leave(); });
|
||||
executor::client_state client_state(service::client_state::external_tag(),
|
||||
_auth_service, &_sl_controller, _timeout_config.current_values(), req->get_client_address());
|
||||
if (!username.empty()) {
|
||||
|
||||
@@ -583,8 +583,7 @@ sstable_format: ms
|
||||
audit: "table"
|
||||
#
|
||||
# List of statement categories that should be audited.
|
||||
# Possible categories are: QUERY, DML, DCL, DDL, AUTH, ADMIN
|
||||
audit_categories: "DCL,AUTH,ADMIN"
|
||||
audit_categories: "DCL,DDL,AUTH,ADMIN"
|
||||
#
|
||||
# List of tables that should be audited.
|
||||
# audit_tables: "<keyspace_name>.<table_name>,<keyspace_name>.<table_name>"
|
||||
|
||||
@@ -1582,7 +1582,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
"\tnone : No auditing enabled.\n"
|
||||
"\tsyslog : Audit messages sent to Syslog.\n"
|
||||
"\ttable : Audit messages written to column family named audit.audit_log.\n")
|
||||
, audit_categories(this, "audit_categories", liveness::LiveUpdate, value_status::Used, "DCL,AUTH,ADMIN", "Comma separated list of operation categories that should be audited.")
|
||||
, audit_categories(this, "audit_categories", liveness::LiveUpdate, value_status::Used, "DCL,DDL,AUTH,ADMIN", "Comma separated list of operation categories that should be audited.")
|
||||
, audit_tables(this, "audit_tables", liveness::LiveUpdate, value_status::Used, "", "Comma separated list of table names (<keyspace>.<table>) that will be audited.")
|
||||
, audit_keyspaces(this, "audit_keyspaces", liveness::LiveUpdate, value_status::Used, "", "Comma separated list of keyspaces that will be audited. All tables in those keyspaces will be audited")
|
||||
, audit_unix_socket_path(this, "audit_unix_socket_path", value_status::Used, "/dev/log", "The path to the unix socket used for writing to syslog. Only applicable when audit is set to syslog.")
|
||||
|
||||
28
dist/common/scripts/scylla_swap_setup
vendored
28
dist/common/scripts/scylla_swap_setup
vendored
@@ -9,6 +9,7 @@
|
||||
|
||||
import os
|
||||
import sys
|
||||
import shlex
|
||||
import argparse
|
||||
import psutil
|
||||
from pathlib import Path
|
||||
@@ -103,16 +104,41 @@ if __name__ == '__main__':
|
||||
run('dd if=/dev/zero of={} bs=1M count={}'.format(swapfile, swapsize_mb), shell=True, check=True)
|
||||
swapfile.chmod(0o600)
|
||||
run('mkswap -f {}'.format(swapfile), shell=True, check=True)
|
||||
|
||||
mount_point = find_mount_point(swap_directory)
|
||||
mount_unit = out(f'systemd-escape -p --suffix=mount {shlex.quote(str(mount_point))}')
|
||||
|
||||
# Add DefaultDependencies=no to the swap unit to avoid getting the default
|
||||
# Before=swap.target dependency. We apply this to all clouds, but the
|
||||
# requirement came from Azure:
|
||||
#
|
||||
# On Azure, the swap directory is on the Azure ephemeral disk (mounted on /mnt).
|
||||
# However, cloud-init makes this mount (i.e., the mnt.mount unit) depend on
|
||||
# the network (After=network-online.target). By extension, this means that
|
||||
# the swap unit depends on the network. If we didn't use DefaultDependencies=no,
|
||||
# then the swap unit would be part of the swap.target which other services
|
||||
# assume to be a local boot target, so we would end up with dependency cycles
|
||||
# such as:
|
||||
#
|
||||
# swap.target -> mnt-swapfile.swap -> mnt.mount -> network-online.target -> network.target -> systemd-resolved.service -> tmp.mount -> swap.target
|
||||
#
|
||||
# By removing the automatic Before=swap.target, the swap unit is no longer
|
||||
# part of swap.target, avoiding such cycles. The swap will still be
|
||||
# activated via WantedBy=multi-user.target.
|
||||
unit_data = '''
|
||||
[Unit]
|
||||
Description=swapfile
|
||||
DefaultDependencies=no
|
||||
After={}
|
||||
Conflicts=umount.target
|
||||
Before=umount.target
|
||||
|
||||
[Swap]
|
||||
What={}
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
'''[1:-1].format(swapfile)
|
||||
'''[1:-1].format(mount_unit, swapfile)
|
||||
with swapunit.open('w') as f:
|
||||
f.write(unit_data)
|
||||
systemd_unit.reload()
|
||||
|
||||
@@ -42,7 +42,14 @@ void everywhere_replication_strategy::validate_options(const gms::feature_servic
|
||||
|
||||
sstring everywhere_replication_strategy::sanity_check_read_replicas(const effective_replication_map& erm, const host_id_vector_replica_set& read_replicas) const {
|
||||
const auto replication_factor = erm.get_replication_factor();
|
||||
if (read_replicas.size() > replication_factor) {
|
||||
if (const auto& topo_info = erm.get_token_metadata().get_topology_change_info(); topo_info && topo_info->read_new) {
|
||||
if (read_replicas.size() > replication_factor + 1) {
|
||||
return seastar::format(
|
||||
"everywhere_replication_strategy: the number of replicas for everywhere_replication_strategy is {}, "
|
||||
"cannot be higher than replication factor {} + 1 during the 'read from new replicas' stage of a topology change",
|
||||
read_replicas.size(), replication_factor);
|
||||
}
|
||||
} else if (read_replicas.size() > replication_factor) {
|
||||
return seastar::format("everywhere_replication_strategy: the number of replicas for everywhere_replication_strategy is {}, cannot be higher than replication factor {}", read_replicas.size(), replication_factor);
|
||||
}
|
||||
return {};
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e59fe56eac435fd03c2f0d7dfc11c6998d7c0750e1851535575497dd13d96015
|
||||
size 6505524
|
||||
oid sha256:54662978b9ce4a6e25790b1b0a5099e6063173ffa95a399a6287cf474376ed09
|
||||
size 6595952
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:34a0955d2c5a88e18ddab0f1df085e10a17e14129c3e21de91e4f27ef949b6c4
|
||||
size 6502668
|
||||
oid sha256:0cf44ea1fb2ae20de45d26fe8095054e60cb8700cddcb2fd79ef79705484b18a
|
||||
size 6603780
|
||||
|
||||
23
raft/fsm.cc
23
raft/fsm.cc
@@ -1098,7 +1098,8 @@ std::optional<std::pair<read_id, index_t>> fsm::start_read_barrier(server_id req
|
||||
|
||||
// Make sure that only a leader or a node that is part of the config can request read barrier
|
||||
// Nodes outside of the config may never get the data, so they will not be able to read it.
|
||||
if (requester != _my_id && leader_state().tracker.find(requester) == nullptr) {
|
||||
follower_progress* opt_progress = leader_state().tracker.find(requester);
|
||||
if (requester != _my_id && opt_progress == nullptr) {
|
||||
throw std::runtime_error(fmt::format("Read barrier requested by a node outside of the configuration {}", requester));
|
||||
}
|
||||
|
||||
@@ -1109,19 +1110,23 @@ std::optional<std::pair<read_id, index_t>> fsm::start_read_barrier(server_id req
|
||||
return {};
|
||||
}
|
||||
|
||||
// Optimization for read barriers requested on non-voters. A non-voter doesn't receive the read_quorum message, so
|
||||
// it might update its commit index only after another leader tick, which would slow down wait_for_apply() at the
|
||||
// end of the read barrier. Prevent that by replicating to the non-voting requester here.
|
||||
if (requester != _my_id && opt_progress->commit_idx < _commit_idx && opt_progress->match_idx == _log.last_idx()
|
||||
&& !opt_progress->can_vote) {
|
||||
logger.trace("start_read_barrier[{}]: replicate to {} because follower commit_idx={} < commit_idx={}, "
|
||||
"follower match_idx={} == last_idx={}, and follower can_vote={}",
|
||||
_my_id, requester, opt_progress->commit_idx, _commit_idx, opt_progress->match_idx,
|
||||
_log.last_idx(), opt_progress->can_vote);
|
||||
replicate_to(*opt_progress, true);
|
||||
}
|
||||
|
||||
read_id id = next_read_id();
|
||||
logger.trace("start_read_barrier[{}] starting read barrier with id {}", _my_id, id);
|
||||
return std::make_pair(id, _commit_idx);
|
||||
}
|
||||
|
||||
void fsm::maybe_update_commit_idx_for_read(index_t read_idx) {
|
||||
// read_idx from the leader might not be replicated to the local node yet.
|
||||
const bool in_local_log = read_idx <= _log.last_idx();
|
||||
if (in_local_log && log_term_for(read_idx) == get_current_term()) {
|
||||
advance_commit_idx(read_idx);
|
||||
}
|
||||
}
|
||||
|
||||
void fsm::stop() {
|
||||
if (is_leader()) {
|
||||
// Become follower to stop accepting requests
|
||||
|
||||
@@ -480,15 +480,6 @@ public:
|
||||
|
||||
std::optional<std::pair<read_id, index_t>> start_read_barrier(server_id requester);
|
||||
|
||||
// Update the commit index to the read index (a read barrier result from the leader) if the local entry with the
|
||||
// read index belongs to the current term.
|
||||
//
|
||||
// Satisfying the condition above guarantees that the local log matches the current leader's log up to the read
|
||||
// index (the Log Matching Property), so the current leader won't drop the local entry with the read index.
|
||||
// Moreover, this entry has been committed by the leader, so future leaders also won't drop it (the Leader
|
||||
// Completeness Property). Hence, updating the commit index is safe.
|
||||
void maybe_update_commit_idx_for_read(index_t read_idx);
|
||||
|
||||
size_t in_memory_log_size() const {
|
||||
return _log.in_memory_size();
|
||||
}
|
||||
|
||||
@@ -1109,6 +1109,18 @@ future<> server_impl::process_fsm_output(index_t& last_stable, fsm_output&& batc
|
||||
// case.
|
||||
co_await _persistence->store_term_and_vote(batch.term_and_vote->first, batch.term_and_vote->second);
|
||||
_stats.store_term_and_vote++;
|
||||
|
||||
// When the term advances, any in-flight snapshot transfers
|
||||
// belong to an outdated term: the progress tracker has been
|
||||
// reset in become_leader() or we are now a follower.
|
||||
// Abort them before we dispatch this batch's messages, which
|
||||
// may start fresh transfers for the new term.
|
||||
//
|
||||
// A vote may also change independently of the term (e.g. a
|
||||
// follower voting for a candidate at the same term), but in
|
||||
// that case there are no in-flight transfers and the abort
|
||||
// is a no-op.
|
||||
abort_snapshot_transfers();
|
||||
}
|
||||
|
||||
if (batch.snp) {
|
||||
@@ -1218,8 +1230,6 @@ future<> server_impl::process_fsm_output(index_t& last_stable, fsm_output&& batc
|
||||
// quickly) stop happening (we're outside the config after all).
|
||||
co_await _apply_entries.push_eventually(removed_from_config{});
|
||||
}
|
||||
// request aborts of snapshot transfers
|
||||
abort_snapshot_transfers();
|
||||
// abort all read barriers
|
||||
for (auto& r : _reads) {
|
||||
r.promise.set_value(not_a_leader{_fsm->current_leader()});
|
||||
@@ -1561,7 +1571,6 @@ future<> server_impl::read_barrier(seastar::abort_source* as) {
|
||||
co_return stop_iteration::no;
|
||||
}
|
||||
read_idx = std::get<index_t>(res);
|
||||
_fsm->maybe_update_commit_idx_for_read(read_idx);
|
||||
co_return stop_iteration::yes;
|
||||
});
|
||||
|
||||
|
||||
@@ -87,6 +87,11 @@ target_include_directories(wasmtime_bindings
|
||||
target_link_libraries(wasmtime_bindings
|
||||
INTERFACE Rust::rust_combined)
|
||||
if (Scylla_USE_PRECOMPILED_HEADER_USE)
|
||||
# The PCH from scylla-precompiled-header is compiled with Seastar's compile
|
||||
# flags, including sanitizer flags in Debug/Sanitize modes. Any target reusing
|
||||
# this PCH must have matching compile options, otherwise the compiler rejects
|
||||
# the PCH due to flag mismatch (e.g., -fsanitize=address).
|
||||
target_link_libraries(wasmtime_bindings PRIVATE Seastar::seastar)
|
||||
target_precompile_headers(wasmtime_bindings REUSE_FROM scylla-precompiled-header)
|
||||
endif()
|
||||
|
||||
@@ -108,5 +113,6 @@ target_include_directories(inc
|
||||
target_link_libraries(inc
|
||||
INTERFACE Rust::rust_combined)
|
||||
if (Scylla_USE_PRECOMPILED_HEADER_USE)
|
||||
target_link_libraries(inc PRIVATE Seastar::seastar)
|
||||
target_precompile_headers(inc REUSE_FROM scylla-precompiled-header)
|
||||
endif()
|
||||
|
||||
@@ -538,6 +538,7 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
|
||||
group0_id = g0_info.group0_id;
|
||||
raft::server_address my_addr{my_id, {}};
|
||||
|
||||
bool starting_server_as_follower = false;
|
||||
if (server == nullptr) {
|
||||
// This is the first time discovery is run. Create and start a Raft server for group 0 on this node.
|
||||
raft::configuration initial_configuration;
|
||||
@@ -565,6 +566,7 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
|
||||
// trigger an empty snapshot transfer.
|
||||
nontrivial_snapshot = true;
|
||||
} else {
|
||||
starting_server_as_follower = true;
|
||||
co_await handshaker->pre_server_start(g0_info);
|
||||
}
|
||||
|
||||
@@ -591,7 +593,9 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
|
||||
}
|
||||
|
||||
SCYLLA_ASSERT(server);
|
||||
if (server->get_configuration().contains(my_id)) {
|
||||
co_await utils::get_local_injector().inject("join_group0_pause_before_config_check",
|
||||
utils::wait_for_message(std::chrono::minutes{5}));
|
||||
if (!starting_server_as_follower && server->get_configuration().contains(my_id)) {
|
||||
// True if we started a new group or completed a configuration change initiated earlier.
|
||||
group0_log.info("server {} already in group 0 (id {}) as {}", my_id, group0_id,
|
||||
server->get_configuration().can_vote(my_id)? "voter" : "non-voter");
|
||||
|
||||
@@ -239,11 +239,9 @@ public:
|
||||
// The i-th element corresponds to the i-th entry in _entries.
|
||||
// Can be smaller than _entries. If _entries[i] doesn't have a matching element in _promoted_indexes then
|
||||
// that entry doesn't have a promoted index.
|
||||
// It's not chunked, because promoted index is present only when there are large partitions in the page,
|
||||
// which also means the page will have typically only 1 entry due to summary:data_file size ratio.
|
||||
// Kept separately to avoid paying for storage cost in pages where no entry has a promoted index,
|
||||
// which is typical in workloads with small partitions.
|
||||
managed_vector<promoted_index> _promoted_indexes;
|
||||
lsa::chunked_managed_vector<promoted_index> _promoted_indexes;
|
||||
public:
|
||||
partition_index_page() = default;
|
||||
partition_index_page(partition_index_page&&) noexcept = default;
|
||||
|
||||
75
test.py
75
test.py
@@ -11,6 +11,7 @@ from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import math
|
||||
import shlex
|
||||
import textwrap
|
||||
from random import randint
|
||||
@@ -73,6 +74,51 @@ PYTEST_RUNNER_DIRECTORIES = [
|
||||
|
||||
launch_time = time.monotonic()
|
||||
|
||||
class ThreadsCalculator:
|
||||
"""
|
||||
The ThreadsCalculator class calculates the number of jobs that can be run concurrently based on system
|
||||
memory and CPU constraints. It allows resource reservation and configurable parameters for
|
||||
flexible job scheduling in various modes, such as `debug`.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
modes: list[str],
|
||||
min_system_memory_reserve: float = 5e9,
|
||||
max_system_memory_reserve: float = 8e9,
|
||||
system_memory_reserve_fraction = 16,
|
||||
max_test_memory: float = 5e9,
|
||||
test_memory_fraction: float = 8.0,
|
||||
debug_test_memory_multiplier: float = 1.5,
|
||||
debug_cpus_per_test_job=1.5,
|
||||
non_debug_cpus_per_test_job: float =1.0,
|
||||
non_debug_max_test_memory: float = 4e9
|
||||
):
|
||||
sys_mem = int(os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES"))
|
||||
test_mem = min(sys_mem / test_memory_fraction, max_test_memory)
|
||||
if "debug" in modes:
|
||||
test_mem *= debug_test_memory_multiplier
|
||||
system_memory_reserve = int(min(
|
||||
max(sys_mem / system_memory_reserve_fraction, min_system_memory_reserve),
|
||||
max_system_memory_reserve,
|
||||
))
|
||||
available_mem = max(0, sys_mem - system_memory_reserve)
|
||||
is_debug = "debug" in modes
|
||||
test_mem = min(
|
||||
sys_mem / test_memory_fraction,
|
||||
max_test_memory if is_debug else non_debug_max_test_memory,
|
||||
)
|
||||
if is_debug:
|
||||
test_mem *= debug_test_memory_multiplier
|
||||
self.cpus_per_test_job = (
|
||||
debug_cpus_per_test_job if is_debug else non_debug_cpus_per_test_job
|
||||
)
|
||||
self.default_num_jobs_mem = max(1, int(available_mem // test_mem))
|
||||
|
||||
def get_number_of_threads(self, nr_cpus: int) -> int:
|
||||
default_num_jobs_cpu = max(1, math.ceil(nr_cpus / self.cpus_per_test_job))
|
||||
return min(self.default_num_jobs_mem, default_num_jobs_cpu)
|
||||
|
||||
|
||||
|
||||
class TabularConsoleOutput:
|
||||
"""Print test progress to the console"""
|
||||
@@ -181,7 +227,7 @@ def parse_cmd_line() -> argparse.Namespace:
|
||||
help="Run only tests for given build mode(s)")
|
||||
parser.add_argument('--repeat', action="store", default="1", type=int,
|
||||
help="number of times to repeat test execution")
|
||||
parser.add_argument('--timeout', action="store", default="24000", type=int,
|
||||
parser.add_argument('--timeout', action="store", default="3600", type=int,
|
||||
help="timeout value for single test execution")
|
||||
parser.add_argument('--session-timeout', action="store", default="24000", type=int,
|
||||
help="timeout value for test.py/pytest session execution")
|
||||
@@ -273,6 +319,13 @@ def parse_cmd_line() -> argparse.Namespace:
|
||||
if args.skip_patterns and args.k:
|
||||
parser.error(palette.fail('arguments --skip and -k are mutually exclusive, please use only one of them'))
|
||||
|
||||
if not args.modes:
|
||||
try:
|
||||
args.modes = get_configured_modes()
|
||||
except Exception:
|
||||
print(palette.fail("Failed to read output of `ninja mode_list`: please run ./configure.py first"))
|
||||
raise
|
||||
|
||||
if not args.jobs:
|
||||
if not args.cpus:
|
||||
nr_cpus = multiprocessing.cpu_count()
|
||||
@@ -280,19 +333,7 @@ def parse_cmd_line() -> argparse.Namespace:
|
||||
nr_cpus = int(subprocess.check_output(
|
||||
['taskset', '-c', args.cpus, 'python3', '-c',
|
||||
'import os; print(len(os.sched_getaffinity(0)))']))
|
||||
|
||||
cpus_per_test_job = 1
|
||||
sysmem = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
|
||||
testmem = 6e9 if os.sysconf('SC_PAGE_SIZE') > 4096 else 2e9
|
||||
default_num_jobs_mem = ((sysmem - 4e9) // testmem)
|
||||
args.jobs = min(default_num_jobs_mem, nr_cpus // cpus_per_test_job)
|
||||
|
||||
if not args.modes:
|
||||
try:
|
||||
args.modes = get_configured_modes()
|
||||
except Exception:
|
||||
print(palette.fail("Failed to read output of `ninja mode_list`: please run ./configure.py first"))
|
||||
raise
|
||||
args.jobs = ThreadsCalculator(args.modes).get_number_of_threads(nr_cpus)
|
||||
|
||||
if not args.coverage_modes and args.coverage:
|
||||
args.coverage_modes = list(args.modes)
|
||||
@@ -350,16 +391,12 @@ def run_pytest(options: argparse.Namespace) -> tuple[int, list[SimpleNamespace]]
|
||||
if options.list_tests:
|
||||
args.extend(['--collect-only', '--quiet', '--no-header'])
|
||||
else:
|
||||
threads = int(options.jobs)
|
||||
# debug mode is very CPU and memory hungry, so we need to lower the number of threads to be able to finish tests
|
||||
if 'debug' in options.modes:
|
||||
threads = int(threads * 0.5)
|
||||
args.extend([
|
||||
"--log-level=DEBUG", # Capture logs
|
||||
f'--junit-xml={junit_output_file}',
|
||||
"-rf",
|
||||
'--test-py-init',
|
||||
f'-n{threads}',
|
||||
f'-n{options.jobs}',
|
||||
f'--tmpdir={temp_dir}',
|
||||
f'--maxfail={options.max_failures}',
|
||||
f'--alluredir={report_dir / f"allure_{HOST_ID}"}',
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/core/manual_clock.hh>
|
||||
#include <seastar/util/later.hh>
|
||||
#include <seastar/core/timer.hh>
|
||||
#include <seastar/util/defer.hh>
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
#include <seastar/util/alloc_failure_injector.hh>
|
||||
@@ -290,12 +290,17 @@ SEASTAR_THREAD_TEST_CASE(test_address_map_replication) {
|
||||
m.set_expiring(id1);
|
||||
BOOST_CHECK(m.find(id1) && *m.find(id1) == addr1);
|
||||
m.barrier().get();
|
||||
promise<> shard0_timer_expired;
|
||||
timer<manual_clock> shard0_timer([&shard0_timer_expired] {
|
||||
shard0_timer_expired.set_value();
|
||||
});
|
||||
shard0_timer.arm(manual_clock::now() + expiration_time);
|
||||
m_svc.invoke_on(1, [] (address_map_t<manual_clock>& m) {
|
||||
BOOST_CHECK(m.find(id1) && *m.find(id1) == addr1);
|
||||
manual_clock::advance(expiration_time);
|
||||
BOOST_CHECK(!m.find(id1));
|
||||
return smp::submit_to(0, []{}); // Ensure shard 0 notices timer is expired.
|
||||
}).get();
|
||||
shard0_timer_expired.get_future().get();
|
||||
BOOST_CHECK(!m.find(id1));
|
||||
|
||||
// Expiring entries are replicated
|
||||
|
||||
@@ -1045,7 +1045,6 @@ validate_result_size(size_t i, schema_ptr schema, const utils::chunked_vector<mu
|
||||
|
||||
struct fuzzy_test_config {
|
||||
uint32_t seed;
|
||||
std::chrono::seconds timeout;
|
||||
unsigned concurrency;
|
||||
unsigned scans;
|
||||
};
|
||||
@@ -1077,6 +1076,9 @@ run_fuzzy_test_scan(size_t i, fuzzy_test_config cfg, sharded<replica::database>&
|
||||
testlog.debug("[scan#{}]: seed={}, is_stateful={}, prange={}, ckranges={}", i, seed, is_stateful, partition_range,
|
||||
partition_slice.default_row_ranges());
|
||||
|
||||
// Use a small max_size to force many pages per scan, stressing the
|
||||
// paging and result-merging logic. With the large row limit here,
|
||||
// the byte limit is typically the tighter bound.
|
||||
const auto [results, npages] = read_partitions_with_paged_scan(db, schema, 1000, 1024, is_stateful, partition_range, partition_slice);
|
||||
|
||||
const auto expected_partitions = slice_partitions(*schema, mutations, partition_index_range, partition_slice);
|
||||
@@ -1160,21 +1162,27 @@ SEASTAR_THREAD_TEST_CASE(fuzzy_test) {
|
||||
std::uniform_int_distribution<size_t>(0, 100), // clustering-rows
|
||||
std::uniform_int_distribution<size_t>(0, 100), // range-tombstones
|
||||
#else
|
||||
// Keep these values moderate: with complex randomly-generated
|
||||
// schemas (deeply nested frozen collections/UDTs), large row
|
||||
// counts cause data generation and paged scanning to be very
|
||||
// slow, leading to CI timeouts. The test's value comes from
|
||||
// schema variety and paging correctness, not from sheer data
|
||||
// volume.
|
||||
std::uniform_int_distribution<size_t>(32, 64), // partitions
|
||||
std::uniform_int_distribution<size_t>(0, 1000), // clustering-rows
|
||||
std::uniform_int_distribution<size_t>(0, 1000), // range-tombstones
|
||||
std::uniform_int_distribution<size_t>(0, 200), // clustering-rows
|
||||
std::uniform_int_distribution<size_t>(0, 200), // range-tombstones
|
||||
#endif
|
||||
tests::default_timestamp_generator());
|
||||
|
||||
#if defined DEBUG
|
||||
auto cfg = fuzzy_test_config{seed, std::chrono::seconds{8}, 1, 1};
|
||||
auto cfg = fuzzy_test_config{seed, 1, 1};
|
||||
#elif defined DEVEL
|
||||
auto cfg = fuzzy_test_config{seed, std::chrono::seconds{2}, 2, 4};
|
||||
auto cfg = fuzzy_test_config{seed, 2, 4};
|
||||
#else
|
||||
auto cfg = fuzzy_test_config{seed, std::chrono::seconds{2}, 4, 8};
|
||||
auto cfg = fuzzy_test_config{seed, 4, 8};
|
||||
#endif
|
||||
|
||||
testlog.info("Running test workload with configuration: seed={}, timeout={}s, concurrency={}, scans={}", cfg.seed, cfg.timeout.count(),
|
||||
testlog.info("Running test workload with configuration: seed={}, concurrency={}, scans={}", cfg.seed,
|
||||
cfg.concurrency, cfg.scans);
|
||||
|
||||
smp::invoke_on_all([cfg, db = &env.db(), gs = global_schema_ptr(tbl.schema), &compacted_frozen_mutations = tbl.compacted_frozen_mutations] {
|
||||
|
||||
@@ -906,9 +906,13 @@ SEASTAR_THREAD_TEST_CASE(test_timeout_is_applied_on_lookup) {
|
||||
BOOST_REQUIRE(entry.permit.timeout() == new_timeout);
|
||||
BOOST_REQUIRE(!entry.permit.get_abort_exception());
|
||||
|
||||
sleep(ttl_timeout_test_timeout * 2).get();
|
||||
// Don't waste time retrying before the timeout is up
|
||||
sleep(ttl_timeout_test_timeout).get();
|
||||
|
||||
eventually_true([&entry] {
|
||||
return bool(entry.permit.get_abort_exception());
|
||||
});
|
||||
|
||||
BOOST_REQUIRE(entry.permit.get_abort_exception());
|
||||
BOOST_REQUIRE_THROW(std::rethrow_exception(entry.permit.get_abort_exception()), seastar::named_semaphore_timed_out);
|
||||
}
|
||||
|
||||
|
||||
@@ -2644,7 +2644,10 @@ SEASTAR_TEST_CASE(test_exception_safety_of_update_from_memtable) {
|
||||
return rd;
|
||||
};
|
||||
|
||||
populate_range(cache, population_range);
|
||||
{
|
||||
memory::scoped_critical_alloc_section dfg;
|
||||
populate_range(cache, population_range);
|
||||
}
|
||||
auto rd1_v1 = assert_that(make_reader(population_range));
|
||||
mutation_reader_opt snap;
|
||||
auto close_snap = defer([&snap] {
|
||||
|
||||
@@ -257,39 +257,44 @@ async def manager(request: pytest.FixtureRequest,
|
||||
yield manager_client
|
||||
# `request.node.stash` contains a report stored in `pytest_runtest_makereport` from where we can retrieve
|
||||
# test failure.
|
||||
report = request.node.stash[PHASE_REPORT_KEY]
|
||||
failed = report.when == "call" and report.failed
|
||||
|
||||
# Check if the test has the check_nodes_for_errors marker
|
||||
found_errors = await manager_client.check_all_errors(check_all_errors=(request.node.get_closest_marker("check_nodes_for_errors") is not None))
|
||||
|
||||
cluster_status = None
|
||||
found_errors = {}
|
||||
failed = False
|
||||
failed_test_dir_path = None
|
||||
if failed or found_errors:
|
||||
# Save scylladb logs for failed tests in a separate directory and copy XML report to the same directory to have
|
||||
# all related logs in one dir.
|
||||
# Then add property to the XML report with the path to the directory, so it can be visible in Jenkins
|
||||
failed_test_dir_path = testpy_test.suite.log_dir / "failed_test" / test_case_name.translate(
|
||||
str.maketrans('[]', '()'))
|
||||
failed_test_dir_path.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
report = request.node.stash[PHASE_REPORT_KEY]
|
||||
failed = report.when == "call" and report.failed
|
||||
|
||||
if failed:
|
||||
await manager_client.gather_related_logs(
|
||||
failed_test_dir_path,
|
||||
{'pytest.log': test_log, 'test_py.log': test_py_log_test}
|
||||
)
|
||||
with open(failed_test_dir_path / "stacktrace.txt", "w") as f:
|
||||
f.write(report.longreprtext)
|
||||
if request.config.getoption('artifacts_dir_url') is not None:
|
||||
# get the relative path to the tmpdir for the failed directory
|
||||
dir_path_relative = f"{failed_test_dir_path.as_posix()[failed_test_dir_path.as_posix().find('testlog'):]}"
|
||||
full_url = urllib.parse.urljoin(request.config.getoption('artifacts_dir_url') + '/',
|
||||
urllib.parse.quote(dir_path_relative))
|
||||
record_property("TEST_LOGS", full_url)
|
||||
# Check if the test has the check_nodes_for_errors marker
|
||||
found_errors = await manager_client.check_all_errors(check_all_errors=(request.node.get_closest_marker("check_nodes_for_errors") is not None))
|
||||
|
||||
cluster_status = await manager_client.after_test(test_case_name, not failed)
|
||||
await manager_client.stop() # Stop client session and close driver after each test
|
||||
if failed or found_errors:
|
||||
# Save scylladb logs for failed tests in a separate directory and copy XML report to the same directory to have
|
||||
# all related logs in one dir.
|
||||
# Then add property to the XML report with the path to the directory, so it can be visible in Jenkins
|
||||
failed_test_dir_path = testpy_test.suite.log_dir / "failed_test" / test_case_name.translate(
|
||||
str.maketrans('[]', '()'))
|
||||
failed_test_dir_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if cluster_status["server_broken"] and not failed:
|
||||
if failed:
|
||||
await manager_client.gather_related_logs(
|
||||
failed_test_dir_path,
|
||||
{'pytest.log': test_log, 'test_py.log': test_py_log_test}
|
||||
)
|
||||
with open(failed_test_dir_path / "stacktrace.txt", "w") as f:
|
||||
f.write(report.longreprtext)
|
||||
if request.config.getoption('artifacts_dir_url') is not None:
|
||||
# get the relative path to the tmpdir for the failed directory
|
||||
dir_path_relative = f"{failed_test_dir_path.as_posix()[failed_test_dir_path.as_posix().find('testlog'):]}"
|
||||
full_url = urllib.parse.urljoin(request.config.getoption('artifacts_dir_url') + '/',
|
||||
urllib.parse.quote(dir_path_relative))
|
||||
record_property("TEST_LOGS", full_url)
|
||||
|
||||
cluster_status = await manager_client.after_test(test_case_name, not failed)
|
||||
finally:
|
||||
await manager_client.stop() # Stop client session and close driver after each test
|
||||
|
||||
if cluster_status is not None and cluster_status["server_broken"] and not failed:
|
||||
failed = True
|
||||
pytest.fail(
|
||||
f"test case {test_case_name} left unfinished tasks on Scylla server. Server marked as broken,"
|
||||
|
||||
@@ -10,6 +10,7 @@ import random
|
||||
import string
|
||||
import tempfile
|
||||
import time
|
||||
import threading
|
||||
from concurrent.futures.thread import ThreadPoolExecutor
|
||||
from pprint import pformat
|
||||
|
||||
@@ -273,6 +274,30 @@ class TesterAlternator(BaseAlternator):
|
||||
logger.info("Testing and validating an update query using key condition expression")
|
||||
logger.info(f"ConditionExpression update of short circuit is: {conditional_update_short_circuit}")
|
||||
dc2_table.update_item(**conditional_update_short_circuit)
|
||||
# Wait for cross-DC replication to reach both live DC1 nodes
|
||||
# before stopping dc2_node. The LWT commit uses LOCAL_QUORUM,
|
||||
# which only guarantees DC2 persistence; replication to DC1 is
|
||||
# async background work. Without this wait, stopping dc2_node
|
||||
# can drop in-flight RPCs to DC1 while CAS mutations don't
|
||||
# store hints. We must confirm both live DC1 replicas have the
|
||||
# data so that the later ConsistentRead=True (LOCAL_QUORUM)
|
||||
# read on restarted node1 is guaranteed to succeed.
|
||||
# See https://scylladb.atlassian.net/browse/SCYLLADB-1267
|
||||
dc1_live_nodes = [
|
||||
node for node in self.cluster.nodelist()
|
||||
if node.data_center == node1.data_center and node.server_id != node1.server_id
|
||||
]
|
||||
dc1_live_tables = [self.get_table(table_name=TABLE_NAME, node=node) for node in dc1_live_nodes]
|
||||
wait_for(
|
||||
lambda: all(
|
||||
t.get_item(
|
||||
Key={self._table_primary_key: new_pk_val}, ConsistentRead=False
|
||||
).get("Item", {}).get("c") == 3
|
||||
for t in dc1_live_tables
|
||||
),
|
||||
timeout=60,
|
||||
text="Waiting for cross-DC replication of conditional update to both live DC1 nodes",
|
||||
)
|
||||
dc2_node.stop()
|
||||
node1.start()
|
||||
|
||||
@@ -481,28 +506,33 @@ class TesterAlternator(BaseAlternator):
|
||||
2) Issue Alternator 'heavy' requests concurrently (create-table)
|
||||
3) wait for RequestLimitExceeded error response.
|
||||
"""
|
||||
concurrent_requests_limit = 5
|
||||
# Keep the limit low to avoid exhausting LSA memory on the 1GB test node
|
||||
# when multiple CreateTable requests (Raft + schema + flush) run concurrently.
|
||||
concurrent_requests_limit = 3
|
||||
extra_config = {"max_concurrent_requests_per_shard": concurrent_requests_limit, "num_tokens": 1}
|
||||
self.prepare_dynamodb_cluster(num_of_nodes=1, extra_config=extra_config)
|
||||
node1 = self.cluster.nodelist()[0]
|
||||
create_tables_threads = []
|
||||
for tables_num in range(concurrent_requests_limit * 5):
|
||||
create_tables_threads.append(self.run_create_table_thread())
|
||||
stop_workers = threading.Event()
|
||||
|
||||
@retrying(num_attempts=150, sleep_time=0.2, allowed_exceptions=ConcurrencyLimitNotExceededError, message="Running create-table request")
|
||||
def wait_for_create_table_request_failure():
|
||||
try:
|
||||
self.create_table(table_name=random_string(length=10), node=node1, wait_until_table_exists=False)
|
||||
except Exception as error:
|
||||
if "RequestLimitExceeded" in error.args[0]:
|
||||
return
|
||||
raise
|
||||
raise ConcurrencyLimitNotExceededError
|
||||
def run_create_table_until_limited() -> None:
|
||||
while not stop_workers.is_set():
|
||||
try:
|
||||
self.create_table(table_name=random_string(length=10), node=node1, wait_until_table_exists=False)
|
||||
except Exception as error: # noqa: BLE001
|
||||
if "RequestLimitExceeded" in str(error):
|
||||
stop_workers.set()
|
||||
return
|
||||
raise
|
||||
|
||||
wait_for_create_table_request_failure()
|
||||
with ThreadPoolExecutor(max_workers=concurrent_requests_limit * 5) as executor:
|
||||
create_table_futures = [executor.submit(run_create_table_until_limited) for _ in range(concurrent_requests_limit * 5)]
|
||||
|
||||
for thread in create_tables_threads:
|
||||
thread.join()
|
||||
if not stop_workers.wait(timeout=30):
|
||||
raise ConcurrencyLimitNotExceededError
|
||||
|
||||
stop_workers.set()
|
||||
for future in create_table_futures:
|
||||
future.result(timeout=60)
|
||||
|
||||
@staticmethod
|
||||
def _set_slow_query_logging_api(run_on_node: ScyllaNode, is_enable: bool = True, threshold: int | None = None):
|
||||
|
||||
@@ -1182,9 +1182,9 @@ class TestAuth(Tester):
|
||||
def get_session(self, node_idx=0, user=None, password=None, exclusive=True):
|
||||
node = self.cluster.nodelist()[node_idx]
|
||||
if exclusive:
|
||||
conn = self.patient_exclusive_cql_connection(node, user=user, password=password, timeout=0.1)
|
||||
conn = self.patient_exclusive_cql_connection(node, user=user, password=password)
|
||||
else:
|
||||
conn = self.patient_cql_connection(node, user=user, password=password, timeout=0.1)
|
||||
conn = self.patient_cql_connection(node, user=user, password=password)
|
||||
return conn
|
||||
|
||||
def assert_permissions_listed(self, expected, session, query, include_superuser=False):
|
||||
|
||||
@@ -199,7 +199,7 @@ class GSServer(GSFront):
|
||||
|
||||
def unpublish(self):
|
||||
for k in self.vars:
|
||||
v = self.oldvars[k]
|
||||
v = self.oldvars.get(k)
|
||||
if v:
|
||||
os.environ[k] = v
|
||||
elif os.environ.get(k):
|
||||
|
||||
@@ -215,6 +215,11 @@ async def test_node_ops_tasks_tree(manager: ManagerClient):
|
||||
servers, vt_ids = await check_remove_node_tasks_tree(manager, tm, module_name, servers, vt_ids)
|
||||
servers, vt_ids = await check_decommission_tasks_tree(manager, tm, module_name, servers, vt_ids)
|
||||
|
||||
# Reconnect the driver after topology changes (replace, removenode,
|
||||
# decommission) so that the new_test_keyspace cleanup can reach a
|
||||
# live node for DROP KEYSPACE.
|
||||
await manager.driver_connect()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_node_ops_tasks_ttl(manager: ManagerClient):
|
||||
"""Test node ops virtual tasks' ttl."""
|
||||
|
||||
@@ -17,6 +17,7 @@ import socket
|
||||
import socketserver
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from collections import namedtuple
|
||||
from contextlib import contextmanager
|
||||
@@ -33,9 +34,9 @@ from test.cluster.dtest.dtest_class import create_ks, wait_for
|
||||
from test.cluster.dtest.tools.assertions import assert_invalid
|
||||
from test.cluster.dtest.tools.data import rows_to_list, run_in_parallel
|
||||
|
||||
from test.cluster.test_config import wait_for_config
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.rest_client import read_barrier
|
||||
from test.pylib.util import wait_for as wait_for_async
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -113,11 +114,10 @@ class AuditTester:
|
||||
for k in AUTH_CONFIG:
|
||||
await self.manager.server_remove_config_option(srv.server_id, k)
|
||||
|
||||
# Remove absent keys so the server reverts to compiled-in defaults.
|
||||
for k in absent_keys:
|
||||
await self.manager.server_remove_config_option(srv.server_id, k)
|
||||
|
||||
if needs_restart:
|
||||
# Remove absent keys so the server reverts to compiled-in defaults.
|
||||
for k in absent_keys:
|
||||
await self.manager.server_remove_config_option(srv.server_id, k)
|
||||
await self.manager.server_stop_gracefully(srv.server_id)
|
||||
full_cfg = self._build_server_config(needed, enable_compact_storage, user)
|
||||
await self.manager.server_update_config(srv.server_id, config_options=full_cfg)
|
||||
@@ -127,10 +127,17 @@ class AuditTester:
|
||||
# Server stays up — only push live-updatable keys.
|
||||
live_cfg = {k: v for k, v in needed.items() if k in LIVE_AUDIT_KEYS}
|
||||
live_cfg["enable_create_table_with_compact_storage"] = enable_compact_storage
|
||||
log_file = await self.manager.server_open_log(srv.server_id)
|
||||
# Each remove/update sends a SIGHUP. Wait for each one's
|
||||
# "completed re-reading configuration file" before the next
|
||||
# so we never match a stale message.
|
||||
for k in absent_keys:
|
||||
from_mark = await log_file.mark()
|
||||
await self.manager.server_remove_config_option(srv.server_id, k)
|
||||
await log_file.wait_for(r"completed re-reading configuration file", from_mark=from_mark, timeout=60)
|
||||
from_mark = await log_file.mark()
|
||||
await self.manager.server_update_config(srv.server_id, config_options=live_cfg)
|
||||
for key in LIVE_AUDIT_KEYS:
|
||||
if key in live_cfg:
|
||||
await wait_for_config(self.manager, srv, key, live_cfg[key])
|
||||
await log_file.wait_for(r"completed re-reading configuration file", from_mark=from_mark, timeout=60)
|
||||
|
||||
async def _start_fresh_servers(self, needed: dict[str, str],
|
||||
enable_compact_storage: bool,
|
||||
@@ -345,7 +352,7 @@ class UnixSockerListener:
|
||||
elif data != "Initializing syslog audit backend.":
|
||||
self.server.parent_instance.lines.append(data)
|
||||
|
||||
class UnixDatagramServer(socketserver.ThreadingUnixDatagramServer):
|
||||
class UnixDatagramServer(socketserver.UnixDatagramServer):
|
||||
def __init__(self, socket_path, handler, parent_instance, lock):
|
||||
self.parent_instance = parent_instance
|
||||
self.mutex = lock
|
||||
@@ -511,8 +518,7 @@ class AuditBackendComposite(AuditBackend):
|
||||
return rows_dict
|
||||
|
||||
|
||||
@pytest.mark.single_node
|
||||
class TestCQLAudit(AuditTester):
|
||||
class CQLAuditTester(AuditTester):
|
||||
"""
|
||||
Make sure CQL statements are audited
|
||||
"""
|
||||
@@ -1343,7 +1349,13 @@ class TestCQLAudit(AuditTester):
|
||||
conn = await self.manager.get_cql_exclusive(srv)
|
||||
stmt = SimpleStatement("INSERT INTO ks.test1 (k, v1) VALUES (1000, 1000)", consistency_level=ConsistencyLevel.THREE)
|
||||
conn.execute(stmt)
|
||||
audit_node_ips = await self.get_audit_partitions_for_operation(session, stmt.query_string)
|
||||
# The audit log entry may not be visible immediately after the
|
||||
# insert, so retry with exponential backoff until it appears.
|
||||
audit_node_ips = await wait_for_async(
|
||||
lambda: self.get_audit_partitions_for_operation(session, stmt.query_string),
|
||||
deadline=time.time() + 10,
|
||||
period=0.05,
|
||||
label=f"audit entry for node {index}")
|
||||
node_to_audit_nodes[index] = set(audit_node_ips)
|
||||
|
||||
all_addresses = set(srv.ip_addr for srv in servers)
|
||||
@@ -1763,7 +1775,7 @@ class TestCQLAudit(AuditTester):
|
||||
|
||||
async def test_audit_table_noauth(manager: ManagerClient):
|
||||
"""Table backend, no auth, single node — groups all tests that share this config."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
await t.test_using_non_existent_keyspace(AuditBackendTable)
|
||||
await t.test_audit_keyspace(AuditBackendTable)
|
||||
await t.test_audit_keyspace_extra_parameter(AuditBackendTable)
|
||||
@@ -1787,7 +1799,7 @@ async def test_audit_table_noauth(manager: ManagerClient):
|
||||
|
||||
async def test_audit_table_auth(manager: ManagerClient):
|
||||
"""Table backend, auth enabled, single node."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
await t.test_user_password_masking(AuditBackendTable)
|
||||
await t.test_negative_audit_records_auth()
|
||||
await t.test_negative_audit_records_admin()
|
||||
@@ -1803,7 +1815,7 @@ async def test_audit_table_auth(manager: ManagerClient):
|
||||
|
||||
async def test_audit_table_auth_multinode(manager: ManagerClient):
|
||||
"""Table backend, auth enabled, multi-node (rf=3)."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
await t.test_negative_audit_records_ddl()
|
||||
|
||||
|
||||
@@ -1811,49 +1823,49 @@ async def test_audit_table_auth_multinode(manager: ManagerClient):
|
||||
|
||||
async def test_audit_type_none_standalone(manager: ManagerClient):
|
||||
"""audit=None — verify no auditing occurs."""
|
||||
await TestCQLAudit(manager).test_audit_type_none()
|
||||
await CQLAuditTester(manager).test_audit_type_none()
|
||||
|
||||
|
||||
async def test_audit_type_invalid_standalone(manager: ManagerClient):
|
||||
"""audit=invalid — server should fail to start."""
|
||||
await TestCQLAudit(manager).test_audit_type_invalid()
|
||||
await CQLAuditTester(manager).test_audit_type_invalid()
|
||||
|
||||
|
||||
async def test_composite_audit_type_invalid_standalone(manager: ManagerClient):
|
||||
"""audit=table,syslog,invalid — server should fail to start."""
|
||||
await TestCQLAudit(manager).test_composite_audit_type_invalid()
|
||||
await CQLAuditTester(manager).test_composite_audit_type_invalid()
|
||||
|
||||
|
||||
async def test_audit_empty_settings_standalone(manager: ManagerClient):
|
||||
"""audit=none — verify no auditing occurs."""
|
||||
await TestCQLAudit(manager).test_audit_empty_settings()
|
||||
await CQLAuditTester(manager).test_audit_empty_settings()
|
||||
|
||||
|
||||
async def test_composite_audit_empty_settings_standalone(manager: ManagerClient):
|
||||
"""audit=table,syslog,none — verify no auditing occurs."""
|
||||
await TestCQLAudit(manager).test_composite_audit_empty_settings()
|
||||
await CQLAuditTester(manager).test_composite_audit_empty_settings()
|
||||
|
||||
|
||||
async def test_audit_categories_invalid_standalone(manager: ManagerClient):
|
||||
"""Invalid audit_categories — server should fail to start."""
|
||||
await TestCQLAudit(manager).test_audit_categories_invalid()
|
||||
await CQLAuditTester(manager).test_audit_categories_invalid()
|
||||
|
||||
|
||||
async def test_insert_failure_standalone(manager: ManagerClient):
|
||||
"""7-node topology, audit=table, no auth — standalone due to unique topology."""
|
||||
await TestCQLAudit(manager).test_insert_failure_doesnt_report_success()
|
||||
await CQLAuditTester(manager).test_insert_failure_doesnt_report_success()
|
||||
|
||||
|
||||
async def test_service_level_statements_standalone(manager: ManagerClient):
|
||||
"""audit=table, auth, cmdline=--smp 1 — standalone due to special cmdline."""
|
||||
await TestCQLAudit(manager).test_service_level_statements()
|
||||
await CQLAuditTester(manager).test_service_level_statements()
|
||||
|
||||
|
||||
# AuditBackendSyslog, no auth, rf=1
|
||||
|
||||
async def test_audit_syslog_noauth(manager: ManagerClient):
|
||||
"""Syslog backend, no auth, single node."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
Syslog = functools.partial(AuditBackendSyslog, socket_path=syslog_socket_path)
|
||||
await t.test_using_non_existent_keyspace(Syslog)
|
||||
await t.test_audit_keyspace(Syslog)
|
||||
@@ -1870,7 +1882,7 @@ async def test_audit_syslog_noauth(manager: ManagerClient):
|
||||
|
||||
async def test_audit_syslog_auth(manager: ManagerClient):
|
||||
"""Syslog backend, auth enabled, single node."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
Syslog = functools.partial(AuditBackendSyslog, socket_path=syslog_socket_path)
|
||||
await t.test_user_password_masking(Syslog)
|
||||
await t.test_role_password_masking(Syslog)
|
||||
@@ -1881,7 +1893,7 @@ async def test_audit_syslog_auth(manager: ManagerClient):
|
||||
|
||||
async def test_audit_composite_noauth(manager: ManagerClient):
|
||||
"""Composite backend (table+syslog), no auth, single node."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
Composite = functools.partial(AuditBackendComposite, socket_path=syslog_socket_path)
|
||||
await t.test_using_non_existent_keyspace(Composite)
|
||||
await t.test_audit_keyspace(Composite)
|
||||
@@ -1898,7 +1910,7 @@ async def test_audit_composite_noauth(manager: ManagerClient):
|
||||
|
||||
async def test_audit_composite_auth(manager: ManagerClient):
|
||||
"""Composite backend (table+syslog), auth enabled, single node."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
Composite = functools.partial(AuditBackendComposite, socket_path=syslog_socket_path)
|
||||
await t.test_user_password_masking(Composite)
|
||||
await t.test_role_password_masking(Composite)
|
||||
@@ -1910,29 +1922,29 @@ _composite = functools.partial(AuditBackendComposite, socket_path=syslog_socket_
|
||||
|
||||
|
||||
@pytest.mark.parametrize("helper_class,config_changer", [
|
||||
pytest.param(AuditBackendTable, TestCQLAudit.AuditSighupConfigChanger, id="table-sighup"),
|
||||
pytest.param(AuditBackendTable, TestCQLAudit.AuditCqlConfigChanger, id="table-cql"),
|
||||
pytest.param(_syslog, TestCQLAudit.AuditSighupConfigChanger, id="syslog-sighup"),
|
||||
pytest.param(_syslog, TestCQLAudit.AuditCqlConfigChanger, id="syslog-cql"),
|
||||
pytest.param(_composite, TestCQLAudit.AuditSighupConfigChanger, id="composite-sighup"),
|
||||
pytest.param(_composite, TestCQLAudit.AuditCqlConfigChanger, id="composite-cql"),
|
||||
pytest.param(AuditBackendTable, CQLAuditTester.AuditSighupConfigChanger, id="table-sighup"),
|
||||
pytest.param(AuditBackendTable, CQLAuditTester.AuditCqlConfigChanger, id="table-cql"),
|
||||
pytest.param(_syslog, CQLAuditTester.AuditSighupConfigChanger, id="syslog-sighup"),
|
||||
pytest.param(_syslog, CQLAuditTester.AuditCqlConfigChanger, id="syslog-cql"),
|
||||
pytest.param(_composite, CQLAuditTester.AuditSighupConfigChanger, id="composite-sighup"),
|
||||
pytest.param(_composite, CQLAuditTester.AuditCqlConfigChanger, id="composite-cql"),
|
||||
])
|
||||
async def test_config_no_liveupdate(manager: ManagerClient, helper_class, config_changer):
|
||||
"""Non-live audit config params (audit, audit_unix_socket_path, audit_syslog_write_buffer_size) must be unmodifiable."""
|
||||
await TestCQLAudit(manager).test_config_no_liveupdate(helper_class, config_changer)
|
||||
await CQLAuditTester(manager).test_config_no_liveupdate(helper_class, config_changer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("helper_class,config_changer", [
|
||||
pytest.param(AuditBackendTable, TestCQLAudit.AuditSighupConfigChanger, id="table-sighup"),
|
||||
pytest.param(AuditBackendTable, TestCQLAudit.AuditCqlConfigChanger, id="table-cql"),
|
||||
pytest.param(_syslog, TestCQLAudit.AuditSighupConfigChanger, id="syslog-sighup"),
|
||||
pytest.param(_syslog, TestCQLAudit.AuditCqlConfigChanger, id="syslog-cql"),
|
||||
pytest.param(_composite, TestCQLAudit.AuditSighupConfigChanger, id="composite-sighup"),
|
||||
pytest.param(_composite, TestCQLAudit.AuditCqlConfigChanger, id="composite-cql"),
|
||||
pytest.param(AuditBackendTable, CQLAuditTester.AuditSighupConfigChanger, id="table-sighup"),
|
||||
pytest.param(AuditBackendTable, CQLAuditTester.AuditCqlConfigChanger, id="table-cql"),
|
||||
pytest.param(_syslog, CQLAuditTester.AuditSighupConfigChanger, id="syslog-sighup"),
|
||||
pytest.param(_syslog, CQLAuditTester.AuditCqlConfigChanger, id="syslog-cql"),
|
||||
pytest.param(_composite, CQLAuditTester.AuditSighupConfigChanger, id="composite-sighup"),
|
||||
pytest.param(_composite, CQLAuditTester.AuditCqlConfigChanger, id="composite-cql"),
|
||||
])
|
||||
async def test_config_liveupdate(manager: ManagerClient, helper_class, config_changer):
|
||||
"""Live-updatable audit config params (categories, keyspaces, tables) must be modifiable at runtime."""
|
||||
await TestCQLAudit(manager).test_config_liveupdate(helper_class, config_changer)
|
||||
await CQLAuditTester(manager).test_config_liveupdate(helper_class, config_changer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("helper_class", [
|
||||
@@ -1942,4 +1954,4 @@ async def test_config_liveupdate(manager: ManagerClient, helper_class, config_ch
|
||||
])
|
||||
async def test_parallel_syslog_audit(manager: ManagerClient, helper_class):
|
||||
"""Cluster must not fail when multiple queries are audited in parallel."""
|
||||
await TestCQLAudit(manager).test_parallel_syslog_audit(helper_class)
|
||||
await CQLAuditTester(manager).test_parallel_syslog_audit(helper_class)
|
||||
|
||||
70
test/cluster/test_bootstrap_with_quick_group0_join.py
Normal file
70
test/cluster/test_bootstrap_with_quick_group0_join.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#
|
||||
# Copyright (C) 2026-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
#
|
||||
import logging
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
from test.cluster.util import get_current_group0_config
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.rest_client import read_barrier
|
||||
from test.pylib.util import wait_for
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
||||
async def test_bootstrap_with_quick_group0_join(manager: ManagerClient):
|
||||
"""Regression test for https://scylladb.atlassian.net/browse/SCYLLADB-959.
|
||||
|
||||
The bug was that when the bootstrapping node joined group0 before reaching
|
||||
post_server_start, it skipped post_server_start and thus hung forever.
|
||||
|
||||
The test simulates the scenario by starting the second node with the
|
||||
join_group0_pause_before_config_check injection. Without the fix, the
|
||||
startup times out.
|
||||
"""
|
||||
logger.info("Adding first server")
|
||||
s1 = await manager.server_add()
|
||||
|
||||
logger.info("Adding second server with join_group0_pause_before_config_check enabled")
|
||||
s2 = await manager.server_add(start=False, config={
|
||||
'error_injections_at_startup': ['join_group0_pause_before_config_check']
|
||||
})
|
||||
|
||||
logger.info(f"Starting {s2}")
|
||||
start_task = asyncio.create_task(manager.server_start(s2.server_id))
|
||||
|
||||
s2_log = await manager.server_open_log(s2.server_id)
|
||||
|
||||
await s2_log.wait_for("join_group0_pause_before_config_check: waiting for message", timeout=60)
|
||||
|
||||
s1_host_id = await manager.get_host_id(s1.server_id)
|
||||
s2_host_id = await manager.get_host_id(s2.server_id)
|
||||
|
||||
async def s2_in_group0_config_on_s1():
|
||||
config = await get_current_group0_config(manager, s1)
|
||||
ids = {m[0] for m in config}
|
||||
assert s1_host_id in ids # sanity check
|
||||
return True if s2_host_id in ids else None
|
||||
|
||||
# Note: we would like to wait for s2 to see itself in the group0 config, but we can't execute
|
||||
# get_current_group0_config for s2, as s2 doesn't handle CQL requests at this point. As a workaround, we wait for s1
|
||||
# to see s2 and then perform a read barrier on s2.
|
||||
logger.info(f"Waiting for {s1} to see {s2} in the group0 config")
|
||||
await wait_for(s2_in_group0_config_on_s1, deadline=time.time() + 60, period=0.1)
|
||||
|
||||
logger.info(f"Performing read barrier on {s2} to make sure it sees itself in the group0 config")
|
||||
await read_barrier(manager.api, s2.ip_addr)
|
||||
|
||||
logger.info(f"Unblocking {s2}")
|
||||
await manager.api.message_injection(s2.ip_addr, 'join_group0_pause_before_config_check')
|
||||
|
||||
logger.info(f"Waiting for {s2} to complete bootstrap")
|
||||
await asyncio.wait_for(start_task, timeout=60)
|
||||
@@ -177,7 +177,7 @@ async def _smoke_test(manager: ManagerClient, key_provider: KeyProviderFactory,
|
||||
# restart the cluster
|
||||
if restart:
|
||||
await restart(manager, servers, cfs)
|
||||
await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
|
||||
cql, _ = await manager.get_ready_cql(servers)
|
||||
else:
|
||||
await manager.rolling_restart(servers)
|
||||
for table_name in cfs:
|
||||
|
||||
@@ -438,6 +438,7 @@ async def test_lwt_fencing_upgrade(manager: ManagerClient, scylla_2025_1: Scylla
|
||||
await wait_for(all_hosts_are_alive, deadline=time.time() + 60, period=0.1)
|
||||
logger.info(f"Upgrading {s.server_id}")
|
||||
await manager.server_change_version(s.server_id, scylla_binary)
|
||||
await manager.server_sees_others(s.server_id, 2, interval=60.0)
|
||||
|
||||
logger.info("Done upgrading servers")
|
||||
|
||||
|
||||
@@ -8,7 +8,10 @@ import asyncio
|
||||
import time
|
||||
import pytest
|
||||
import logging
|
||||
from functools import partial
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.util import wait_for
|
||||
from test.pylib.internal_types import ServerInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -16,6 +19,26 @@ logger = logging.getLogger(__name__)
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
||||
async def test_crashed_node_substitution(manager: ManagerClient):
|
||||
"""Test that a node which crashed after starting gossip but before joining group0
|
||||
(an 'orphan' node) is eventually removed from gossip by the gossiper_orphan_remover_fiber.
|
||||
|
||||
The scenario:
|
||||
1. Start 3 nodes with the 'fast_orphan_removal_fiber' injection enabled. This freezes
|
||||
the gossiper_orphan_remover_fiber on each node before it enters its polling loop,
|
||||
so it cannot remove any orphan until explicitly unblocked.
|
||||
2. Start a 4th node with the 'crash_before_group0_join' injection enabled. This node
|
||||
starts gossip normally but blocks inside pre_server_start(), just before sending
|
||||
the join RPC to the topology coordinator. It never joins group0.
|
||||
3. Wait until the 4th node's gossip state has fully propagated to all 3 running peers,
|
||||
then trigger its crash via the injection. At this point all peers see it as an orphan:
|
||||
present in gossip but absent from the group0 topology.
|
||||
4. Assert the orphan is visible in gossip (live or down) on the surviving nodes.
|
||||
5. Unblock the gossiper_orphan_remover_fiber on all 3 nodes (via message_injection) and
|
||||
enable the 'speedup_orphan_removal' injection so the fiber removes the orphan immediately
|
||||
without waiting for the normal 60-second age threshold.
|
||||
6. Wait for the 'Finished to force remove node' log line confirming removal, then assert
|
||||
the orphan is no longer present in gossip.
|
||||
"""
|
||||
servers = await manager.servers_add(3, config={
|
||||
'error_injections_at_startup': ['fast_orphan_removal_fiber']
|
||||
})
|
||||
@@ -30,10 +53,24 @@ async def test_crashed_node_substitution(manager: ManagerClient):
|
||||
log = await manager.server_open_log(failed_server.server_id)
|
||||
await log.wait_for("finished do_send_ack2_msg")
|
||||
failed_id = await manager.get_host_id(failed_server.server_id)
|
||||
|
||||
# Wait until the failed server's gossip state has propagated to all running peers.
|
||||
# "finished do_send_ack2_msg" only guarantees that one peer completed a gossip round
|
||||
# with the failed server; other nodes learn about it only in subsequent gossip rounds.
|
||||
# Querying gossip before propagation completes would cause the assertion below to fail
|
||||
# because the orphan node would not yet appear as live or down on every peer.
|
||||
async def gossip_has_node(server: ServerInfo):
|
||||
live = await manager.api.client.get_json("/gossiper/endpoint/live", host=server.ip_addr)
|
||||
down = await manager.api.client.get_json("/gossiper/endpoint/down", host=server.ip_addr)
|
||||
return True if failed_server.ip_addr in live + down else None
|
||||
|
||||
for s in servers:
|
||||
await wait_for(partial(gossip_has_node, s), deadline=time.time() + 30)
|
||||
|
||||
await manager.api.message_injection(failed_server.ip_addr, 'crash_before_group0_join')
|
||||
|
||||
|
||||
await task
|
||||
|
||||
|
||||
live_eps = await manager.api.client.get_json("/gossiper/endpoint/live", host=servers[0].ip_addr)
|
||||
down_eps = await manager.api.client.get_json("/gossiper/endpoint/down", host=servers[0].ip_addr)
|
||||
|
||||
|
||||
@@ -17,9 +17,9 @@ from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.rest_client import ScyllaMetricsClient, TCPRESTClient, inject_error
|
||||
from test.pylib.tablets import get_tablet_replicas
|
||||
from test.pylib.scylla_cluster import ReplaceConfig
|
||||
from test.pylib.util import wait_for
|
||||
from test.pylib.util import gather_safely, wait_for
|
||||
|
||||
from test.cluster.util import get_topology_coordinator, find_server_by_host_id, new_test_keyspace
|
||||
from test.cluster.util import get_topology_coordinator, find_server_by_host_id, keyspace_has_tablets, new_test_keyspace, new_test_table
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -51,28 +51,42 @@ async def await_sync_point(client: TCPRESTClient, server_ip: IPAddress, sync_poi
|
||||
@pytest.mark.asyncio
|
||||
async def test_write_cl_any_to_dead_node_generates_hints(manager: ManagerClient):
|
||||
node_count = 2
|
||||
servers = await manager.servers_add(node_count)
|
||||
cmdline = ["--logger-log-level", "hints_manager=trace"]
|
||||
servers = await manager.servers_add(node_count, cmdline=cmdline)
|
||||
|
||||
async def wait_for_hints_written(min_hint_count: int, timeout: int):
|
||||
async def aux():
|
||||
hints_written = await get_hint_metrics(manager.metrics, servers[0].ip_addr, "written")
|
||||
if hints_written >= min_hint_count:
|
||||
return True
|
||||
return None
|
||||
assert await wait_for(aux, time.time() + timeout)
|
||||
|
||||
cql = manager.get_cql()
|
||||
async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}") as ks:
|
||||
table = f"{ks}.t"
|
||||
await cql.run_async(f"CREATE TABLE {table} (pk int primary key, v int)")
|
||||
uses_tablets = await keyspace_has_tablets(manager, ks)
|
||||
# If the keyspace uses tablets, let's explicitly require the table to use multiple tablets.
|
||||
# Otherwise, it could happen that all mutations would target servers[0] only, which would
|
||||
# ultimately lead to a test failure here. We rely on the assumption that mutations will be
|
||||
# distributed more or less uniformly!
|
||||
extra_opts = "WITH tablets = {'min_tablet_count': 16}" if uses_tablets else ""
|
||||
async with new_test_table(manager, ks, "pk int PRIMARY KEY, v int", extra_opts) as table:
|
||||
await manager.server_stop_gracefully(servers[1].server_id)
|
||||
|
||||
await manager.server_stop_gracefully(servers[1].server_id)
|
||||
hints_before = await get_hint_metrics(manager.metrics, servers[0].ip_addr, "written")
|
||||
|
||||
hints_before = await get_hint_metrics(manager.metrics, servers[0].ip_addr, "written")
|
||||
stmt = cql.prepare(f"INSERT INTO {table} (pk, v) VALUES (?, ?)")
|
||||
stmt.consistency_level = ConsistencyLevel.ANY
|
||||
|
||||
# Some of the inserts will be targeted to the dead node.
|
||||
# The coordinator doesn't have live targets to send the write to, but it should write a hint.
|
||||
for i in range(100):
|
||||
await cql.run_async(SimpleStatement(f"INSERT INTO {table} (pk, v) VALUES ({i}, {i+1})", consistency_level=ConsistencyLevel.ANY))
|
||||
# Some of the inserts will be targeted to the dead node.
|
||||
# The coordinator doesn't have live targets to send the write to, but it should write a hint.
|
||||
await gather_safely(*[cql.run_async(stmt, (i, i + 1)) for i in range(100)])
|
||||
|
||||
# Verify hints are written
|
||||
hints_after = await get_hint_metrics(manager.metrics, servers[0].ip_addr, "written")
|
||||
assert hints_after > hints_before
|
||||
# Verify hints are written
|
||||
await wait_for_hints_written(hints_before + 1, timeout=60)
|
||||
|
||||
# For dropping the keyspace
|
||||
await manager.server_start(servers[1].server_id)
|
||||
# For dropping the keyspace
|
||||
await manager.server_start(servers[1].server_id)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_limited_concurrency_of_writes(manager: ManagerClient):
|
||||
@@ -86,7 +100,7 @@ async def test_limited_concurrency_of_writes(manager: ManagerClient):
|
||||
})
|
||||
node2 = await manager.server_add()
|
||||
|
||||
cql = manager.get_cql()
|
||||
cql = await manager.get_cql_exclusive(node1)
|
||||
async with new_test_keyspace(manager, "WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 2}") as ks:
|
||||
table = f"{ks}.t"
|
||||
await cql.run_async(f"CREATE TABLE {table} (pk int primary key, v int)")
|
||||
|
||||
@@ -151,7 +151,7 @@ async def trigger_tablet_merge(manager, servers, logs):
|
||||
await s1_log.wait_for('Detected tablet merge for table', from_mark=s1_mark)
|
||||
await inject_error_off(manager, "tablet_force_tablet_count_decrease", servers)
|
||||
|
||||
async def preapre_cluster_for_incremental_repair(manager, nr_keys = 100 , cmdline = []):
|
||||
async def prepare_cluster_for_incremental_repair(manager, nr_keys = 100 , cmdline = []):
|
||||
servers, cql, hosts, ks, table_id = await create_table_insert_data_for_repair(manager, nr_keys=nr_keys, cmdline=cmdline)
|
||||
repaired_keys = set(range(0, nr_keys))
|
||||
unrepaired_keys = set()
|
||||
@@ -164,7 +164,7 @@ async def preapre_cluster_for_incremental_repair(manager, nr_keys = 100 , cmdlin
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tablet_repair_sstable_skipped_read_metrics(manager: ManagerClient):
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, token = await preapre_cluster_for_incremental_repair(manager)
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, token = await prepare_cluster_for_incremental_repair(manager)
|
||||
|
||||
await insert_keys(cql, ks, 0, 100)
|
||||
|
||||
@@ -274,7 +274,7 @@ async def test_tablet_incremental_repair_error(manager: ManagerClient):
|
||||
|
||||
async def do_tablet_incremental_repair_and_ops(manager: ManagerClient, ops: str):
|
||||
nr_keys = 100
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys, cmdline=['--logger-log-level', 'compaction=debug'])
|
||||
token = -1
|
||||
|
||||
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
|
||||
@@ -335,7 +335,7 @@ async def test_tablet_incremental_repair_and_major(manager: ManagerClient):
|
||||
@pytest.mark.asyncio
|
||||
async def test_tablet_incremental_repair_and_minor(manager: ManagerClient):
|
||||
nr_keys = 100
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys)
|
||||
|
||||
# Disable autocompaction
|
||||
for server in servers:
|
||||
@@ -381,7 +381,7 @@ async def test_tablet_incremental_repair_and_minor(manager: ManagerClient):
|
||||
|
||||
async def do_test_tablet_incremental_repair_with_split_and_merge(manager, do_split, do_merge):
|
||||
nr_keys = 100
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys)
|
||||
|
||||
# First repair
|
||||
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
|
||||
@@ -442,7 +442,7 @@ async def test_tablet_incremental_repair_with_merge(manager: ManagerClient):
|
||||
async def test_tablet_incremental_repair_existing_and_repair_produced_sstable(manager: ManagerClient):
|
||||
nr_keys = 100
|
||||
cmdline = ["--hinted-handoff-enabled", "0"]
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys, cmdline)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys, cmdline)
|
||||
|
||||
await manager.server_stop_gracefully(servers[1].server_id)
|
||||
|
||||
@@ -466,7 +466,7 @@ async def test_tablet_incremental_repair_existing_and_repair_produced_sstable(ma
|
||||
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
||||
async def test_tablet_incremental_repair_merge_higher_repaired_at_number(manager):
|
||||
nr_keys = 100
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys)
|
||||
|
||||
# First repair
|
||||
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
|
||||
@@ -507,7 +507,7 @@ async def test_tablet_incremental_repair_merge_higher_repaired_at_number(manager
|
||||
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
||||
async def test_tablet_incremental_repair_merge_correct_repaired_at_number_after_merge(manager):
|
||||
nr_keys = 100
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys)
|
||||
|
||||
# First repair
|
||||
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
|
||||
@@ -541,7 +541,7 @@ async def do_test_tablet_incremental_repair_merge_error(manager, error):
|
||||
nr_keys = 100
|
||||
# Make sure no data commit log replay after force server stop
|
||||
cmdline = ['--enable-commitlog', '0']
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys, cmdline)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys, cmdline)
|
||||
|
||||
# First repair
|
||||
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
|
||||
@@ -587,7 +587,7 @@ async def test_tablet_incremental_repair_merge_error_in_merge_completion_fiber(m
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tablet_repair_with_incremental_option(manager: ManagerClient):
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, token = await preapre_cluster_for_incremental_repair(manager)
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, token = await prepare_cluster_for_incremental_repair(manager)
|
||||
token = -1
|
||||
|
||||
sstables_repaired_at = 0
|
||||
@@ -632,7 +632,7 @@ async def test_tablet_repair_with_incremental_option(manager: ManagerClient):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_incremental_repair_tablet_time_metrics(manager: ManagerClient):
|
||||
servers, _, _, ks, _, _, _, _, _, token = await preapre_cluster_for_incremental_repair(manager)
|
||||
servers, _, _, ks, _, _, _, _, _, token = await prepare_cluster_for_incremental_repair(manager)
|
||||
time1 = 0
|
||||
time2 = 0
|
||||
|
||||
@@ -820,7 +820,7 @@ async def test_repair_sigsegv_with_diff_shard_count(manager: ManagerClient, use_
|
||||
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
||||
async def test_tablet_incremental_repair_table_drop_compaction_group_gone(manager: ManagerClient):
|
||||
cmdline = ['--logger-log-level', 'repair=debug']
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, _ = await preapre_cluster_for_incremental_repair(manager, cmdline=cmdline)
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, _ = await prepare_cluster_for_incremental_repair(manager, cmdline=cmdline)
|
||||
|
||||
coord = await get_topology_coordinator(manager)
|
||||
coord_serv = await find_server_by_host_id(manager, servers, coord)
|
||||
|
||||
@@ -20,6 +20,7 @@ from cassandra.query import SimpleStatement
|
||||
from test.pylib.async_cql import _wrap_future
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.random_tables import RandomTables, TextType, Column
|
||||
from test.pylib.rest_client import read_barrier
|
||||
from test.pylib.util import unique_name
|
||||
from test.cluster.conftest import cluster_con
|
||||
|
||||
@@ -403,6 +404,7 @@ async def test_arbiter_dc_rf_rack_valid_keyspaces(manager: ManagerClient):
|
||||
for task in [*valid_keyspaces, *invalid_keyspaces]:
|
||||
_ = tg.create_task(task)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_startup_with_keyspaces_violating_rf_rack_valid_keyspaces(manager: ManagerClient):
|
||||
"""
|
||||
This test verifies that starting a Scylla node fails when there's an RF-rack-invalid keyspace.
|
||||
@@ -464,22 +466,50 @@ async def test_startup_with_keyspaces_violating_rf_rack_valid_keyspaces(manager:
|
||||
for rfs, tablets in valid_keyspaces:
|
||||
_ = tg.create_task(create_keyspace(rfs, tablets))
|
||||
|
||||
await manager.server_stop_gracefully(s1.server_id)
|
||||
await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "true")
|
||||
|
||||
# Precondition: s1 has rf_rack_valid_keyspaces set to false.
|
||||
# Postcondition: s1 still has rf_rack_valid_keyspaces set to false.
|
||||
async def try_fail(rfs: List[int], dc: str, rf: int, rack_count: int):
|
||||
running_servers = await manager.running_servers()
|
||||
should_start = s1.server_id not in [server.server_id for server in running_servers]
|
||||
if should_start:
|
||||
await manager.server_start(s1.server_id)
|
||||
|
||||
ks = await create_keyspace(rfs, True)
|
||||
# We need to wait for the new schema to propagate.
|
||||
# Otherwise, it's not clear when the mutation
|
||||
# corresponding to the created keyspace will
|
||||
# arrive at server 1.
|
||||
# It could happen only after the node performs
|
||||
# the check upon start-up, effectively leading
|
||||
# to a successful start-up, which we don't want.
|
||||
# For more context, see issue: SCYLLADB-1137.
|
||||
await read_barrier(manager.api, s1.ip_addr)
|
||||
|
||||
await manager.server_stop_gracefully(s1.server_id)
|
||||
await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "true")
|
||||
|
||||
err = f"The keyspace '{ks}' is required to be RF-rack-valid. " \
|
||||
f"That condition is violated for DC '{dc}': RF={rf} vs. rack count={rack_count}."
|
||||
_ = await manager.server_start(s1.server_id, expected_error=err)
|
||||
await manager.server_start(s1.server_id, expected_error=err)
|
||||
await cql.run_async(f"DROP KEYSPACE {ks}")
|
||||
|
||||
await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "false")
|
||||
|
||||
# Test RF-rack-invalid keyspaces.
|
||||
await try_fail([2, 0], "dc1", 2, 3)
|
||||
await try_fail([3, 2], "dc2", 2, 1)
|
||||
await try_fail([4, 1], "dc1", 4, 3)
|
||||
|
||||
_ = await manager.server_start(s1.server_id)
|
||||
# We need to perform a read barrier on the node to make
|
||||
# sure that it processes the last DROP KEYSPACE.
|
||||
# Otherwise, the node could think the RF-rack-invalid
|
||||
# keyspace still exists.
|
||||
await manager.server_start(s1.server_id)
|
||||
await read_barrier(manager.api, s1.ip_addr)
|
||||
await manager.server_stop_gracefully(s1.server_id)
|
||||
|
||||
await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "true")
|
||||
await manager.server_start(s1.server_id)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_startup_with_keyspaces_violating_rf_rack_valid_keyspaces_but_not_enforced(manager: ManagerClient):
|
||||
|
||||
@@ -23,10 +23,25 @@ from test.cluster.object_store.conftest import format_tuples
|
||||
from test.cluster.object_store.test_backup import topo, take_snapshot, do_test_streaming_scopes
|
||||
from test.cluster.util import new_test_keyspace
|
||||
from test.pylib.rest_client import read_barrier
|
||||
from test.pylib.util import unique_name
|
||||
from test.pylib.util import unique_name, wait_for
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def wait_for_upload_dir_empty(upload_dir, timeout=30):
|
||||
'''
|
||||
Wait until the upload directory is empty with a timeout.
|
||||
SSTable unlinking is asynchronous and in rare situations, it can happen
|
||||
that not all sstables are deleted from the upload dir immediately after refresh is done.
|
||||
'''
|
||||
deadline = time.time() + timeout
|
||||
async def check_empty():
|
||||
files = os.listdir(upload_dir)
|
||||
if not files:
|
||||
return True
|
||||
return None
|
||||
await wait_for(check_empty, deadline, period=0.5)
|
||||
|
||||
class SSTablesOnLocalStorage:
|
||||
def __init__(self):
|
||||
self.tmpdir = f'tmpbackup-{str(uuid.uuid4())}'
|
||||
@@ -153,7 +168,8 @@ async def test_refresh_deletes_uploaded_sstables(manager: ManagerClient):
|
||||
|
||||
for s in servers:
|
||||
cf_dir = dirs[s.server_id]["cf_dir"]
|
||||
files = os.listdir(os.path.join(cf_dir, 'upload'))
|
||||
assert files == [], f'Upload dir not empty on server {s.server_id}: {files}'
|
||||
upload_dir = os.path.join(cf_dir, 'upload')
|
||||
assert os.path.exists(upload_dir)
|
||||
await wait_for_upload_dir_empty(upload_dir)
|
||||
|
||||
shutil.rmtree(tmpbackup)
|
||||
|
||||
@@ -11,7 +11,6 @@ from test.cluster.util import check_token_ring_and_group0_consistency, new_test_
|
||||
import pytest
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@pytest.mark.asyncio
|
||||
@@ -53,7 +52,7 @@ async def test_cleanup_stop(manager: ManagerClient):
|
||||
await s0_log.wait_for('sstable_cleanup_wait: waiting', from_mark=s0_mark)
|
||||
|
||||
stop_cleanup = asyncio.create_task(manager.api.stop_compaction(servers[0].ip_addr, "CLEANUP"))
|
||||
time.sleep(1)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
await manager.api.message_injection(servers[0].ip_addr, "sstable_cleanup_wait")
|
||||
await stop_cleanup
|
||||
|
||||
@@ -2279,7 +2279,7 @@ async def test_split_stopped_on_shutdown(manager: ManagerClient):
|
||||
|
||||
shutdown_task = asyncio.create_task(manager.server_stop_gracefully(server.server_id))
|
||||
|
||||
await log.wait_for('Stopping.*ongoing compactions')
|
||||
await log.wait_for('Stopping.*ongoing compactions', from_mark=log_mark)
|
||||
await manager.api.message_injection(server.ip_addr, "splitting_mutation_writer_switch_wait")
|
||||
|
||||
await log.wait_for('storage_service_drain_wait: waiting', from_mark=log_mark)
|
||||
|
||||
@@ -196,7 +196,7 @@ async def test_group0_tombstone_gc(manager: ManagerClient):
|
||||
tombstone_mark = datetime.now(timezone.utc)
|
||||
|
||||
# test #2: the tombstones are not cleaned up when one node is down
|
||||
with pytest.raises(AssertionError, match="Deadline exceeded"):
|
||||
with pytest.raises(AssertionError, match="timed out"):
|
||||
# waiting for shorter time (5s normally enough for a successful case, we expect the timeout here)
|
||||
await verify_tombstone_gc(tombstone_mark, timeout=5)
|
||||
|
||||
@@ -249,7 +249,7 @@ async def test_group0_tombstone_gc(manager: ManagerClient):
|
||||
await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
|
||||
|
||||
# test #4a: the tombstones are not cleaned up after both live nodes join the new group0
|
||||
with pytest.raises(AssertionError, match="Deadline exceeded"):
|
||||
with pytest.raises(AssertionError, match="timed out"):
|
||||
await verify_tombstone_gc(tombstone_mark, timeout=5)
|
||||
|
||||
await manager.remove_node(servers[0].server_id, down_server.server_id)
|
||||
|
||||
@@ -165,7 +165,7 @@ async def wait_for_cdc_generations_publishing(cql: Session, hosts: list[Host], d
|
||||
unpublished_generations = topo_res[0].unpublished_cdc_generations
|
||||
return unpublished_generations is None or len(unpublished_generations) == 0 or None
|
||||
|
||||
await wait_for(all_generations_published, deadline=deadline, period=1.0)
|
||||
await wait_for(all_generations_published, deadline=deadline)
|
||||
|
||||
|
||||
async def check_system_topology_and_cdc_generations_v3_consistency(manager: ManagerClient, live_hosts: list[Host], cqls: Optional[list[Session]] = None, ignored_hosts: list[Host] = []):
|
||||
@@ -470,6 +470,17 @@ async def new_materialized_view(manager: ManagerClient, table, select, pk, where
|
||||
await manager.get_cql().run_async(f"DROP MATERIALIZED VIEW {mv}")
|
||||
|
||||
|
||||
async def keyspace_has_tablets(manager: ManagerClient, keyspace: str) -> bool:
|
||||
"""
|
||||
Checks whether the given keyspace uses tablets.
|
||||
Adapted from its counterpart in the cqlpy test: cqlpy/util.py::keyspace_has_tablets.
|
||||
"""
|
||||
cql = manager.get_cql()
|
||||
rows_iter = await cql.run_async(f"SELECT * FROM system_schema.scylla_keyspaces WHERE keyspace_name='{keyspace}'")
|
||||
rows = list(rows_iter)
|
||||
return len(rows) > 0 and getattr(rows[0], "initial_tablets", None) is not None
|
||||
|
||||
|
||||
async def get_raft_log_size(cql, host) -> int:
|
||||
query = "select count(\"index\") from system.raft"
|
||||
return (await cql.run_async(query, host=host))[0][0]
|
||||
|
||||
@@ -271,10 +271,21 @@ future<std::tuple<tests::proc::process_fixture, int>> tests::proc::start_docker_
|
||||
// arbitrary timeout of 120s for the server to make some output. Very generous.
|
||||
// but since we (maybe) run docker, and might need to pull image, this can take
|
||||
// some time if we're unlucky.
|
||||
co_await with_timeout(std::chrono::steady_clock::now() + 120s, when_all(std::move(out_fut), std::move(err_fut)));
|
||||
} catch (in_use&) {
|
||||
retry = true;
|
||||
p = std::current_exception();
|
||||
auto [f1, f2] = co_await with_timeout(std::chrono::steady_clock::now() + 120s, when_all(std::move(out_fut), std::move(err_fut)));
|
||||
for (auto* f : {&f1, &f2}) {
|
||||
if (f->failed()) {
|
||||
try {
|
||||
f->get();
|
||||
} catch (in_use&) {
|
||||
retry = true;
|
||||
p = std::current_exception();
|
||||
} catch (...) {
|
||||
if (!p) {
|
||||
p = std::current_exception();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (...) {
|
||||
p = std::current_exception();
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
import os
|
||||
import random
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
@@ -59,7 +60,14 @@ async def server_address(request, testpy_test: None|Test):
|
||||
ip = await testpy_test.suite.hosts.lease_host()
|
||||
else:
|
||||
ip = f"127.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}"
|
||||
port = random.randint(10000, 65535)
|
||||
# Ask the OS to pick a free port by binding to port 0. This avoids
|
||||
# collisions with ports still in TIME_WAIT from a previous test module
|
||||
# that used the same IP. SO_REUSEADDR is set on the probe socket so it
|
||||
# can reclaim a TIME_WAIT port itself
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
s.bind((ip, 0))
|
||||
port = s.getsockname()[1]
|
||||
yield ServerAddress(ip, port)
|
||||
if testpy_test is not None:
|
||||
await testpy_test.suite.hosts.release_host(ip)
|
||||
|
||||
@@ -257,7 +257,7 @@ async def run_server(ip, port):
|
||||
|
||||
runner = aiohttp.web.AppRunner(app)
|
||||
await runner.setup()
|
||||
site = aiohttp.web.TCPSite(runner, ip, port)
|
||||
site = aiohttp.web.TCPSite(runner, ip, port, reuse_address=True, reuse_port=True)
|
||||
await site.start()
|
||||
|
||||
try:
|
||||
|
||||
@@ -4,14 +4,18 @@
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
#
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import itertools
|
||||
import asyncio
|
||||
import pathlib
|
||||
import re
|
||||
import os
|
||||
import subprocess
|
||||
from typing import Callable
|
||||
|
||||
logger = logging.getLogger("DockerizedServer")
|
||||
|
||||
class DockerizedServer:
|
||||
"""class for running an external dockerized service image, typically mock server"""
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
@@ -37,6 +41,7 @@ class DockerizedServer:
|
||||
self.port = None
|
||||
self.proc = None
|
||||
self.service_port = port
|
||||
self.echo_thread = None
|
||||
|
||||
async def start(self):
|
||||
"""Starts docker image on a random port"""
|
||||
@@ -45,77 +50,107 @@ class DockerizedServer:
|
||||
if exe is not None)).resolve()
|
||||
sid = f"{os.getpid()}-{DockerizedServer.newid()}"
|
||||
name = f'{self.logfilenamebase}-{sid}'
|
||||
logfilename = (pathlib.Path(self.tmpdir) / name).with_suffix(".log")
|
||||
self.logfile = logfilename.open("wb")
|
||||
|
||||
docker_args = self.docker_args(self.host, self.service_port)
|
||||
image_args = self.image_args(self.host, self.service_port)
|
||||
|
||||
args = [exe, "run", "--name", name, "--rm" ]
|
||||
if self.service_port is None:
|
||||
args = args + ["-P"]
|
||||
else:
|
||||
args = args + ["-p", str(self.service_port)]
|
||||
|
||||
args = args + docker_args + [self.image] + image_args
|
||||
|
||||
# This seems weird, using the blocking IO subprocess.
|
||||
# However, we want to use a pipe reader so we can push the
|
||||
# output into the test log (because we are bad at propagating
|
||||
# log files etc from CI)
|
||||
# But the pipe reader needs to read until EOF, otherwise the
|
||||
# docker process will eventually hang. So we can't await a
|
||||
# coroutine.
|
||||
# We _can_, sort of, use pool.create_task(...) to send a coro
|
||||
# to the background, and use a signal for waiting, like here,
|
||||
# thus ensuring the coro runs forever, sort of... However,
|
||||
# this currently breaks, probably due to some part of the
|
||||
# machinery/tests that don't async fully, causing us to not
|
||||
# process the log, and thus hand/fail, bla bla.
|
||||
# The solution is to make the process synced, and use a
|
||||
# background thread (execution pool) for the processing.
|
||||
# This way we know the pipe reader will not suddenly get
|
||||
# blocked at inconvinient times.
|
||||
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||
loop = asyncio.get_running_loop()
|
||||
ready_fut = loop.create_future()
|
||||
|
||||
def process_io():
|
||||
f = ready_fut
|
||||
try:
|
||||
while True:
|
||||
data = proc.stderr.readline()
|
||||
if not data:
|
||||
if f:
|
||||
loop.call_soon_threadsafe(f.set_exception, RuntimeError("Log EOF"))
|
||||
logger.debug("EOF received")
|
||||
break
|
||||
line = data.decode()
|
||||
self.logfile.write(data)
|
||||
logger.debug(line)
|
||||
if f and self.is_success_line(line, self.service_port):
|
||||
logger.info('Got start message: %s', line)
|
||||
loop.call_soon_threadsafe(f.set_result, True)
|
||||
f = None
|
||||
if f and self.is_failure_line(line, self.service_port):
|
||||
logger.info('Got fail message: %s', line)
|
||||
loop.call_soon_threadsafe(f.set_result, False)
|
||||
f = None
|
||||
except Exception as e:
|
||||
logger.error("Exception in log processing: %s", e)
|
||||
if f:
|
||||
loop.call_soon_threadsafe(f.set_exception, e)
|
||||
|
||||
self.echo_thread = loop.run_in_executor(None, process_io)
|
||||
ok = await ready_fut
|
||||
if not ok:
|
||||
self.logfile.close()
|
||||
proc.kill()
|
||||
proc.wait()
|
||||
raise RuntimeError("Could not parse expected launch message from container")
|
||||
|
||||
check_proc = await asyncio.create_subprocess_exec(exe
|
||||
, *["container", "port", name]
|
||||
, stdout=asyncio.subprocess.PIPE
|
||||
)
|
||||
while True:
|
||||
logfilename = (pathlib.Path(self.tmpdir) / name).with_suffix(".log")
|
||||
self.logfile = logfilename.open("wb")
|
||||
data = await check_proc.stdout.readline()
|
||||
if not data:
|
||||
break
|
||||
s = data.decode()
|
||||
m = re.search(r"\d+\/\w+ -> [\w+\.\[\]\:]+:(\d+)", s)
|
||||
if m:
|
||||
self.port = int(m.group(1))
|
||||
|
||||
docker_args = self.docker_args(self.host, self.service_port)
|
||||
image_args = self.image_args(self.host, self.service_port)
|
||||
|
||||
args = ["run", "--name", name, "--rm" ]
|
||||
if self.service_port is None:
|
||||
args = args + ["-P"]
|
||||
else:
|
||||
args = args + ["-p", str(self.service_port)]
|
||||
|
||||
args = args + docker_args + [self.image] + image_args
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(exe, *args, stderr=self.logfile)
|
||||
failed = False
|
||||
|
||||
# In any sane world we would just pipe stderr to a pipe and launch a background
|
||||
# task to just readline from there to both check the start message as well as
|
||||
# add it to the log (preferrably via logger).
|
||||
# This works fine when doing this in a standalone python script.
|
||||
# However, for some reason, when run in a pytest fixture, the pipe will fill up,
|
||||
# without or reader waking up and doing anyhing, and for any test longer than very
|
||||
# short, we will fill the stderr buffer and hang.
|
||||
# I cannot figure out how to get around this, so we workaround it
|
||||
# instead by directing stderr to a log file, and simply repeatedly
|
||||
# try to read the info from this file until we are happy.
|
||||
async with asyncio.timeout(120):
|
||||
done = False
|
||||
while not done and not failed:
|
||||
with logfilename.open("r") as f:
|
||||
for line in f:
|
||||
if self.is_success_line(line, self.service_port):
|
||||
print(f'Got start message: {line}')
|
||||
done = True
|
||||
break
|
||||
if self.is_failure_line(line, self.service_port):
|
||||
print(f'Got fail message: {line}')
|
||||
failed = True
|
||||
break
|
||||
|
||||
if failed:
|
||||
self.logfile.close()
|
||||
await proc.wait()
|
||||
continue
|
||||
|
||||
check_proc = await asyncio.create_subprocess_exec(exe
|
||||
, *["container", "port", name]
|
||||
, stdout=asyncio.subprocess.PIPE
|
||||
)
|
||||
while True:
|
||||
data = await check_proc.stdout.readline()
|
||||
if not data:
|
||||
break
|
||||
s = data.decode()
|
||||
m = re.search(r"\d+\/\w+ -> [\w+\.\[\]\:]+:(\d+)", s)
|
||||
if m:
|
||||
self.port = int(m.group(1))
|
||||
|
||||
await check_proc.wait()
|
||||
if not self.port:
|
||||
proc.kill()
|
||||
raise RuntimeError("Could not query port from container")
|
||||
self.proc = proc
|
||||
break
|
||||
await check_proc.wait()
|
||||
if not self.port:
|
||||
proc.kill()
|
||||
proc.wait()
|
||||
raise RuntimeError("Could not query port from container")
|
||||
self.proc = proc
|
||||
|
||||
async def stop(self):
|
||||
"""Stops docker image"""
|
||||
if self.proc:
|
||||
logger.debug("Stopping docker process")
|
||||
self.proc.terminate()
|
||||
await self.proc.wait()
|
||||
self.proc.wait()
|
||||
self.proc = None
|
||||
if self.echo_thread:
|
||||
logger.debug("Waiting for IO thread")
|
||||
await self.echo_thread
|
||||
self.echo_thread = None
|
||||
if self.logfile:
|
||||
logger.debug("Closing log file")
|
||||
self.logfile.close()
|
||||
self.logfile = None
|
||||
|
||||
@@ -747,6 +747,8 @@ class ScyllaServer:
|
||||
self.notify_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM | socket.SOCK_CLOEXEC)
|
||||
self.notify_socket.bind(str(self.notify_socket_path))
|
||||
self._received_serving = False
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
def poll_status(s: socket.socket, f: asyncio.Future, logger: Union[logging.Logger, logging.LoggerAdapter]):
|
||||
# Try to read all available messages from the socket
|
||||
while True:
|
||||
@@ -756,7 +758,7 @@ class ScyllaServer:
|
||||
message = data.decode('utf-8', errors='replace')
|
||||
if 'STATUS=serving' in message:
|
||||
logger.debug("Received sd_notify 'serving' message")
|
||||
f.set_result(True)
|
||||
loop.call_soon_threadsafe(f.set_result, True)
|
||||
return
|
||||
if 'STATUS=entering maintenance mode' in message:
|
||||
logger.debug("Receive sd_notify 'entering maintenance mode'")
|
||||
@@ -766,9 +768,9 @@ class ScyllaServer:
|
||||
except Exception as e:
|
||||
logger.debug("Error reading from notify socket: %s", e)
|
||||
break
|
||||
f.set_result(False)
|
||||
loop.call_soon_threadsafe(f.set_result, False)
|
||||
|
||||
self.serving_signal = asyncio.get_running_loop().create_future()
|
||||
self.serving_signal = loop.create_future()
|
||||
t = threading.Thread(target=poll_status, args=[self.notify_socket, self.serving_signal, self.logger], daemon=True)
|
||||
t.start()
|
||||
|
||||
@@ -892,7 +894,6 @@ class ScyllaServer:
|
||||
return
|
||||
await report_error("the node startup failed, but the log file doesn't contain the expected error")
|
||||
await report_error("failed to start the node")
|
||||
self.logger.info("Wait me %s expect %s is %s", self.server_id, expected_server_up_state, server_up_state)
|
||||
if await self.try_get_host_id(api):
|
||||
if server_up_state == ServerUpState.PROCESS_STARTED:
|
||||
server_up_state = ServerUpState.HOST_ID_QUERIED
|
||||
|
||||
@@ -56,15 +56,39 @@ def unique_name(unique_name_prefix = 'test_'):
|
||||
async def wait_for(
|
||||
pred: Callable[[], Awaitable[Optional[T]]],
|
||||
deadline: float,
|
||||
period: float = 1,
|
||||
period: float = 0.1,
|
||||
before_retry: Optional[Callable[[], Any]] = None,
|
||||
backoff_factor: float = 1,
|
||||
max_period: float = None) -> T:
|
||||
backoff_factor: float = 1.5,
|
||||
max_period: float = 1.0,
|
||||
label: Optional[str] = None) -> T:
|
||||
tag = label or getattr(pred, '__name__', 'unlabeled')
|
||||
start = time.time()
|
||||
retries = 0
|
||||
last_exception: Exception | None = None
|
||||
while True:
|
||||
assert(time.time() < deadline), "Deadline exceeded, failing test."
|
||||
res = await pred()
|
||||
elapsed = time.time() - start
|
||||
if time.time() >= deadline:
|
||||
timeout_msg = f"wait_for({tag}) timed out after {elapsed:.2f}s ({retries} retries)"
|
||||
if last_exception is not None:
|
||||
timeout_msg += (
|
||||
f"; last exception: {type(last_exception).__name__}: {last_exception}"
|
||||
)
|
||||
raise AssertionError(timeout_msg) from last_exception
|
||||
raise AssertionError(timeout_msg)
|
||||
|
||||
try:
|
||||
res = await pred()
|
||||
last_exception = None
|
||||
except Exception as exc:
|
||||
res = None
|
||||
last_exception = exc
|
||||
|
||||
if res is not None:
|
||||
if retries > 0:
|
||||
logger.debug(f"wait_for({tag}) completed "
|
||||
f"in {elapsed:.2f}s ({retries} retries)")
|
||||
return res
|
||||
retries += 1
|
||||
await asyncio.sleep(period)
|
||||
period *= backoff_factor
|
||||
if max_period is not None:
|
||||
@@ -273,14 +297,14 @@ async def wait_for_view_v1(cql: Session, name: str, node_count: int, timeout: in
|
||||
done = await cql.run_async(f"SELECT COUNT(*) FROM system_distributed.view_build_status WHERE status = 'SUCCESS' AND view_name = '{name}' ALLOW FILTERING")
|
||||
return done[0][0] == node_count or None
|
||||
deadline = time.time() + timeout
|
||||
await wait_for(view_is_built, deadline)
|
||||
await wait_for(view_is_built, deadline, label=f"view_v1_{name}")
|
||||
|
||||
async def wait_for_view(cql: Session, name: str, node_count: int, timeout: int = 120):
|
||||
async def view_is_built():
|
||||
done = await cql.run_async(f"SELECT COUNT(*) FROM system.view_build_status_v2 WHERE status = 'SUCCESS' AND view_name = '{name}' ALLOW FILTERING")
|
||||
return done[0][0] == node_count or None
|
||||
deadline = time.time() + timeout
|
||||
await wait_for(view_is_built, deadline)
|
||||
await wait_for(view_is_built, deadline, label=f"view_{name}")
|
||||
|
||||
|
||||
async def wait_for_first_completed(coros: list[Coroutine], timeout: int|None = None):
|
||||
|
||||
@@ -200,13 +200,13 @@ trace_keyspace_helper::trace_keyspace_helper(tracing& tr)
|
||||
_metrics.add_group("tracing_keyspace_helper", {
|
||||
sm::make_counter("tracing_errors", [this] { return _stats.tracing_errors; },
|
||||
sm::description("Counts a number of errors during writing to a system_traces keyspace. "
|
||||
"One error may cause one or more tracing records to be lost.")),
|
||||
"One error may cause one or more tracing records to be lost.")).set_skip_when_empty(),
|
||||
|
||||
sm::make_counter("bad_column_family_errors", [this] { return _stats.bad_column_family_errors; },
|
||||
sm::description("Counts a number of times write failed due to one of the tables in the system_traces keyspace has an incompatible schema. "
|
||||
"One error may result one or more tracing records to be lost. "
|
||||
"Non-zero value indicates that the administrator has to take immediate steps to fix the corresponding schema. "
|
||||
"The appropriate error message will be printed in the syslog.")),
|
||||
"The appropriate error message will be printed in the syslog.")).set_skip_when_empty(),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -39,17 +39,17 @@ tracing::tracing(sstring tracing_backend_helper_class_name)
|
||||
_metrics.add_group("tracing", {
|
||||
sm::make_counter("dropped_sessions", stats.dropped_sessions,
|
||||
sm::description("Counts a number of dropped sessions due to too many pending sessions/records. "
|
||||
"High value indicates that backend is saturated with the rate with which new tracing records are created.")),
|
||||
"High value indicates that backend is saturated with the rate with which new tracing records are created.")).set_skip_when_empty(),
|
||||
|
||||
sm::make_counter("dropped_records", stats.dropped_records,
|
||||
sm::description("Counts a number of dropped records due to too many pending records. "
|
||||
"High value indicates that backend is saturated with the rate with which new tracing records are created.")),
|
||||
"High value indicates that backend is saturated with the rate with which new tracing records are created.")).set_skip_when_empty(),
|
||||
|
||||
sm::make_counter("trace_records_count", stats.trace_records_count,
|
||||
sm::description("This metric is a rate of tracing records generation.")),
|
||||
|
||||
sm::make_counter("trace_errors", stats.trace_errors,
|
||||
sm::description("Counts a number of trace records dropped due to an error (e.g. OOM).")),
|
||||
sm::description("Counts a number of trace records dropped due to an error (e.g. OOM).")).set_skip_when_empty(),
|
||||
|
||||
sm::make_gauge("active_sessions", _active_sessions,
|
||||
sm::description("Holds a number of a currently active tracing sessions.")),
|
||||
|
||||
Reference in New Issue
Block a user