Files
scylladb/test/cluster/test_raft_cluster_features.py
Andrei Chekun c950c2e582 test.py: convert skip_mode function to pytest.mark
Function skip_mode works only on a function and only in cluster tests. This is OK
when we need to skip one test, but it's not possible to use it with pytestmark
to automatically mark all tests in the file. The goal of this PR is to migrate
skip_mode to be a dynamic pytest.mark that can be used as an ordinary mark.

Closes scylladb/scylladb#27853

[avi: apply to test/cluster/test_tablets.py::test_table_creation_wakes_up_balancer]
2026-01-08 21:55:16 +02:00

135 lines
5.6 KiB
Python

#
# Copyright (C) 2023-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#
"""
Tests that are specific to the raft-based cluster feature implementation.
"""
import asyncio
import time
from test.cluster.conftest import skip_mode
from test.pylib.manager_client import ManagerClient
from test.pylib.util import wait_for_cql_and_get_hosts, wait_for_feature
from test.cluster import test_cluster_features
import pytest
@pytest.mark.asyncio
async def test_rolling_upgrade_happy_path(manager: ManagerClient) -> None:
    """Bootstrap a 3-node cluster and delegate to the shared rolling-upgrade
    happy-path scenario in test_cluster_features."""
    await manager.servers_add(3, auto_rack_dc="dc1")
    await test_cluster_features.test_rolling_upgrade_happy_path(manager)
@pytest.mark.asyncio
async def test_downgrade_after_partial_upgrade(manager: ManagerClient) -> None:
    """Bootstrap a 3-node cluster and delegate to the shared
    downgrade-after-partial-upgrade scenario in test_cluster_features."""
    await manager.servers_add(3, auto_rack_dc="dc1")
    await test_cluster_features.test_downgrade_after_partial_upgrade(manager)
@pytest.mark.asyncio
async def test_joining_old_node_fails(manager: ManagerClient) -> None:
    """Bootstrap a 3-node cluster and delegate to the shared
    joining-old-node-fails scenario in test_cluster_features."""
    await manager.servers_add(3, auto_rack_dc="dc1")
    await test_cluster_features.test_joining_old_node_fails(manager)
@pytest.mark.asyncio
async def test_downgrade_after_successful_upgrade_fails(manager: ManagerClient) -> None:
    """Bootstrap a 3-node cluster and delegate to the shared
    downgrade-after-successful-upgrade scenario in test_cluster_features."""
    await manager.servers_add(3, auto_rack_dc="dc1")
    await test_cluster_features.test_downgrade_after_successful_upgrade_fails(manager)
@pytest.mark.asyncio
async def test_partial_upgrade_can_be_finished_with_removenode(manager: ManagerClient) -> None:
    """Bootstrap a 3-node cluster and delegate to the shared
    partial-upgrade-finished-with-removenode scenario in test_cluster_features."""
    await manager.servers_add(3, auto_rack_dc="dc1")
    await test_cluster_features.test_partial_upgrade_can_be_finished_with_removenode(manager)
@pytest.mark.asyncio
async def test_cannot_disable_cluster_feature_after_all_declare_support(manager: ManagerClient) -> None:
    """Upgrade all nodes to support the test cluster feature, but suppress
    the topology coordinator and prevent it from enabling the feature.
    Try to downgrade one of the nodes - it should fail because of the
    missing feature. Unblock the topology coordinator, restart the node
    and observe that the feature was enabled.
    """
    servers = await manager.servers_add(3, auto_rack_dc="dc1")

    # Rolling restart so that all nodes support the feature - but do not
    # allow enabling yet
    for srv in servers:
        await manager.server_update_config(srv.server_id, 'error_injections_at_startup', [
            'raft_topology_suppress_enabling_features',
            'features_enable_test_feature',
        ])
        await manager.server_restart(srv.server_id)

    # Try to downgrade one node: clearing the injections drops its support
    # for the test feature, so the restart must be rejected.
    await manager.server_update_config(servers[0].server_id, 'error_injections_at_startup', [])
    await manager.server_stop(servers[0].server_id)
    await manager.server_start(servers[0].server_id,
                               expected_error="Feature 'TEST_ONLY_FEATURE' was previously supported by all nodes in the cluster")

    # Unblock enabling features on the nodes that are still running
    for srv in servers[1:]:
        await manager.api.disable_injection(srv.ip_addr, 'raft_topology_suppress_enabling_features')

    # Re-enable the feature again and restart
    await manager.server_update_config(servers[0].server_id, 'error_injections_at_startup', [
        'features_enable_test_feature',
    ])
    await manager.server_start(servers[0].server_id)

    # Nodes should start supporting the feature.
    # Fixed: was the accidental double assignment `cql = cql = manager.get_cql()`.
    cql = manager.get_cql()
    hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
    await asyncio.gather(*(wait_for_feature('TEST_ONLY_FEATURE', cql, h, time.time() + 60) for h in hosts))
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
async def test_simulate_upgrade_legacy_to_raft_listener_registration(manager: ManagerClient):
    """
    We simulate an upgrade from legacy mode to Raft. Our goal is
    to make sure that the cluster successfully reaches the state
    where it can start the upgrade procedure.
    This test effectively reproduces the problem described
    in scylladb/scylladb#18049.
    """
    # We need this so that the first logs we wait for appear.
    cmdline = ["--logger-log-level", "raft_topology=debug"]
    # Tablets and legacy mode are incompatible with each other.
    config = {"force_gossip_topology_changes": True,
    "tablets_mode_for_new_keyspaces": "disabled"}
    # Suppressing SUPPORTS_CONSISTENT_TOPOLOGY_CHANGES makes the nodes
    # come up without raft-topology support, i.e. in legacy mode.
    error_injection = { "name": "suppress_features", "value": "SUPPORTS_CONSISTENT_TOPOLOGY_CHANGES"}
    bad_config = config | {"error_injections_at_startup": [error_injection]}
    # We need to bootstrap the nodes one-by-one.
    # We can't do it concurrently without Raft.
    s1 = await manager.server_add(cmdline=cmdline, config=bad_config)
    s2 = await manager.server_add(cmdline=cmdline, config=bad_config)
    # Simulate upgrading node 1: restart it with the feature-suppression
    # injection removed.
    await manager.server_stop_gracefully(s1.server_id)
    await manager.server_update_config(s1.server_id, "error_injections_at_startup", [])
    log = await manager.server_open_log(s1.server_id)
    # Mark the log position before the restart so wait_for only matches
    # messages produced by the new process.
    mark = await log.mark()
    await manager.server_start(s1.server_id)
    # The node should block after this: node 2 still suppresses the feature.
    await log.wait_for("Waiting for cluster feature `SUPPORTS_CONSISTENT_TOPOLOGY_CHANGES`", from_mark=mark)
    mark = await log.mark()
    # Simulate upgrading node 2.
    await manager.server_stop_gracefully(s2.server_id)
    await manager.server_update_config(s2.server_id, "error_injections_at_startup", [])
    await manager.server_start(s2.server_id)
    # If everything went smoothly, we'll get to this.
    await log.wait_for("The cluster is ready to start upgrade to the raft topology")