mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-25 11:00:35 +00:00
Compare commits
3 Commits
ykaul/skip
...
next
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
70261dc674 | ||
|
|
e414b2b0b9 | ||
|
|
99ac36b353 |
@@ -1088,7 +1088,7 @@ void compaction_manager::register_metrics() {
|
||||
sm::make_gauge("normalized_backlog", [this] { return _last_backlog / available_memory(); },
|
||||
sm::description("Holds the sum of normalized compaction backlog for all tables in the system. Backlog is normalized by dividing backlog by shard's available memory.")),
|
||||
sm::make_counter("validation_errors", [this] { return _validation_errors; },
|
||||
sm::description("Holds the number of encountered validation errors.")).set_skip_when_empty(),
|
||||
sm::description("Holds the number of encountered validation errors.")),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ from concurrent.futures.thread import ThreadPoolExecutor
|
||||
from multiprocessing import Event
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from test import TOP_SRC_DIR, path_to
|
||||
from test import TOP_SRC_DIR, MODES_TIMEOUT_FACTOR, path_to
|
||||
from test.pylib.random_tables import RandomTables
|
||||
from test.pylib.skip_types import skip_env
|
||||
from test.pylib.util import unique_name
|
||||
@@ -394,3 +394,8 @@ async def key_provider(request, tmpdir, scylla_binary):
|
||||
"""Encryption providers fixture"""
|
||||
async with make_key_provider_factory(request.param, tmpdir, scylla_binary) as res:
|
||||
yield res
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
def failure_detector_timeout(build_mode):
    """Return the failure detector timeout, in milliseconds, for *build_mode*.

    The result is the 2000 ms base value (the number the tests previously
    hard-coded for ``failure_detector_timeout_in_ms``) multiplied by the
    ``MODES_TIMEOUT_FACTOR`` entry for the given build mode.
    """
    base_timeout_ms = 2000
    # presumably larger for slower build modes -- confirm against
    # MODES_TIMEOUT_FACTOR's definition in the `test` package.
    mode_factor = MODES_TIMEOUT_FACTOR[build_mode]
    return base_timeout_ms * mode_factor
|
||||
|
||||
@@ -19,7 +19,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
||||
async def test_kill_coordinator_during_op(manager: ManagerClient) -> None:
|
||||
async def test_kill_coordinator_during_op(manager: ManagerClient, failure_detector_timeout) -> None:
|
||||
""" Kill coordinator with error injection while topology operation is running for cluster: decommission,
|
||||
bootstrap, removenode, replace.
|
||||
|
||||
@@ -41,7 +41,7 @@ async def test_kill_coordinator_during_op(manager: ManagerClient) -> None:
|
||||
"""
|
||||
# Decrease the failure detector threshold so we don't have to wait for too long.
|
||||
config = {
|
||||
'failure_detector_timeout_in_ms': 2000
|
||||
'failure_detector_timeout_in_ms': failure_detector_timeout
|
||||
}
|
||||
cmdline = [
|
||||
'--logger-log-level', 'raft_topology=trace',
|
||||
|
||||
@@ -22,11 +22,11 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.nightly
|
||||
async def test_banned_node_notification(manager: ManagerClient) -> None:
|
||||
async def test_banned_node_notification(manager: ManagerClient, failure_detector_timeout) -> None:
|
||||
"""Test that a node banned from the cluster gets a notification about being banned"""
|
||||
# Decrease the failure detector threshold so we don't have to wait for too long.
|
||||
config = {
|
||||
'failure_detector_timeout_in_ms': 2000
|
||||
'failure_detector_timeout_in_ms': failure_detector_timeout
|
||||
}
|
||||
srvs = await manager.servers_add(3, config=config, auto_rack_dc="dc")
|
||||
cql = manager.get_cql()
|
||||
|
||||
@@ -60,14 +60,14 @@ async def make_servers(manager: ManagerClient, servers_num: int,
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_raft_replace_ignore_nodes(manager: ManagerClient) -> None:
|
||||
async def test_raft_replace_ignore_nodes(manager: ManagerClient, failure_detector_timeout) -> None:
|
||||
"""Replace 3 dead nodes.
|
||||
|
||||
This is a slow test with a 7 node cluster and 3 replace operations,
|
||||
we want to run it only in dev mode.
|
||||
"""
|
||||
logger.info("Booting initial cluster")
|
||||
servers = await make_servers(manager, 7, config={'failure_detector_timeout_in_ms': 2000})
|
||||
servers = await make_servers(manager, 7, config={'failure_detector_timeout_in_ms': failure_detector_timeout})
|
||||
|
||||
s1_id = await manager.get_host_id(servers[1].server_id)
|
||||
s2_id = await manager.get_host_id(servers[2].server_id)
|
||||
|
||||
@@ -21,9 +21,9 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_replace_different_ip(manager: ManagerClient) -> None:
|
||||
async def test_replace_different_ip(manager: ManagerClient, failure_detector_timeout) -> None:
|
||||
"""Replace an existing node with new node using a different IP address"""
|
||||
servers = await manager.servers_add(3, config={'failure_detector_timeout_in_ms': 2000})
|
||||
servers = await manager.servers_add(3, config={'failure_detector_timeout_in_ms': failure_detector_timeout})
|
||||
logger.info(f"cluster started, servers {servers}")
|
||||
|
||||
logger.info(f"replacing server {servers[0]}")
|
||||
@@ -67,18 +67,18 @@ async def test_replace_different_ip(manager: ManagerClient) -> None:
|
||||
logger.info(f"server {s} system.peers and gossiper state is valid")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_replace_different_ip_using_host_id(manager: ManagerClient) -> None:
|
||||
async def test_replace_different_ip_using_host_id(manager: ManagerClient, failure_detector_timeout) -> None:
|
||||
"""Replace an existing node with new node reusing the replaced node host id"""
|
||||
servers = await manager.servers_add(3, config={'failure_detector_timeout_in_ms': 2000})
|
||||
servers = await manager.servers_add(3, config={'failure_detector_timeout_in_ms': failure_detector_timeout})
|
||||
await manager.server_stop(servers[0].server_id)
|
||||
replace_cfg = ReplaceConfig(replaced_id = servers[0].server_id, reuse_ip_addr = False, use_host_id = True)
|
||||
await manager.server_add(replace_cfg)
|
||||
await wait_for_token_ring_and_group0_consistency(manager, time.time() + 30)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_replace_reuse_ip(request, manager: ManagerClient) -> None:
|
||||
async def test_replace_reuse_ip(request, manager: ManagerClient, failure_detector_timeout) -> None:
|
||||
"""Replace an existing node with new node using the same IP address"""
|
||||
servers = await manager.servers_add(3, config={'failure_detector_timeout_in_ms': 2000}, auto_rack_dc="dc1")
|
||||
servers = await manager.servers_add(3, config={'failure_detector_timeout_in_ms': failure_detector_timeout}, auto_rack_dc="dc1")
|
||||
host2 = (await wait_for_cql_and_get_hosts(manager.get_cql(), [servers[2]], time.time() + 60))[0]
|
||||
|
||||
logger.info(f"creating test table")
|
||||
@@ -130,9 +130,9 @@ async def test_replace_reuse_ip(request, manager: ManagerClient) -> None:
|
||||
await manager.server_sees_other_server(servers[2].ip_addr, servers[0].ip_addr)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_replace_reuse_ip_using_host_id(manager: ManagerClient) -> None:
|
||||
async def test_replace_reuse_ip_using_host_id(manager: ManagerClient, failure_detector_timeout) -> None:
|
||||
"""Replace an existing node with new node using the same IP address and same host id"""
|
||||
servers = await manager.servers_add(3, config={'failure_detector_timeout_in_ms': 2000})
|
||||
servers = await manager.servers_add(3, config={'failure_detector_timeout_in_ms': failure_detector_timeout})
|
||||
await manager.server_stop(servers[0].server_id)
|
||||
replace_cfg = ReplaceConfig(replaced_id = servers[0].server_id, reuse_ip_addr = True, use_host_id = True)
|
||||
await manager.server_add(replace_cfg)
|
||||
|
||||
@@ -14,9 +14,9 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_replace_with_same_ip_twice(manager: ManagerClient) -> None:
|
||||
async def test_replace_with_same_ip_twice(manager: ManagerClient, failure_detector_timeout) -> None:
|
||||
logger.info("starting a cluster with two nodes")
|
||||
servers = await manager.servers_add(3, config={'failure_detector_timeout_in_ms': 2000})
|
||||
servers = await manager.servers_add(3, config={'failure_detector_timeout_in_ms': failure_detector_timeout})
|
||||
logger.info(f"cluster started {servers}")
|
||||
|
||||
async def replace_with_same_ip(s: ServerInfo) -> ServerInfo:
|
||||
|
||||
@@ -119,14 +119,14 @@ async def test_tablet_transition_sanity(manager: ManagerClient, action):
|
||||
@pytest.mark.parametrize("fail_stage", ["streaming", "allow_write_both_read_old", "write_both_read_old", "write_both_read_new", "use_new", "cleanup", "cleanup_target", "end_migration", "revert_migration"])
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
||||
async def test_node_failure_during_tablet_migration(manager: ManagerClient, fail_replica, fail_stage):
|
||||
async def test_node_failure_during_tablet_migration(manager: ManagerClient, fail_replica, fail_stage, failure_detector_timeout):
|
||||
if fail_stage == 'cleanup' and fail_replica == 'destination':
|
||||
skip_env('Failing destination during cleanup is pointless')
|
||||
if fail_stage == 'cleanup_target' and fail_replica == 'source':
|
||||
skip_env('Failing source during target cleanup is pointless')
|
||||
|
||||
logger.info("Bootstrapping cluster")
|
||||
cfg = {'enable_user_defined_functions': False, 'tablets_mode_for_new_keyspaces': 'enabled', 'failure_detector_timeout_in_ms': 2000}
|
||||
cfg = {'enable_user_defined_functions': False, 'tablets_mode_for_new_keyspaces': 'enabled', 'failure_detector_timeout_in_ms': failure_detector_timeout}
|
||||
host_ids = []
|
||||
servers = []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user