Files
scylladb/test/cluster/storage/conftest.py
Tomasz Grabiec 624fe11178 test: Annotate server_stop() calls where conviction is useless
Pass convict=False explicitly to server_stop() calls where conviction
provides no benefit because there is no consumer of the failure
detection:

 - single-node clusters (no other node to call the API on)
 - all nodes being stopped concurrently (no live node remains)
 - immediate restart (no test logic between stop and start
   depends on other nodes detecting the stopped node as dead)
 - node stopped for file manipulation or bootstrap abort
 - majority killed with no quorum on surviving nodes to react
 - no test logic depends on other nodes detecting the failure

This is a no-op change since the default is already convict=False,
but makes the intent explicit for each call site.
2026-05-21 21:13:55 +02:00

99 lines
4.0 KiB
Python

#!/usr/bin/python3
#
# Copyright (C) 2025-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
#
import pytest
import logging
import os
import pathlib
import shutil
import subprocess
import time
import uuid
from typing import Callable
from contextlib import asynccontextmanager, contextmanager
from dataclasses import dataclass
from test.cluster.conftest import PHASE_REPORT_KEY
from test.pylib.manager_client import ManagerClient
from test.pylib.util import gather_safely
# Module-level logger; used below to report failed unmount attempts during fixture teardown.
logger = logging.getLogger(__name__)
def _unmount_volume(mount, retries=10, delay=0.5):
    """Unmount the FUSE filesystem at *mount*, retrying on failure.

    Runs ``fusermount3 -u`` up to *retries* times, logging a warning after
    every failed attempt and sleeping *delay* seconds before the next one.

    Raises:
        subprocess.CalledProcessError: if the last attempt still fails.
    """
    for attempt in range(1, retries + 1):
        result = subprocess.run(["fusermount3", "-u", mount], capture_output=True, text=True)
        if result.returncode == 0:
            return
        logger.warning("fusermount3 -u attempt %d/%d failed: %s", attempt, retries, result.stderr.strip())
        if attempt == retries:
            raise subprocess.CalledProcessError(result.returncode, result.args, result.stdout, result.stderr)
        time.sleep(delay)


@pytest.fixture(scope="function")
def volumes_factory(pytestconfig, build_mode, request):
    """Yield a factory that creates size-limited ext4 volumes mounted via fuse2fs.

    The yielded object is a context manager taking a list of sizes (each is
    passed verbatim to ``truncate -s``). For every size it creates an image
    file, formats it as ext4, mounts it and yields the list of ``VolumeInfo``
    records created so far by this fixture instance.

    Volumes are NOT released when the context manager exits. They are
    unmounted, and their images deleted, in this fixture's teardown. If the
    test failed, or the ``save_log_on_success`` option is set, the mounted
    data and the volume log file are copied out first.
    """
    run_id = str(uuid.uuid4())
    tmpdir = pytestconfig.getoption("tmpdir")
    base = pathlib.Path(f"{tmpdir}/{build_mode}/volumes/{run_id}")
    volumes = []

    @dataclass
    class VolumeInfo:
        img: pathlib.Path    # backing image file, "<mount>.img"
        mount: pathlib.Path  # directory the image is mounted on
        log: pathlib.Path    # "<mount>.log", copied out when data is preserved

    @contextmanager
    def wrapper(sizes: list[str]):
        # NOTE: directories are named by position (scylla-0, scylla-1, ...) and
        # created without exist_ok, so entering the factory a second time
        # within one test raises FileExistsError.
        for index, size in enumerate(sizes):
            path = base / f"scylla-{index}"
            path.mkdir(parents=True)
            volume = VolumeInfo(path.with_name(f"{path.name}.img"), path, path.with_name(f"{path.name}.log"))
            subprocess.run(["truncate", "-s", size, volume.img], check=True)
            subprocess.run(["mkfs.ext4", volume.img], check=True, stdout=subprocess.DEVNULL)
            # -o uid=... and -o gid=... to avoid root:root ownership of mounted files
            # -o fakeroot to avoid permission denied errors on creating files inside docker
            subprocess.run(["fuse2fs", "-o", f"uid={os.getuid()}", "-o", f"gid={os.getgid()}", "-o", "fakeroot", volume.img, volume.mount], check=True)
            # Register only after a successful mount so teardown never tries
            # to unmount something that was not mounted.
            volumes.append(volume)
        yield volumes

    yield wrapper

    # Unmount volumes and optionally preserve data. Copying cannot be done when the
    # wrapper context manager exits, as at that point the test is not yet marked as
    # failed. So the copy and consequently volumes cleanup have to be done here.
    reports = request.node.stash[PHASE_REPORT_KEY]
    call_report = reports.get("call")
    test_failed = call_report is not None and call_report.failed
    preserve_data = test_failed or request.config.getoption("save_log_on_success")
    artifacts_dir = base.parent.parent

    # Every volume is processed even if an earlier one fails, so that one
    # broken volume does not leave the remaining FUSE mounts behind. The first
    # failure is re-raised at the end, matching what a fail-fast loop would
    # have reported.
    failures = []
    for index, volume in enumerate(volumes):
        preserved = True
        if preserve_data:
            try:
                shutil.copytree(volume.mount, artifacts_dir / f"scylla-{run_id}-{index}", ignore=shutil.ignore_patterns('commitlog*', 'lost+found*'))
                shutil.copyfile(volume.log, artifacts_dir / f"scylla-{run_id}-{index}.log")
            except Exception as exc:
                preserved = False
                logger.exception("failed to preserve data of volume %s", volume.mount)
                failures.append(exc)
        try:
            _unmount_volume(volume.mount)
            # Keep the image when its data could not be copied out, so the
            # contents can still be recovered manually.
            if preserved:
                os.unlink(volume.img)
        except Exception as exc:
            logger.exception("failed to clean up volume %s", volume.mount)
            failures.append(exc)
    if failures:
        raise failures[0]
@asynccontextmanager
async def space_limited_servers(manager: ManagerClient, volumes_factory: Callable, sizes: list[str], property_file=None, **server_args):
    """Start one server per size-limited volume and stop them all on exit.

    Args:
        manager: client used to add and stop servers.
        volumes_factory: context-manager factory; called with *sizes*, it
            yields volume records exposing a ``mount`` path.
        sizes: one entry per volume (and therefore per server).
        property_file: per-server property files, indexed by server position.
            When falsy, every server is placed in ``dc1`` on its own rack
            ``r<index>``.
        **server_args: forwarded to ``manager.server_add``. A ``cmdline``
            entry, if present, is extended with ``--workdir <mount>``.

    Yields:
        The list of servers returned by ``manager.server_add``, in volume order.
    """
    servers = []
    cmdline = server_args.pop("cmdline", [])
    with volumes_factory(sizes) as volumes:
        try:
            if not property_file:
                property_file = [{"dc": "dc1", "rack": f"r{index}"} for index in range(len(volumes))]
            # Add servers one at a time and record each as soon as it exists.
            # If a later server_add fails, the finally clause below then still
            # stops the servers that were already started.
            for index, volume in enumerate(volumes):
                server = await manager.server_add(cmdline=[*cmdline, '--workdir', str(volume.mount)],
                                                  property_file=property_file[index],
                                                  **server_args)
                servers.append(server)
            yield servers
        finally:
            # Stop servers to be able to unmount volumes
            await gather_safely(*(manager.server_stop(server.server_id, convict=False) for server in servers))