mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-28 18:50:53 +00:00
scylla_gdb: use persistent GDB - decrease test execution time
This commit replaces the previous approach of running pytest inside GDB’s Python interpreter. Instead, tests are executed by driving a persistent GDB process externally using pexpect. - pexpect: Python library for controlling interactive programs (used here to send commands to GDB and capture its output) - persistent GDB: keep one GDB session alive across multiple tests instead of starting a new process for each test Tests can now be executed via `./test.py gdb` or with `pytest test/scylla_gdb`. This improves performance and makes failures easier to debug since pytest no longer runs hidden inside GDB subprocesses. Closes scylladb/scylladb#24804
This commit is contained in:
committed by
Nadav Har'El
parent
347c69b7e2
commit
e978cc2a80
@@ -124,6 +124,7 @@ There are several test directories that are excluded from orchestration by `test
|
||||
- test/cql
|
||||
- test/cqlpy
|
||||
- test/rest_api
|
||||
- test/scylla_gdb
|
||||
|
||||
This means that `test.py` will not run tests directly, but will delegate all work to `pytest`.
|
||||
That's why all these directories do not have `suite.yaml` files.
|
||||
|
||||
1
test.py
1
test.py
@@ -68,6 +68,7 @@ PYTEST_RUNNER_DIRECTORIES = [
|
||||
TEST_DIR / 'cqlpy',
|
||||
TEST_DIR / 'rest_api',
|
||||
TEST_DIR / 'nodetool',
|
||||
TEST_DIR / 'scylla_gdb',
|
||||
]
|
||||
|
||||
launch_time = time.monotonic()
|
||||
|
||||
@@ -1,80 +1,92 @@
|
||||
# Copyright 2022-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
"""Conftest for Scylla GDB tests"""
|
||||
|
||||
# This file configures pytest for all tests in this directory, and also
|
||||
# defines common test fixtures for all of them to use. A "fixture" is some
|
||||
# setup which an individual test requires to run; The fixture has setup code
|
||||
# and teardown code, and if multiple tests require the same fixture, it can
|
||||
# be set up only once - while still allowing the user to run individual tests
|
||||
# and automatically setting up the fixtures they need.
|
||||
|
||||
import pytest
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
try:
|
||||
import gdb as gdb_library
|
||||
except:
|
||||
print('This test must be run inside gdb. Run ./run instead.')
|
||||
exit(1)
|
||||
import pexpect
|
||||
import pytest
|
||||
|
||||
from test.pylib.suite.python import PythonTest
|
||||
from test.pylib.util import LogPrefixAdapter
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption('--scylla-pid', action='store', default=None,
|
||||
help='Process ID of running Scylla to attach gdb to')
|
||||
parser.addoption('--scylla-tmp-dir', action='store', default=None,
|
||||
help='Temporary directory where Scylla runs')
|
||||
@pytest.fixture(scope="module")
|
||||
async def scylla_server(testpy_test: PythonTest | None):
|
||||
"""Return a running Scylla server instance from the active test cluster."""
|
||||
logger_prefix = testpy_test.mode + "/" + testpy_test.uname
|
||||
logger = LogPrefixAdapter(
|
||||
logging.getLogger(logger_prefix), {"prefix": logger_prefix}
|
||||
)
|
||||
scylla_cluster = await testpy_test.suite.clusters.get(logger)
|
||||
scylla_server = next(iter(scylla_cluster.running.values()))
|
||||
|
||||
# Scylla's "scylla-gdb.py" does two things: It configures gdb to add new
|
||||
# "scylla" commands, and it implements a bunch of useful functions in Python.
|
||||
# Doing just the former is easy (just add "-x scylla-gdb.py" when running
|
||||
# gdb), but we also want the latter - we want to be able to use some of those
|
||||
# extra functions in the test code. For that, we need to actually import the
|
||||
# scylla-gdb.py module from the test code here - and remember the module
|
||||
# object.
|
||||
@pytest.fixture(scope="session")
|
||||
def scylla_gdb(request):
|
||||
save_sys_path = sys.path
|
||||
sys.path.insert(1, sys.path[0] + '/../..')
|
||||
# Unfortunately, the file's name includes a dash which requires some
|
||||
# funky workarounds to import.
|
||||
import importlib
|
||||
yield scylla_server
|
||||
|
||||
await testpy_test.suite.clusters.put(scylla_cluster, is_dirty=True)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def gdb_process(scylla_server, request):
    """Spawn an interactive GDB attached to the Scylla process.

    Loads `scylla-gdb.py` and test helpers (`gdb_utils.py`) so tests can run GDB/Python helpers
    against the live Scylla process.

    The GDB arguments are handed to pexpect as an explicit list rather than a
    single command string: multi-word GDB commands such as
    `set python print-stack full` must reach GDB as ONE argument after `-ex`,
    and paths must not be re-split on whitespace.

    Yields:
        The pexpect-spawned GDB session, positioned right after the first
        `(gdb)` prompt.
    """
    scylla_gdb_py = os.path.join(request.fspath.dirname, "..", "..", "scylla-gdb.py")
    script_py = os.path.join(request.fspath.dirname, "gdb_utils.py")
    gdb_args = [
        "-q",
        "--nx",
        "-iex", "set confirm off",
        "-iex", "set pagination off",
        "-se", str(scylla_server.exe),
        "-p", str(scylla_server.cmd.pid),
        "-ex", "set python print-stack full",
        "-x", scylla_gdb_py,
        "-x", script_py,
    ]
    session = pexpect.spawn("gdb", gdb_args, maxread=10, searchwindowsize=10)
    try:
        # Wait until GDB has attached and loaded both scripts.
        session.expect_exact("(gdb)")
        yield session
    finally:
        # Also runs when the initial prompt never shows up, so a half-started
        # GDB does not stay attached to the Scylla process.
        session.terminate()
|
||||
|
||||
|
||||
def execute_gdb_command(
|
||||
gdb_process, scylla_command: str = None, full_command: str = None
|
||||
):
|
||||
"""
|
||||
Execute a command in an interactive GDB session and return its output.
|
||||
|
||||
The command can be provided either as a Scylla GDB command (which will be
|
||||
wrapped and executed via GDB's Python interface) or as a full raw GDB
|
||||
command string.
|
||||
|
||||
The function waits for the GDB prompt to reappear, enforces a timeout,
|
||||
and fails the test if the command does not complete or if GDB reports an
|
||||
error.
|
||||
|
||||
Args:
|
||||
gdb_process (pexpect.pty_spawn.spawn): An active GDB process spawned via pexpect
|
||||
scylla_command (str, optional): A GDB Scylla command (from scylla-gdb.py) to execute.
|
||||
full_command (str, optional): A raw GDB command string to execute.
|
||||
"""
|
||||
command = f"python gdb.execute('scylla {scylla_command}')"
|
||||
if full_command:
|
||||
command = full_command
|
||||
|
||||
gdb_process.sendline(command)
|
||||
try:
|
||||
mod = importlib.import_module("scylla-gdb")
|
||||
except Exception as e:
|
||||
pytest.exit(f'Failed to load scylla-gdb: {e}')
|
||||
sys.path = save_sys_path
|
||||
yield mod
|
||||
gdb_process.expect_exact("(gdb)", timeout=180)
|
||||
except pexpect.exceptions.TIMEOUT:
|
||||
gdb_process.sendcontrol("c")
|
||||
gdb_process.expect_exact("(gdb)", timeout=1)
|
||||
pytest.fail("GDB command did not complete within the timeout period")
|
||||
result = gdb_process.before.decode("utf-8")
|
||||
|
||||
# "gdb" fixture, attaching to a running Scylla and letting the tests
|
||||
# run gdb commands on it. The fixture returns a module
|
||||
# The gdb fixture depends on scylla_gdb, to ensure that the "scylla"
|
||||
# subcommands are loaded into gdb.
|
||||
@pytest.fixture(scope="session")
|
||||
def gdb(request, scylla_gdb):
|
||||
try:
|
||||
gdb_library.lookup_type('size_t')
|
||||
except:
|
||||
pytest.exit('ERROR: Scylla executable was compiled without debugging '
|
||||
'information (-g) so cannot be used to test gdb. Please '
|
||||
'set SCYLLA environment variable.')
|
||||
|
||||
# The gdb tests are known to be broken on aarch64 (see
|
||||
# https://sourceware.org/bugzilla/show_bug.cgi?id=27886) and untested
|
||||
# on anything else. So skip them.
|
||||
if os.uname().machine != 'x86_64':
|
||||
pytest.skip('test/scylla-gdb/conftest.py: gdb tests skipped for non-x86_64')
|
||||
gdb_library.execute('set python print-stack full')
|
||||
scylla_pid = request.config.getoption('scylla_pid')
|
||||
gdb_library.execute(f'attach {scylla_pid}')
|
||||
# FIXME: We can start the test here, but at this point Scylla may be
|
||||
# completely idle. To make the situation more interesting (and, e.g., have
|
||||
# live live tasks for test_misc.py::task()), we can set a breakpoint and
|
||||
# let Scylla run a bit more and stop in the middle of its work. However,
|
||||
# I'm not sure where to set a break point that is actually guaranteed to
|
||||
# happen :(
|
||||
#gdb_library.execute('handle SIG34 SIG35 SIGUSR1 nostop noprint pass')
|
||||
#gdb_library.execute('break sstables::compact_sstables')
|
||||
#gdb_library.execute('continue')
|
||||
yield gdb_library
|
||||
assert "Error" not in result
|
||||
return result
|
||||
|
||||
80
test/scylla_gdb/gdb_utils.py
Normal file
80
test/scylla_gdb/gdb_utils.py
Normal file
@@ -0,0 +1,80 @@
|
||||
# Copyright 2025-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
"""
|
||||
GDB helper functions for `scylla_gdb` tests.
|
||||
They should be loaded to GDB by "-x {dir}/gdb_utils.py";
|
||||
when loaded, they can be run in gdb e.g. `python get_sstables()`
|
||||
|
||||
Depends on helper functions injected to GDB by `scylla-gdb.py` script.
|
||||
(sharded, for_each_table, seastar_lw_shared_ptr, find_sstables, find_vptrs, resolve,
|
||||
get_seastar_memory_start_and_size).
|
||||
"""
|
||||
|
||||
import gdb
|
||||
import uuid
|
||||
|
||||
|
||||
def get_schema():
    """Print the schema pointer of the first table as `schema= <pointer>`.

    Resolves the local database from `::debug::the_database`, takes the first
    table yielded by `for_each_table()` and prints the raw pointer held by its
    `_schema` member.
    """
    database = sharded(gdb.parse_and_eval('::debug::the_database')).local()
    first_table = next(for_each_table(database))
    schema_ptr = seastar_lw_shared_ptr(first_table['_schema']).get()
    print('schema=', schema_ptr)
|
||||
|
||||
|
||||
def get_sstables():
    """Print the first sstable yielded by `find_sstables()`.

    The output has the form `sst=(sstables::sstable *) <value>`, i.e. the
    value is prefixed with a cast so it can be pasted into GDB commands.
    """
    first_sstable = next(find_sstables())
    print("sst=(sstables::sstable *)", first_sstable)
|
||||
|
||||
|
||||
def get_task():
    """
    Print the address of one task object as `task=<address>`.

    Some commands need a task to work on. Because Scylla was stopped while it
    was idle, we don't expect to find any ready task with get_local_tasks(),
    but we can find one with a find_vptrs() loop. A nice one (with multiple
    tasks chained to it for "scylla fiber") is one from
    http_server::do_accept_one.

    Only the first matching object is printed; nothing is printed when no
    object matches.
    """
    for candidate, vtable in find_vptrs():
        symbol = resolve(vtable, startswith='vtable for seastar::continuation')
        if not symbol or 'do_accept_one' not in symbol:
            continue
        task_addr = candidate.cast(gdb.lookup_type('uintptr_t'))
        print(f"task={task_addr}")
        break
|
||||
|
||||
|
||||
def get_coroutine():
    """
    Print the address of a coroutine frame as `coroutine_config=<address>`.

    Similar to get_task(), but looks for an object whose vtable slot resolves
    to the resume function of `service::topology_coordinator::run()`.

    Only the first match is printed and the scan stops there, the same way
    get_task() does; nothing is printed when no object matches.
    """
    target = 'service::topology_coordinator::run() [clone .resume]'
    for obj_addr, vtable_addr in find_vptrs():
        name = resolve(vtable_addr)
        if name and name.strip() == target:
            print(f"coroutine_config={obj_addr.cast(gdb.lookup_type('uintptr_t'))}")
            # One frame is enough; don't keep walking the rest of the heap.
            break
|
||||
|
||||
|
||||
def coroutine_debug_config(tmpdir):
    """
    Collect diagnostics for a missing topology-coordinator coroutine frame.

    Meant to be called after get_coroutine() printed nothing. This failure
    happens rarely and unreliably, so as much context as possible is gathered:
    https://github.com/scylladb/scylladb/issues/22501

    Steps, in order:
      * run `scylla find -a <address of the resume function>` to check whether
        scylla find agrees with find_vptrs();
      * write the values returned by get_seastar_memory_start_and_size();
      * list every find_vptrs() hit whose resolved name contains `.resume`;
      * dump a core of the process with `gcore`.

    Args:
        tmpdir: directory used to build the core file path; the core is
            written to `{tmpdir}/../scylla_gdb_coro_task-<uuid>.core`.

    Raises:
        gdb.error: always, once the diagnostics and the core were produced.
    """
    target = 'service::topology_coordinator::run() [clone .resume]'
    resume_addr = int(gdb.parse_and_eval(f"&'{target}'"))
    find_command = f"scylla find -a 0x{resume_addr:x}"
    gdb.write(f"Didn't find {target} (0x{resume_addr:x}). Running '{find_command}'\n")
    memory = get_seastar_memory_start_and_size()
    gdb.execute(find_command)
    gdb.write(f"Memory range: 0x{memory[0]:x} 0x{memory[1]:x}\n")
    gdb.write("Found coroutines:\n")
    for _, vtable in find_vptrs():
        symbol = resolve(vtable)
        if not symbol or '.resume' not in symbol.strip():
            continue
        gdb.write(f"{symbol}\n")
    core_filename = f"{tmpdir}/../scylla_gdb_coro_task-{uuid.uuid4()}.core"
    gdb.execute(f"gcore {core_filename}")
    raise gdb.error(f"No coroutine frames found with expected name. Dumped Scylla core to {core_filename}")
|
||||
@@ -1,119 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
# Use the run.py library from ../cqlpy:
|
||||
sys.path.insert(1, sys.path[0] + '/../cqlpy')
|
||||
import run
|
||||
|
||||
print('Scylla is: ' + run.find_scylla() + '.')
|
||||
|
||||
# Gdb will only work if the executable was built with debug symbols
|
||||
# (e.g., Scylla's release or debug build modes mode, but not dev mode).
|
||||
# Check this quickly up-front, instead of waiting for gdb to fail in a
|
||||
# mysterious way when it can't look up types.
|
||||
if not '.debug_info' in subprocess.run(['objdump', '-h', run.scylla],
|
||||
capture_output=True, text=True).stdout:
|
||||
print(f'Scylla executable was compiled without debugging information (-g)')
|
||||
print(f'so cannot be used to test gdb. Please set SCYLLA environment variable.')
|
||||
exit(1)
|
||||
|
||||
# Run Scylla, waiting until it can respond to CQL
|
||||
pid = run.run_with_temporary_dir(run.run_scylla_cmd)
|
||||
ip = run.pid_to_ip(pid)
|
||||
run.wait_for_services(pid, [lambda: run.check_cql(ip)])
|
||||
|
||||
# We do something strange here: We start pytest *inside* gdb's Python
|
||||
# interpreter. This will allow us to test various gdb commands added
|
||||
# by scylla-gdb.py inside gdb using the pytest framework.
|
||||
# TODO: consider a more straightforward implementation, where we don't
|
||||
# run pytest inside gdb - and instead run gdb as a separate process and
|
||||
# pytest just sends commands to it.
|
||||
# TODO: think if we can avoid code duplication with run.run_ptest().
|
||||
def run_pytest_in_gdb(pytest_dir, executable, additional_parameters):
|
||||
sys.stdout.flush()
|
||||
sys.stderr.flush()
|
||||
pid = os.fork()
|
||||
if pid == 0:
|
||||
# child:
|
||||
run.run_with_temporary_dir_pids = set() # no children to clean up on child
|
||||
run.run_pytest_pids = set()
|
||||
os.chdir(pytest_dir)
|
||||
pytest_args = ['-o', 'junit_family=xunit2'] + additional_parameters
|
||||
pytest_cmd = f'print("Starting pytest {" ".join(pytest_args)}"); import pytest; sys.argv[0]="pytest"; sys.exit(pytest.main({str(pytest_args)}))'
|
||||
print(f'Starting gdb {executable}')
|
||||
sys.stdout.flush()
|
||||
args = ['gdb',
|
||||
'-batch', '-n',
|
||||
'-ex', 'set python print-stack full',
|
||||
'-ex', 'python ' + pytest_cmd,
|
||||
]
|
||||
if executable:
|
||||
args += ['-se', executable]
|
||||
os.execvp('gdb', args)
|
||||
exit(1)
|
||||
# parent:
|
||||
run.run_pytest_pids.add(pid)
|
||||
if os.waitpid(pid, 0)[1]:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
def modify_junit_xml_filename(args, filename_modifier):
|
||||
"""
|
||||
Modify the filename part of --junit-xml parameter while preserving the path.
|
||||
|
||||
Args:
|
||||
args: List of command line arguments
|
||||
filename_modifier: suffix to append to the filename part of --junit-xml
|
||||
|
||||
Returns:
|
||||
Modified list of arguments
|
||||
"""
|
||||
if filename_modifier is None:
|
||||
return args
|
||||
|
||||
modified_args = []
|
||||
for arg in args:
|
||||
if arg.startswith('--junit-xml='):
|
||||
junit_xml = Path(arg.split('=', 1)[1])
|
||||
new_junit_xml = junit_xml.with_suffix(f'.{filename_modifier}{junit_xml.suffix}')
|
||||
modified_args.append(f'--junit-xml={new_junit_xml}')
|
||||
else:
|
||||
modified_args.append(arg)
|
||||
|
||||
return modified_args
|
||||
|
||||
# Interesting note: We must use "--scylla-tmp-dir=DIR" here instead of
|
||||
# "--scylla-tmp-dir DIR": While the latter does work, pytest has a bug that
|
||||
# its command-line parser finds the given directory name in the original
|
||||
# command line, saves it as "initialpaths", and uses it to print what it
|
||||
# thinks are nice (but are really incorrect) relative paths for "nodes" (test
|
||||
# source files).
|
||||
success = True
|
||||
for with_scylla in [True, False]:
|
||||
modified_argv = None
|
||||
if with_scylla:
|
||||
args = ['--scylla-pid='+str(pid),
|
||||
'--scylla-tmp-dir='+run.pid_to_dir(pid),
|
||||
'-m', 'not without_scylla']
|
||||
executable = run.scylla
|
||||
modified_argv = modify_junit_xml_filename(sys.argv[1:], 'with_scylla')
|
||||
else:
|
||||
args = ['-m', 'without_scylla']
|
||||
executable = ''
|
||||
modified_argv = modify_junit_xml_filename(sys.argv[1:], 'without_scylla')
|
||||
if not run_pytest_in_gdb(sys.path[0], executable, args + modified_argv):
|
||||
success = False
|
||||
|
||||
run.summary = 'Scylla GDB tests pass' if success else 'Scylla GDB tests failure'
|
||||
|
||||
exit(0 if success else 1)
|
||||
|
||||
# Note that the run.cleanup_all() function runs now, just like on any exit
|
||||
# for any reason in this script. It will delete the temporary files and
|
||||
# announce the failure or success of the test (printing run.summary).
|
||||
@@ -1,3 +0,0 @@
|
||||
type: Run
|
||||
run_in_release:
|
||||
- run
|
||||
76
test/scylla_gdb/test_basic_commands.py
Normal file
76
test/scylla_gdb/test_basic_commands.py
Normal file
@@ -0,0 +1,76 @@
|
||||
# Copyright 2025-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
"""
|
||||
Basic tests for commands that do not require additional options.
|
||||
Each only checks that the command does not fail - but not what it does or returns.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from test.scylla_gdb.conftest import execute_gdb_command
|
||||
|
||||
pytestmark = [
|
||||
pytest.mark.skip_mode(
|
||||
mode=["dev", "debug"],
|
||||
reason="Scylla was built without debug symbols; use release mode",
|
||||
),
|
||||
pytest.mark.skip_mode(
|
||||
mode=["dev", "debug", "release"],
|
||||
platform_key="aarch64",
|
||||
reason="GDB is broken on aarch64: https://sourceware.org/bugzilla/show_bug.cgi?id=27886",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "command",
    [
        "features",
        "compaction-tasks",
        "databases",
        "commitlog",
        "tables",
        "table system.local",
        "tablet-metadata",
        "keyspaces",
        "active-sstables",
        "sstables",
        "memtables",
        "repairs",
        "gms",
        "heapprof",
        "io-queues",
        "cache",
        "mem-range",
        "mem-ranges",
        "memory",
        "segment-descs",
        "small-object -o 32 --random-page",
        "small-object -o 64 --summarize",
        "large-objects -o 131072 --random-page",
        "large-objects -o 32768 --summarize",
        "lsa",
        "netw",
        "smp-queues",
        "task-queues",
        "task_histogram",
        "task_histogram -a",
        "tasks",
        "threads",
        "timers",
        "get-config-value compaction_static_shares",
        "read-stats",
        "prepared-statements",
    ],
)
def test_scylla_commands(gdb_process, command):
    """Run one `scylla <command>` in the shared GDB session.

    The output is not inspected here; the test passes as long as
    execute_gdb_command() itself does not fail.
    """
    execute_gdb_command(gdb_process, scylla_command=command)
|
||||
|
||||
|
||||
def test_nonexistent_scylla_command(gdb_process):
    """Check that an unknown `scylla` subcommand is reported with the expected message.

    The message is matched against the AssertionError raised by
    execute_gdb_command().
    """
    expected_message = r'Undefined scylla command: "nonexistent_command"'
    with pytest.raises(AssertionError, match=expected_message):
        execute_gdb_command(gdb_process, scylla_command="nonexistent_command")
|
||||
1
test/scylla_gdb/test_config.yaml
Normal file
1
test/scylla_gdb/test_config.yaml
Normal file
@@ -0,0 +1 @@
|
||||
type: Python
|
||||
@@ -1,234 +0,0 @@
|
||||
import pytest
|
||||
import re
|
||||
import uuid
|
||||
|
||||
# Convenience function to execute a scylla command in gdb, returning its
|
||||
# output as a string - or a gdb.error exception.
|
||||
def scylla(gdb, cmd):
|
||||
return gdb.execute('scylla ' + cmd, from_tty=False, to_string=True)
|
||||
|
||||
# Check that trying an unknown subcommand of the "scylla" subcommand
|
||||
# produces the right error message.
|
||||
def test_nonexistent_scylla_command(gdb):
|
||||
with pytest.raises(gdb.error, match='Undefined scylla command'):
|
||||
scylla(gdb, 'nonexistent_command')
|
||||
|
||||
# Minimal test for some of the commands. Each only checks that the command
|
||||
# does not fail - but not what it does or returns. These tests are still
|
||||
# useful - importantly, they can detect that one of the commands relies on
|
||||
# some internal implementation detail which no longer works, and needs to
|
||||
# be fixed.
|
||||
|
||||
def test_features(gdb):
|
||||
scylla(gdb, 'features')
|
||||
|
||||
def test_compaction_tasks(gdb):
|
||||
scylla(gdb, 'compaction-tasks')
|
||||
|
||||
def test_databases(gdb):
|
||||
scylla(gdb, 'databases')
|
||||
|
||||
def test_commitlog(gdb):
|
||||
scylla(gdb, 'commitlog')
|
||||
|
||||
def test_tables(gdb):
|
||||
scylla(gdb, 'tables')
|
||||
|
||||
def test_table(gdb):
|
||||
scylla(gdb, 'table system.local')
|
||||
|
||||
def test_tablet_metadata(gdb):
|
||||
scylla(gdb, 'tablet-metadata')
|
||||
|
||||
def test_keyspaces(gdb):
|
||||
scylla(gdb, 'keyspaces')
|
||||
|
||||
def test_active_sstables(gdb):
|
||||
scylla(gdb, 'active-sstables')
|
||||
|
||||
def test_sstables(gdb):
|
||||
scylla(gdb, 'sstables')
|
||||
|
||||
def test_memtables(gdb):
|
||||
scylla(gdb, 'memtables')
|
||||
|
||||
def test_repairs(gdb):
|
||||
scylla(gdb, 'repairs')
|
||||
|
||||
def test_gms(gdb):
|
||||
scylla(gdb, 'gms')
|
||||
|
||||
def test_heapprof(gdb):
|
||||
scylla(gdb, 'heapprof')
|
||||
|
||||
def test_io_queues(gdb):
|
||||
scylla(gdb, 'io-queues')
|
||||
|
||||
def test_cache(gdb):
|
||||
scylla(gdb, 'cache')
|
||||
|
||||
def test_mem_range(gdb):
|
||||
scylla(gdb, 'mem-range')
|
||||
|
||||
def test_mem_ranges(gdb):
|
||||
scylla(gdb, 'mem-ranges')
|
||||
|
||||
def test_memory(gdb):
|
||||
scylla(gdb, 'memory')
|
||||
|
||||
def test_segment_descs(gdb):
|
||||
scylla(gdb, 'segment-descs')
|
||||
|
||||
def test_small_object_1(gdb):
|
||||
scylla(gdb, 'small-object -o 32 --random-page')
|
||||
|
||||
def test_small_object_2(gdb):
|
||||
scylla(gdb, 'small-object -o 64 --summarize')
|
||||
|
||||
def test_large_objects_1(gdb):
|
||||
scylla(gdb, 'large-objects -o 131072 --random-page')
|
||||
|
||||
def test_large_objects_2(gdb):
|
||||
scylla(gdb, 'large-objects -o 32768 --summarize')
|
||||
|
||||
def test_lsa(gdb):
|
||||
scylla(gdb, 'lsa')
|
||||
|
||||
def test_netw(gdb):
|
||||
scylla(gdb, 'netw')
|
||||
|
||||
def test_smp_queues(gdb):
|
||||
scylla(gdb, 'smp-queues')
|
||||
|
||||
def test_task_queues(gdb):
|
||||
scylla(gdb, 'task-queues')
|
||||
|
||||
def test_task_histogram(gdb):
|
||||
scylla(gdb, 'task_histogram')
|
||||
|
||||
def test_task_histogram_coro(gdb):
|
||||
h = scylla(gdb, 'task_histogram -a')
|
||||
if re.search(r'\) \[clone \.\w+\]', h) is None:
|
||||
raise gdb.error('no coroutine entries in task histogram')
|
||||
|
||||
def test_tasks(gdb):
|
||||
scylla(gdb, 'tasks')
|
||||
|
||||
def test_threads(gdb):
|
||||
scylla(gdb, 'threads')
|
||||
|
||||
def test_timers(gdb):
|
||||
scylla(gdb, 'timers')
|
||||
|
||||
# Some commands need a schema to work on. The following fixture finds
|
||||
# one (the schema of the first table - note that even without any user
|
||||
# tables, we will always have system tables).
|
||||
@pytest.fixture(scope="module")
|
||||
def schema(gdb, scylla_gdb):
|
||||
db = scylla_gdb.sharded(gdb.parse_and_eval('::debug::the_database')).local()
|
||||
table = next(scylla_gdb.for_each_table(db))
|
||||
gdb.set_convenience_variable('schema',
|
||||
scylla_gdb.seastar_lw_shared_ptr(table['_schema']).get())
|
||||
yield '$schema'
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def sstable(gdb, scylla_gdb):
|
||||
db = scylla_gdb.sharded(gdb.parse_and_eval('::debug::the_database')).local()
|
||||
sst = next(scylla_gdb.find_sstables())
|
||||
gdb.set_convenience_variable('sst', sst)
|
||||
yield '$sst'
|
||||
|
||||
def test_schema(gdb, schema):
|
||||
scylla(gdb, f'schema {schema}')
|
||||
|
||||
def test_find(gdb, schema):
|
||||
scylla(gdb, f'find -r {schema}')
|
||||
|
||||
def test_ptr(gdb, schema):
|
||||
scylla(gdb, f'ptr {schema}')
|
||||
|
||||
def test_generate_object_graph(gdb, schema, request):
|
||||
tmpdir = request.config.getoption('scylla_tmp_dir')
|
||||
scylla(gdb, f'generate-object-graph -o {tmpdir}/og.dot -d 2 -t 10 {schema}')
|
||||
|
||||
# Some commands need a task to work on. The following fixture finds one.
|
||||
# Because we stopped Scylla while it was idle, we don't expect to find
|
||||
# any ready task with get_local_tasks(), but we can find one with a
|
||||
# find_vptrs() loop. I noticed that a nice one (with multiple tasks chained
|
||||
# to it for "scylla fiber") is one from http_server::do_accept_one.
|
||||
@pytest.fixture(scope="module")
|
||||
def task(gdb, scylla_gdb):
|
||||
for obj_addr, vtable_addr in scylla_gdb.find_vptrs():
|
||||
name = scylla_gdb.resolve(vtable_addr, startswith='vtable for seastar::continuation')
|
||||
if name and 'do_accept_one' in name:
|
||||
return obj_addr.cast(gdb.lookup_type('uintptr_t'))
|
||||
raise gdb.error("no tasks found with expected name")
|
||||
|
||||
def test_fiber(gdb, task):
|
||||
scylla(gdb, f'fiber {task}')
|
||||
|
||||
# Similar to task(), but looks for a coroutine frame.
|
||||
@pytest.fixture(scope="module")
|
||||
def coro_task(gdb, scylla_gdb, request):
|
||||
target = 'service::topology_coordinator::run() [clone .resume]'
|
||||
for obj_addr, vtable_addr in scylla_gdb.find_vptrs():
|
||||
name = scylla_gdb.resolve(vtable_addr)
|
||||
if name and name.strip() == target:
|
||||
return obj_addr.cast(gdb.lookup_type('uintptr_t'))
|
||||
# Something is wrong. We should have found the target.
|
||||
# Check if scylla_find agrees with find_vptrs, for debugging.
|
||||
target_addr = int(gdb.parse_and_eval(f"&'{target}'"))
|
||||
find_command = f"scylla find -a 0x{target_addr:x}"
|
||||
gdb.write(f"Didn't find {target} (0x{target_addr:x}). Running '{find_command}'\n")
|
||||
mem_range = scylla_gdb.get_seastar_memory_start_and_size()
|
||||
gdb.execute(find_command)
|
||||
gdb.write(f"Memory range: 0x{mem_range[0]:x} 0x{mem_range[1]:x}\n")
|
||||
gdb.write(f"Found coroutines:\n")
|
||||
for obj_addr, vtable_addr in scylla_gdb.find_vptrs():
|
||||
name = scylla_gdb.resolve(vtable_addr)
|
||||
if name and '.resume' in name.strip():
|
||||
gdb.write(f'{name}\n')
|
||||
# This test fails sometimes, but rarely and unreliably.
|
||||
# We want to get a coredump from it the next time it fails.
|
||||
# https://github.com/scylladb/scylladb/issues/22501
|
||||
tmpdir = request.config.getoption('scylla_tmp_dir')
|
||||
core_filename = f"{tmpdir}/../scylla_gdb_coro_task-{uuid.uuid4()}.core"
|
||||
gdb.execute(f"gcore {core_filename}")
|
||||
raise gdb.error(f"No coroutine frames found with expected name. Dumped Scylla core to {core_filename}")
|
||||
|
||||
def test_coro_frame(gdb, coro_task):
|
||||
# Note the offset by two words.
|
||||
# This moves the pointer from the outer coroutine frame to the inner seastar::task.
|
||||
# $coro_frame expects a seastar::task*.
|
||||
gdb.execute(f'p *$coro_frame({coro_task} + 16)')
|
||||
|
||||
def test_sstable_summary(gdb, sstable):
|
||||
scylla(gdb, f'sstable-summary {sstable}')
|
||||
|
||||
def test_sstable_index_cache(gdb, sstable):
|
||||
scylla(gdb, f'sstable-index-cache {sstable}')
|
||||
|
||||
def test_read_stats(gdb, sstable):
|
||||
scylla(gdb, f'read-stats')
|
||||
|
||||
def test_get_config_value(gdb):
|
||||
scylla(gdb, f'get-config-value compaction_static_shares')
|
||||
|
||||
def test_prepared_statements(gdb):
|
||||
scylla(gdb, f'prepared-statements')
|
||||
|
||||
@pytest.mark.without_scylla
|
||||
def test_run_without_scylla(scylla_gdb):
|
||||
# just try to load the scylla-gdb module without attaching to scylla.
|
||||
#
|
||||
# please note, if this test fails, there are good chances that scylla-gdb.py
|
||||
# is unable to load without debug symbols. Calls to "gdb.lookup_type()" and
|
||||
# similar functions that rely on debug symbols should be made within GDB
|
||||
# commands themselves when they get exuecuted. To address potential
|
||||
# failures, consider moving code that references debug symbols into a code
|
||||
# path executed only when debug symbols is loaded. If the value of the
|
||||
# symbol is a constant, consider caching it. using functools.cache
|
||||
# decorator.
|
||||
_ = scylla_gdb
|
||||
|
||||
# FIXME: need a simple test for lsa-segment
|
||||
57
test/scylla_gdb/test_schema_commands.py
Normal file
57
test/scylla_gdb/test_schema_commands.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# Copyright 2025-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
"""
|
||||
Tests for commands that need a schema to work on.
|
||||
Each only checks that the command does not fail - but not what it does or returns.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import re
|
||||
|
||||
from test.scylla_gdb.conftest import execute_gdb_command
|
||||
|
||||
pytestmark = [
|
||||
pytest.mark.skip_mode(
|
||||
mode=["dev", "debug"],
|
||||
reason="Scylla was built without debug symbols; use release mode",
|
||||
),
|
||||
pytest.mark.skip_mode(
|
||||
mode=["dev", "debug", "release"],
|
||||
platform_key="aarch64",
|
||||
reason="GDB is broken on aarch64: https://sourceware.org/bugzilla/show_bug.cgi?id=27886",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def schema(gdb_process):
    """
    Return the schema pointer of the first table GDB finds, as a hex string.

    Even without any user tables, we will always have system tables.

    The pointer is parsed from the `schema= 0x...` line printed by the
    `get_schema()` helper; the fixture fails with the full GDB output when no
    such line is present.
    """
    output = execute_gdb_command(gdb_process, full_command="python get_schema()")
    found = re.search(r"schema=\s*(0x[0-9a-fA-F]+)", output)
    assert found, f"Failed to find schema pointer in response: {output}"
    # `found` is known to be a match object past the assert, so no fallback
    # value is needed.
    return found.group(1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "command",
    [
        "find -r",
        "ptr",
        "schema (const schema *)",  # `schema` requires type-casted pointer
    ],
)
def test_schema(gdb_process, command, schema):
    """Run a `scylla` command that takes the schema pointer as its last argument."""
    scylla_command = f"{command} {schema}"
    execute_gdb_command(gdb_process, scylla_command=scylla_command)
|
||||
|
||||
|
||||
def test_generate_object_graph(gdb_process, schema, request):
    """Run `scylla generate-object-graph` on the schema pointer.

    The graph is written to `og.dot` under the directory given by the
    `--tmpdir` pytest option.
    """
    output_dir = request.config.getoption("--tmpdir")
    scylla_command = f"generate-object-graph -o {output_dir}/og.dot -d 2 -t 10 {schema}"
    execute_gdb_command(gdb_process, scylla_command=scylla_command)
|
||||
46
test/scylla_gdb/test_sstable_commands.py
Normal file
46
test/scylla_gdb/test_sstable_commands.py
Normal file
@@ -0,0 +1,46 @@
|
||||
# Copyright 2025-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
"""
|
||||
Tests for commands that need an sstable to work on.
|
||||
Each only checks that the command does not fail - but not what it does or returns.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import re
|
||||
|
||||
from test.scylla_gdb.conftest import execute_gdb_command
|
||||
|
||||
pytestmark = [
|
||||
pytest.mark.skip_mode(
|
||||
mode=["dev", "debug"],
|
||||
reason="Scylla was built without debug symbols; use release mode",
|
||||
),
|
||||
pytest.mark.skip_mode(
|
||||
mode=["dev", "debug", "release"],
|
||||
platform_key="aarch64",
|
||||
reason="GDB is broken on aarch64: https://sourceware.org/bugzilla/show_bug.cgi?id=27886",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def sstable(gdb_process):
    """
    Return a type-cast sstable pointer, e.g. `(sstables::sstable *) 0x...`.

    The value is parsed from the output of the `get_sstables()` helper; the
    fixture fails with the full GDB output when no sstable pointer is found.
    """
    result = execute_gdb_command(gdb_process, full_command="python get_sstables()")
    match = re.search(r"(\(sstables::sstable \*\) 0x)([0-9a-f]+)", result)
    # Show what GDB actually printed; the bare text "result.stdout" gave no
    # hint about the failure.
    assert match is not None, f"No sstable was present in GDB output: {result}"
    # The whole match (cast + address) is what the sstable commands expect.
    return match.group(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "command",
    [
        "sstable-summary",
        "sstable-index-cache",
    ],
)
def test_sstable(gdb_process, command, sstable):
    """Run a `scylla` command that takes the sstable pointer as its argument."""
    scylla_command = f"{command} {sstable}"
    execute_gdb_command(gdb_process, scylla_command=scylla_command)
|
||||
85
test/scylla_gdb/test_task_commands.py
Normal file
85
test/scylla_gdb/test_task_commands.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# Copyright 2025-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
"""
|
||||
Tests for commands that need a task to work on.
|
||||
Each only checks that the command does not fail - but not what it does or returns.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from test.scylla_gdb.conftest import execute_gdb_command
|
||||
|
||||
pytestmark = [
|
||||
pytest.mark.skip_mode(
|
||||
mode=["dev", "debug"],
|
||||
reason="Scylla was built without debug symbols; use release mode",
|
||||
),
|
||||
pytest.mark.skip_mode(
|
||||
mode=["dev", "debug", "release"],
|
||||
platform_key="aarch64",
|
||||
reason="GDB is broken on aarch64: https://sourceware.org/bugzilla/show_bug.cgi?id=27886",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def task(gdb_process):
    """
    Finds a Scylla fiber task using a `find_vptrs()` loop.

    Since Scylla is fresh-booted, `get_local_tasks()` returns nothing.
    Nevertheless, a `find_vptrs()` scan can still discover a task created by
    `http_server::do_accept_one`.

    Returns:
        The task address as a decimal string, parsed from the `task=<address>`
        line printed by the `get_task()` helper. The fixture fails with the
        full GDB output when that line is missing.
    """
    result = execute_gdb_command(gdb_process, full_command="python get_task()")
    match = re.search(r"task=(\d+)", result)
    # `result` is a plain string: formatting `result.stdout` here would raise
    # AttributeError and hide the real failure.
    assert match is not None, f"No task was present in {result}"
    return match.group(1)
|
||||
|
||||
|
||||
def test_fiber(gdb_process, task):
    """Run `scylla fiber` on the task address provided by the `task` fixture."""
    scylla_command = f"fiber {task}"
    execute_gdb_command(gdb_process, scylla_command=scylla_command)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def coroutine_task(gdb_process, scylla_server):
    """
    Finds a coroutine task, similar to the `task` fixture.

    Runs the `get_coroutine()` helper in GDB and returns the value printed
    after `coroutine_config=`.

    If nothing was printed, the `coroutine_debug_config()` helper is run with
    the server's working directory to collect diagnostics (it checks whether
    scylla find agrees with find_vptrs) before the test is marked as failed.
    NOTE(review): the helper is expected to dump a core next to the server's
    working directory - confirm against gdb_utils.py.
    """
    output = execute_gdb_command(gdb_process, full_command="python get_coroutine()")
    found = re.search(r"coroutine_config=\s*(.*)", output)
    if found:
        return found.group(1).strip()

    debug_command = f"python coroutine_debug_config('{scylla_server.workdir}')"
    debug_output = execute_gdb_command(gdb_process, full_command=debug_command)
    pytest.fail(
        "Failed to find coroutine task. Debugging logs have been collected\n"
        f"Debugging code result: {debug_output}\n"
    )
|
||||
|
||||
|
||||
def test_coroutine_frame(gdb_process, coroutine_task):
    """
    Print the coroutine frame through the `$coro_frame` convenience function.

    `$coro_frame` expects a `seastar::task*`, so the pointer is offset by two
    words (16 bytes) to move from the outer coroutine frame to the inner
    `seastar::task`.
    """
    print_command = f"p *$coro_frame({coroutine_task} + 16)"
    execute_gdb_command(gdb_process, full_command=print_command)
|
||||
Reference in New Issue
Block a user