From 6cb4c27f8ccfabe9a2b39e0101f7d26679d26a64 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 19 Apr 2026 14:38:33 +0300 Subject: [PATCH 1/3] test/cluster/dtest/ccmlib/scylla_node: add debug logging Signed-off-by: Benny Halevy --- test/cluster/dtest/ccmlib/scylla_node.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/cluster/dtest/ccmlib/scylla_node.py b/test/cluster/dtest/ccmlib/scylla_node.py index e09b893b19..258e036fdf 100644 --- a/test/cluster/dtest/ccmlib/scylla_node.py +++ b/test/cluster/dtest/ccmlib/scylla_node.py @@ -17,6 +17,7 @@ from itertools import chain from functools import cached_property from pathlib import Path from typing import TYPE_CHECKING, Any +import logging from test.cluster.dtest.ccmlib.common import ArgumentError, wait_for, BIN_DIR from test.pylib.internal_types import ServerUpState @@ -28,6 +29,9 @@ if TYPE_CHECKING: from test.cluster.dtest.ccmlib.scylla_cluster import ScyllaCluster +logger = logging.getLogger("scylla_node") + + NODETOOL_STDERR_IGNORED_PATTERNS = ( re.compile(r"WARNING: debug mode. 
Not for benchmarking or production"), re.compile( @@ -149,15 +153,20 @@ class ScyllaNode: return self.cluster.scylla_mode def set_smp(self, smp: int) -> None: + logger.debug(f"Setting smp: {self=} {smp=}") self._smp_set_during_test = smp def smp(self) -> int: + logger.debug(f"Getting smp: {self=} _smp_set_during_test={self._smp_set_during_test} _smp={self._smp} {DEFAULT_SMP=}") return self._smp_set_during_test or self._smp or DEFAULT_SMP def memory(self) -> int: return self._memory or self.smp() * DEFAULT_MEMORY_PER_CPU def _adjust_smp_and_memory(self, smp: int | None = None, memory: int | None = None) -> None: + if not memory and not smp: + return + logger.debug(f"Adjusting smp={smp} memory={memory} current_smp={self._smp} current_memory={self._memory}") if memory: self._memory = memory // (smp or self.smp()) * self.smp() if smp: @@ -446,6 +455,8 @@ class ScyllaNode: self.mark = self.mark_log() + logger.debug(f"Starting server: server_id={self.server_id} {scylla_args=} {scylla_env=}") + self.cluster.manager.server_start( server_id=self.server_id, seeds=None if self.bootstrap else [self.address()], From 7430c1efd77aa60ae64fc29e70f14c99bff42ed9 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 19 Apr 2026 12:18:01 +0300 Subject: [PATCH 2/3] test/cluster/dtest: cache ScyllaNode instances in ScyllaCluster ScyllaCluster.nodelist() was creating new ScyllaNode objects on every call, so per-node state set via set_smp(), set_log_level(), and _adjust_smp_and_memory() was lost between calls. Fix by caching ScyllaNode instances in a list populated by _add_nodes() using the list returned by servers_add() in populate(). Nodes are assigned monotonically increasing names (node1, node2, ...). nodelist() simply returns a shallow copy of the cached list, so every call yields the same ScyllaNode instances. 
--- test/cluster/dtest/ccmlib/scylla_cluster.py | 31 +++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/test/cluster/dtest/ccmlib/scylla_cluster.py b/test/cluster/dtest/ccmlib/scylla_cluster.py index ca49dbe153..7416c6895a 100644 --- a/test/cluster/dtest/ccmlib/scylla_cluster.py +++ b/test/cluster/dtest/ccmlib/scylla_cluster.py @@ -11,13 +11,11 @@ from typing import TYPE_CHECKING from cassandra.auth import PlainTextAuthProvider -from test.pylib.internal_types import ServerInfo from test.pylib.manager_client import ManagerClient from test.cluster.dtest.ccmlib.common import logger from test.cluster.dtest.ccmlib.scylla_node import ScyllaNode if TYPE_CHECKING: - from collections.abc import Iterable from typing import Any @@ -29,6 +27,10 @@ class ScyllaCluster: self.manager = manager self.scylla_mode = scylla_mode self._config_options = {} + # Cached ScyllaNode instances. Nodes are appended by _add_nodes() + # in the order they are created by servers_add(). + self._nodes: list[ScyllaNode] = [] + self._next_node_num: int = 1 if self.scylla_mode == "debug": self.default_wait_other_notice_timeout = 600 @@ -39,19 +41,20 @@ class ScyllaCluster: self.force_wait_for_cluster_start = force_wait_for_cluster_start - @staticmethod - def _sorted_nodes(servers: Iterable[ServerInfo]) -> list[ServerInfo]: - return sorted(servers, key=lambda s: s.server_id) + def _add_nodes(self, servers: list) -> None: + """Create ScyllaNode instances for the given servers and cache them.""" + for server in servers: + name = f"node{self._next_node_num}" + self._next_node_num += 1 + self._nodes.append(ScyllaNode( + cluster=self, server=server, name=name)) @property def nodes(self) -> dict[str, ScyllaNode]: return {node.name: node for node in self.nodelist()} def nodelist(self) -> list[ScyllaNode]: - return [ - ScyllaNode(cluster=self, server=server, name=f"node{n}") - for n, server in enumerate(self._sorted_nodes(self.manager.all_servers()), start=1) - ] + return 
list(self._nodes) def get_node_ip(self, nodeid: int) -> str: return self.nodelist()[nodeid-1].address() @@ -61,16 +64,16 @@ class ScyllaCluster: self.manager.auth_provider = PlainTextAuthProvider(username="cassandra", password="cassandra") match nodes: case int(): - self.manager.servers_add(servers_num=nodes, config=self._config_options, start=False, auto_rack_dc="dc1") + self._add_nodes(self.manager.servers_add(servers_num=nodes, config=self._config_options, start=False, auto_rack_dc="dc1")) case list(): for dc, n_nodes in enumerate(nodes, start=1): dc_name = f"dc{dc}" - self.manager.servers_add( + self._add_nodes(self.manager.servers_add( servers_num=n_nodes, config=self._config_options, start=False, auto_rack_dc=dc_name - ) + )) case dict(): # Supported spec: {"dc1": {"rack1": 3, "rack2": 2}, "dc2": {"rack1": 2}} for dc, dc_nodes in nodes.items(): @@ -79,7 +82,7 @@ class ScyllaCluster: for rack, rack_nodes in dc_nodes.items(): if not isinstance(rack_nodes, int): raise RuntimeError(f"Unsupported topology specification: {nodes}") - self.manager.servers_add( + self._add_nodes(self.manager.servers_add( servers_num=rack_nodes, config=self._config_options, property_file={ @@ -87,7 +90,7 @@ class ScyllaCluster: "rack": rack, }, start=False, - ) + )) case _: raise RuntimeError(f"Unsupported topology specification: {nodes}") From 5eaa979f358966250eeeb3b35c940f39f55d650b Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 19 Apr 2026 12:17:54 +0300 Subject: [PATCH 3/3] test/cluster/dtest: add test for node.set_smp() persistence Add a test that reproduces SCYLLADB-1629: set_smp() had no effect because nodelist() created new ScyllaNode objects on every call, losing the _smp_set_during_test value. The test fails without the fix in the previous patch. 
--- test/cluster/dtest/set_smp_test.py | 46 ++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 test/cluster/dtest/set_smp_test.py diff --git a/test/cluster/dtest/set_smp_test.py b/test/cluster/dtest/set_smp_test.py new file mode 100644 index 0000000000..4e0b40b665 --- /dev/null +++ b/test/cluster/dtest/set_smp_test.py @@ -0,0 +1,46 @@ +# +# Copyright (C) 2026-present ScyllaDB +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 +# + +import logging + +import pytest + +from dtest_class import Tester + +logger = logging.getLogger(__file__) + + +@pytest.mark.single_node +class TestSetSmp(Tester): + """Test that node.set_smp() properly persists across restarts.""" + + def _get_smp_from_log(self, node, from_mark=None): + """Extract smp value from the node's log by looking at the SHARD_COUNT gossip value.""" + matches = node.grep_log(r"SHARD_COUNT : Value\((\d+),\d+\)", from_mark=from_mark) + assert matches, "Could not find SHARD_COUNT in node log" + # Return the last match (most recent start) + return int(matches[-1][1].group(1)) + + def test_set_smp(self): + """Verify that set_smp() takes effect on the next start.""" + cluster = self.cluster + cluster.populate(1).start(wait_for_binary_proto=True) + node1 = cluster.nodelist()[0] + + default_smp = self._get_smp_from_log(node1) + + cluster.stop() + + # set_smp to a different value and restart without jvm_args + target_smp = 1 if default_smp != 1 else 2 + node1.set_smp(target_smp) + mark = node1.mark_log() + cluster.start(wait_for_binary_proto=True) + + node1 = cluster.nodelist()[0] + actual_smp = self._get_smp_from_log(node1, from_mark=mark) + assert actual_smp == target_smp, \ + f"Expected smp={target_smp} after set_smp({target_smp}), got {actual_smp}"