Files
scylladb/test/cluster/test_describe.py
Dario Mirovic cf237e060a test: auth_cluster: use safe_driver_shutdown() for Cluster teardown
A handful of cassandra-driver Cluster.shutdown() call sites in the
auth_cluster tests were missed by the previous sweep that introduced
safe_driver_shutdown(), because the local variable holding the Cluster
is named "c" rather than "cluster".

Direct Cluster.shutdown() is racy: the driver's "Task Scheduler"
thread may raise RuntimeError ("cannot schedule new futures after
shutdown") during or after the call, occasionally failing tests.
safe_driver_shutdown() suppresses this expected RuntimeError and
joins the scheduler thread.

Replace the remaining c.shutdown() calls in:
  - test/cluster/auth_cluster/test_startup_response.py
  - test/cluster/auth_cluster/test_maintenance_socket.py
with safe_driver_shutdown(c) and add the corresponding import from
test.pylib.driver_utils.

No behavioral change to the tests; only the driver teardown is
hardened against a known driver-side race.

Fixes SCYLLADB-1662

Closes scylladb/scylladb#29576
2026-04-21 17:45:11 +02:00

93 lines
4.1 KiB
Python

#
# Copyright (C) 2025-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
#
import asyncio
import pytest
from test.cluster.util import new_test_keyspace, new_test_table
from test.pylib.manager_client import ManagerClient, safe_driver_shutdown
from test.pylib.util import wait_for
from cassandra.connection import UnixSocketEndPoint
from cassandra.policies import WhiteListRoundRobinPolicy
from test.cluster.conftest import cluster_con
from time import time
import os
# Reproducer for issue scylladb/scylladb#24018.
#
# Checks that Scylla does not perform an oversized allocation while generating
# a large create statement as part of a DESCRIBE statement. A warning about an
# oversized allocation is logged once an allocation exceeds 128 * 2^10 bytes,
# so the test makes the statement larger than that and then verifies that the
# server log contains no such warning.
@pytest.mark.asyncio
async def test_large_create_statement(manager: ManagerClient):
    # Allocations bigger than 128 * 2^10 bytes trigger the oversized allocation warning.
    oversized_limit = 128 * (2 ** 10)
    # CQL will not accept identifiers longer than ~2^16, so stay below that.
    name_length = 60_000
    # Use roughly twice as many columns as needed to cross the limit, so the
    # generated statement is certain to exceed it (5 columns * 60_000 chars).
    column_total = 2 * (oversized_limit // name_length) + 1
    long_prefix = "a" * name_length

    server = await manager.server_add(cmdline=["--logger-log-level", "describe=trace"])
    session = manager.get_cql()

    async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}") as keyspace, \
            new_test_table(manager, keyspace, "p int PRIMARY KEY") as tbl:

        async def churn_column(column: str) -> None:
            # Add a column with a huge name and drop it right away.
            # NOTE(review): presumably dropped columns are still emitted by
            # DESCRIBE ... WITH INTERNALS, which is what inflates the statement — confirm.
            await session.run_async(f"ALTER TABLE {tbl} ADD {column} int")
            await session.run_async(f"ALTER TABLE {tbl} DROP {column}")

        pending = (churn_column(f"{long_prefix}{n}") for n in range(column_total))
        await asyncio.gather(*pending)

        # Only inspect log lines written after this point.
        server_log = await manager.server_open_log(server.server_id)
        start_mark = await server_log.mark()

        await session.run_async("DESCRIBE SCHEMA WITH INTERNALS")

        warnings_found = await server_log.grep("oversized allocation", from_mark=start_mark)
        assert len(warnings_found) == 0
@pytest.mark.parametrize("mode", ["normal", "maintenance"])
@pytest.mark.asyncio
async def test_describe_cluster_sanity(manager: ManagerClient, mode: str):
    """
    Parametrized test that DESCRIBE CLUSTER returns correct cluster information
    in both normal and maintenance modes.

    This test verifies that cluster metadata from gossiper is properly initialized
    and the cluster name is consistent with system.local in both:
    - normal mode: standard cluster operation
    - maintenance mode: node isolated from the cluster

    In normal mode the CQL session comes from the manager. In maintenance mode
    the test opens its own driver connection over the server's maintenance
    socket and is responsible for shutting that driver Cluster down afterwards.
    """
    # Driver Cluster created by this test itself. It stays None in normal mode,
    # where the session is obtained from the manager and nothing needs tearing down here.
    cluster = None
    try:
        if mode == "normal":
            await manager.server_add()
            cql = manager.get_cql()
        else:  # maintenance mode
            srv = await manager.server_add(config={"maintenance_mode": True}, connect_driver=False)
            maintenance_socket_path = await manager.server_get_maintenance_socket_path(srv.server_id)

            async def socket_exists():
                # Yields True once the socket file is present and None otherwise.
                # NOTE(review): wait_for presumably keeps polling while the
                # predicate returns None — confirm against test.pylib.util.
                return True if os.path.exists(maintenance_socket_path) else None

            # Give the server up to 30 seconds to create the maintenance socket.
            await wait_for(socket_exists, time() + 30)

            socket_endpoint = UnixSocketEndPoint(maintenance_socket_path)
            cluster = cluster_con([socket_endpoint], load_balancing_policy=WhiteListRoundRobinPolicy([socket_endpoint]))
            # connect() runs inside the try block so that a failure here still
            # reaches the shutdown in the finally clause instead of leaking the Cluster.
            cql = cluster.connect()

        system_local_results = await cql.run_async("SELECT cluster_name FROM system.local")
        assert system_local_results[0].cluster_name != ""  # sanity check

        describe_results = await cql.run_async("DESCRIBE CLUSTER")
        assert describe_results[0].partitioner == 'org.apache.cassandra.dht.Murmur3Partitioner'
        assert describe_results[0].snitch == 'org.apache.cassandra.locator.SimpleSnitch'
        assert describe_results[0].cluster == system_local_results[0].cluster_name
    finally:
        # Shut down only a Cluster this test actually created.
        if cluster is not None:
            safe_driver_shutdown(cluster)