mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-21 09:00:35 +00:00
In `ks_prop_defs::as_ks_metadata(...)` the default initial tablets count
is set to 0 only when tablets are enabled and the replication strategy
is NetworkTopologyStrategy.
This effectively sets _uses_tablets = false in abstract_replication_strategy
for the remaining strategies when no `tablets = {...}` options are specified.
As a consequence, it is possible to create vnode-based keyspaces even
when tablets are enforced with `tablets_mode_for_new_keyspaces`.
The patch sets the default initial tablets count to zero regardless of
the chosen replication strategy. Each replication strategy then
validates the options and raises a configuration exception when tablets
are not supported.
All tests are altered in the following way:
+ whenever it was correct, SimpleStrategy was replaced with NetworkTopologyStrategy
+ otherwise, tablets were explicitly disabled with ` AND tablets = {'enabled': false}`
Fixes https://github.com/scylladb/scylladb/issues/25340
Closes scylladb/scylladb#25342
81 lines
3.7 KiB
Python
81 lines
3.7 KiB
Python
#
|
|
# Copyright (C) 2025-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
|
#
|
|
import asyncio
|
|
import pytest
|
|
import time
|
|
import logging
|
|
import random
|
|
|
|
from cassandra.cluster import NoHostAvailable # type: ignore
|
|
|
|
from test.pylib.manager_client import ManagerClient
|
|
from test.pylib.rest_client import inject_error
|
|
from test.pylib.util import wait_for, wait_for_cql_and_get_hosts
|
|
from test.cluster.util import new_test_keyspace, new_test_table
|
|
|
|
# Module-level logger, named after this test module.
logger = logging.getLogger(__name__)
|
|
# pytest applies a module-level `pytestmark` to every test in this file.
# NOTE(review): presumably `prepare_3_racks_cluster` makes the test harness
# provide a cluster spread over three racks -- confirm against the framework.
pytestmark = pytest.mark.prepare_3_racks_cluster
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
async def test_cancel_mapreduce(manager: ManagerClient):
    """
    Check that a mapreduce supercoordinator can be stopped gracefully while
    its task is still in flight: the queries it has already sent to other
    nodes must be cancelled, because otherwise they would block the shutdown.
    """
    servers = await manager.running_servers()
    assert len(servers) >= 2

    s1 = servers[0]
    s2 = servers[1]
    cql = manager.get_cql()
    hosts = await wait_for_cql_and_get_hosts(cql, [s1, s2], time.time() + 30)

    await manager.api.set_logger_level(s1.ip_addr, "forward_service", "debug")

    # Exactly one driver host is expected to match node 1; the unpacking
    # raises otherwise.
    [host1] = [host for host in hosts if host.address == s1.ip_addr]
    host_id2 = await manager.get_host_id(s2.server_id)

    replication = f"WITH REPLICATION = {{'class': 'NetworkTopologyStrategy', 'dc1': {[s2.rack]}}}"
    async with (
        new_test_keyspace(manager, replication) as ks,
        new_test_table(manager, ks, "pk int PRIMARY KEY, v int") as t,
    ):
        # Fill the table with random rows.
        # Note: CQL int is a 32-bit integer, so the generated values fit.
        for _ in range(250):
            pk = random.randint(-2**30, 2**30)
            v = random.randint(-2**30, 2**30)
            await cql.run_async(f"INSERT INTO {t} (pk, v) VALUES ({pk}, {v})")

        coordinator_log = await manager.server_open_log(s1.server_id)
        remote_log = await manager.server_open_log(s2.server_id)

        # Only log lines written after these marks are of interest below.
        coordinator_mark = await coordinator_log.mark()
        remote_mark = await remote_log.mark()

        async def do_select():
            # Sending the aggregation to node 1 makes it the supercoordinator
            # of the corresponding mapreduce task.
            # The 600s timeout exceeds the combined timeouts of the steps in
            # wait_and_shutdown(); for the test to be reliable, the query
            # cannot end on its own.
            try:
                await cql.run_async(f"SELECT count(*) FROM {t} BYPASS CACHE USING TIMEOUT 600s", host=host1)
            except NoHostAvailable:
                # Expected outcome: the coordinator went away under the query.
                return
            pytest.fail(f"Query finished, but it wasn't supposed to")

        async def wait_and_shutdown():
            # Node 1 must be the supercoordinator and must have sent a
            # mapreduce task to node 2.
            await coordinator_log.wait_for(f"dispatching mapreduce_request=.* to address={host_id2}", from_mark=coordinator_mark, timeout=60)
            # Node 2 must be holding its local mapreduce task back.
            await remote_log.wait_for("mapreduce_pause_dispatch_to_shards: waiting for message", from_mark=remote_mark, timeout=60)
            # The supercoordinator has to stop cleanly despite the ongoing
            # mapreduce task.
            await manager.server_stop_gracefully(s1.server_id, timeout=120)

        # The injection keeps local mapreduce tasks on node 2 from finishing.
        async with inject_error(manager.api, s2.ip_addr, "mapreduce_pause_dispatch_to_shards"):
            async with asyncio.TaskGroup() as tg:
                tg.create_task(do_select())
                tg.create_task(wait_and_shutdown())
|