mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-12 19:02:12 +00:00
test: test group0 tombstone GC in the Raft-based recovery procedure
We add a regression test for the bug fixed in the previous commits.
(cherry picked from commit c57f097630)
This commit is contained in:
committed by
GitHub Action
parent
8a11535a12
commit
76560ca095
@@ -11,10 +11,11 @@ import logging
|
||||
import time
|
||||
import pytest
|
||||
|
||||
from test.pylib.internal_types import ServerInfo
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.util import wait_for, wait_for_cql_and_get_hosts
|
||||
from test.cluster.conftest import skip_mode
|
||||
from test.cluster.util import disable_schema_agreement_wait, new_test_keyspace, new_test_table
|
||||
from test.cluster.util import delete_discovery_state_and_group0_id, delete_raft_group_data, disable_schema_agreement_wait, new_test_keyspace, new_test_table, reconnect_driver
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -83,6 +84,19 @@ async def test_group0_tombstone_gc(manager: ManagerClient):
|
||||
- tombstones exist from the previous test
|
||||
Assert:
|
||||
- the tombstones are cleaned up eventually
|
||||
|
||||
Test #4:
|
||||
Regression test for https://github.com/scylladb/scylladb/issues/26534
|
||||
|
||||
Arrange:
|
||||
- kill one of the nodes
|
||||
Act:
|
||||
- create new group0 tombstones by updating the schema (create/alter/delete random tables),
|
||||
- perform the Raft-based recovery procedure, which creates a new group0
|
||||
Assert:
|
||||
- (4a) the tombstones are not cleaned up after both live nodes join the new group0 even though the killed
|
||||
node doesn't belong to it,
|
||||
- (4b) the tombstones are cleaned up after removing the killed node
|
||||
"""
|
||||
cmdline = [
|
||||
# disabling caches as the tombstones still remain in the cache even after the compaction
|
||||
@@ -203,6 +217,55 @@ async def test_group0_tombstone_gc(manager: ManagerClient):
|
||||
# test #3: the tombstones are cleaned up after the node is started again
|
||||
await verify_tombstone_gc(tombstone_mark)
|
||||
|
||||
first_group0_id = (await cql.run_async(
|
||||
"SELECT value FROM system.scylla_local WHERE key = 'raft_group0_id'"))[0].value
|
||||
|
||||
# Kill one server.
|
||||
await manager.server_stop(down_server.server_id)
|
||||
servers.pop()
|
||||
|
||||
await alter_system_schema(keyspace)
|
||||
tombstone_mark = datetime.now(timezone.utc)
|
||||
|
||||
# Start the Raft-based recovery procedure.
|
||||
await manager.rolling_restart(servers, wait_for_cql=False)
|
||||
|
||||
# TODO: do not reconnect the driver (here and below) once scylladb/python-driver#295 is fixed.
|
||||
# It doesn't go well with disable_schema_agreement_wait above.
|
||||
cql = await reconnect_driver(manager)
|
||||
cql.cluster.max_schema_agreement_wait = 0
|
||||
hosts = [(await wait_for_cql_and_get_hosts(cql, [s], time.time() + 60))[0]
|
||||
for s in servers]
|
||||
|
||||
for h in hosts:
|
||||
await delete_discovery_state_and_group0_id(cql, h)
|
||||
|
||||
recovery_leader_id = await manager.get_host_id(servers[0].server_id)
|
||||
|
||||
async def set_recovery_leader(srv: ServerInfo):
|
||||
await manager.server_update_config(srv.server_id, 'recovery_leader', recovery_leader_id)
|
||||
|
||||
await manager.rolling_restart(servers, with_down=set_recovery_leader, wait_for_cql=False)
|
||||
|
||||
cql = await reconnect_driver(manager)
|
||||
cql.cluster.max_schema_agreement_wait = 0
|
||||
await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
|
||||
|
||||
# test #4a: the tombstones are not cleaned up after both live nodes join the new group0
|
||||
with pytest.raises(AssertionError, match="Deadline exceeded"):
|
||||
await verify_tombstone_gc(tombstone_mark, timeout=5)
|
||||
|
||||
await manager.remove_node(servers[0].server_id, down_server.server_id)
|
||||
|
||||
# test #4b: the tombstones are cleaned up after removing the killed node
|
||||
await verify_tombstone_gc(tombstone_mark)
|
||||
|
||||
# Finish the Raft-based recovery procedure. We leave the cluster in a clean state for future tests.
|
||||
for srv in servers:
|
||||
await manager.server_remove_config_option(srv.server_id, 'recovery_leader')
|
||||
for h in hosts:
|
||||
await delete_raft_group_data(first_group0_id, cql, h)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@skip_mode('release', "test only needs to run once - allowing only the 'dev' mode")
|
||||
|
||||
Reference in New Issue
Block a user