From 419354bc9fa8a07bfcdded61375716fd707ae906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20J=C4=99drzejczak?= Date: Thu, 15 Feb 2024 15:42:11 +0100 Subject: [PATCH] test: harden test_cdc_generation_clearing In one of the previous patches, we fixed scylladb/scylladb#16916 as a side effect. We removed `system_keyspace::get_cdc_generations_cleanup_candidate`, which contained the bug causing the issue. Even though we didn't have to fix this issue directly, it showed us that `test_cdc_generation_clearing` was too weak. If something went wrong during or after the only clearing, the test could still pass because the clearing was the last action in the test. In scylladb/scylladb#16916, the CDC generation publisher was stuck after the clearing because of a recurring error. The test wouldn't detect it. Therefore, we harden the test by expecting two clearings instead of one. If something goes wrong during the first clearing, there is a high chance that the second clearing will fail. The new test version wouldn't pass with the old bug in the code. 
--- .../test_cdc_generation_clearing.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/test/topology_experimental_raft/test_cdc_generation_clearing.py b/test/topology_experimental_raft/test_cdc_generation_clearing.py index 97095b2f85..e805f6f01e 100644 --- a/test/topology_experimental_raft/test_cdc_generation_clearing.py +++ b/test/topology_experimental_raft/test_cdc_generation_clearing.py @@ -3,7 +3,7 @@ # # SPDX-License-Identifier: AGPL-3.0-or-later # -from test.pylib.rest_client import inject_error_one_shot +from test.pylib.rest_client import inject_error from test.pylib.manager_client import ManagerClient from test.pylib.util import wait_for, wait_for_cql_and_get_hosts from test.topology.util import check_system_topology_and_cdc_generations_v3_consistency @@ -63,13 +63,22 @@ async def test_cdc_generation_clearing(manager: ManagerClient): await check_system_topology_and_cdc_generations_v3_consistency(manager, hosts) second_gen_id = max(gen_ids) - await inject_error_one_shot(manager.api, servers[0].ip_addr, "clean_obsolete_cdc_generations_change_ts_ub") + async with inject_error(manager.api, servers[0].ip_addr, "clean_obsolete_cdc_generations_change_ts_ub"): + logger.info("Bootstrapping third node") + servers += [await manager.server_add()] - logger.info("Bootstrapping third node") - servers += [await manager.server_add()] + # The first and second generations should be removed thanks to the above injection. + mark, gen_ids, hosts = await wait_for(tried_to_remove_new_gen, time.time() + 60) + logger.info(f"Generations after third clearing attempt: {gen_ids}") + assert len(gen_ids) == 1 and first_gen_id not in gen_ids and second_gen_id not in gen_ids + await check_system_topology_and_cdc_generations_v3_consistency(manager, hosts) + third_gen_id = max(gen_ids) - # The first and second generations should be removed thanks to the above injection. 
- mark, gen_ids, hosts = await wait_for(tried_to_remove_new_gen, time.time() + 60) - logger.info(f"Generations after third clearing attempt: {gen_ids}") - assert len(gen_ids) == 1 and first_gen_id not in gen_ids and second_gen_id not in gen_ids - await check_system_topology_and_cdc_generations_v3_consistency(manager, hosts) + logger.info("Bootstrapping fourth node") + servers += [await manager.server_add()] + + # The third generation should be removed thanks to the above injection. + mark, gen_ids, hosts = await wait_for(tried_to_remove_new_gen, time.time() + 60) + logger.info(f"Generations after fourth clearing attempt: {gen_ids}") + assert len(gen_ids) == 1 and third_gen_id not in gen_ids + await check_system_topology_and_cdc_generations_v3_consistency(manager, hosts)