test: test_tablets_removenode: Verify replacing when there is no spare node
The test is changed to be more strict. It verifies the case of replacing a node when RF=N, in which case tablet replicas have to be rebuilt using the replacing node. This would fail if tablets were drained as part of the replace operation, since the replacing node is not yet a viable target for tablet migration.
This commit is contained in:
@@ -30,20 +30,24 @@ async def run_async_cl_all(cql, query: str):
|
||||
@pytest.mark.asyncio
|
||||
async def test_replace(manager: ManagerClient):
|
||||
logger.info("Bootstrapping cluster")
|
||||
cmdline = ['--logger-log-level', 'storage_service=trace']
|
||||
cmdline = [
|
||||
'--logger-log-level', 'storage_service=trace',
|
||||
'--logger-log-level', 'raft_topology=trace',
|
||||
]
|
||||
|
||||
# 4 nodes so that we can find new tablet replica for the RF=3 table on removenode
|
||||
servers = await manager.servers_add(4, cmdline=cmdline)
|
||||
servers = await manager.servers_add(3, cmdline=cmdline)
|
||||
|
||||
cql = manager.get_cql()
|
||||
|
||||
await create_keyspace(cql, "test", 32, rf=1)
|
||||
await cql.run_async("CREATE TABLE test.test (pk int PRIMARY KEY, c int);")
|
||||
|
||||
# We want RF=2 table to validate that quorum reads work after replacing node finishes
|
||||
# bootstrap which indicates that bootstrap waits for rebuilt.
|
||||
# Otherwise, some reads would fail to find a quorum.
|
||||
await create_keyspace(cql, "test2", 32, rf=2)
|
||||
await cql.run_async("CREATE TABLE test2.test (pk int PRIMARY KEY, c int);")
|
||||
|
||||
# RF=3
|
||||
await create_keyspace(cql, "test3", 32, rf=3)
|
||||
await cql.run_async("CREATE TABLE test3.test (pk int PRIMARY KEY, c int);")
|
||||
|
||||
@@ -54,23 +58,19 @@ async def test_replace(manager: ManagerClient):
|
||||
await asyncio.gather(*[run_async_cl_all(cql, f"INSERT INTO test2.test (pk, c) VALUES ({k}, {k});") for k in keys])
|
||||
await asyncio.gather(*[run_async_cl_all(cql, f"INSERT INTO test3.test (pk, c) VALUES ({k}, {k});") for k in keys])
|
||||
|
||||
async def check_ks(ks):
|
||||
logger.info(f"Checking {ks}")
|
||||
query = SimpleStatement(f"SELECT * FROM {ks}.test;", consistency_level=ConsistencyLevel.QUORUM)
|
||||
rows = await cql.run_async(query)
|
||||
assert len(rows) == len(keys)
|
||||
for r in rows:
|
||||
assert r.c == r.pk
|
||||
|
||||
async def check():
|
||||
# RF=1 table "test" will experience data loss so don't check it.
|
||||
# We include it to check that the system doesn't crash.
|
||||
|
||||
logger.info("Checking table test2")
|
||||
query = SimpleStatement("SELECT * FROM test2.test;", consistency_level=ConsistencyLevel.ONE)
|
||||
rows = await cql.run_async(query)
|
||||
assert len(rows) == len(keys)
|
||||
for r in rows:
|
||||
assert r.c == r.pk
|
||||
|
||||
logger.info("Checking table test3")
|
||||
query = SimpleStatement("SELECT * FROM test3.test;", consistency_level=ConsistencyLevel.ONE)
|
||||
rows = await cql.run_async(query)
|
||||
assert len(rows) == len(keys)
|
||||
for r in rows:
|
||||
assert r.c == r.pk
|
||||
# RF=1 keyspace will experience data loss so don't check it.
|
||||
# We include it in the test only to check that the system doesn't crash.
|
||||
await check_ks("test2")
|
||||
await check_ks("test3")
|
||||
|
||||
await check()
|
||||
|
||||
@@ -81,11 +81,21 @@ async def test_replace(manager: ManagerClient):
|
||||
logger.info('Replacing a node')
|
||||
await manager.server_stop(servers[0].server_id)
|
||||
replace_cfg = ReplaceConfig(replaced_id = servers[0].server_id, reuse_ip_addr = False, use_host_id = True)
|
||||
await manager.server_add(replace_cfg)
|
||||
servers.append(await manager.server_add(replace_cfg))
|
||||
servers = servers[1:]
|
||||
|
||||
await check()
|
||||
|
||||
# Verify that QUORUM reads from RF=3 table work when replacing finished and we down a single node.
|
||||
# This validates that replace waits for tablet rebuilt before finishing bootstrap, otherwise some reads
|
||||
# would fail to find a quorum.
|
||||
logger.info('Downing a node')
|
||||
await manager.server_stop_gracefully(servers[0].server_id)
|
||||
await manager.server_not_sees_other_server(servers[1].ip_addr, servers[0].ip_addr)
|
||||
await manager.server_not_sees_other_server(servers[2].ip_addr, servers[0].ip_addr)
|
||||
|
||||
await check_ks("test3")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_removenode(manager: ManagerClient):
|
||||
|
||||
Reference in New Issue
Block a user