Files
scylladb/test/cluster/test_ensure_committed_by_group0.py
Gleb Natapov cc034f84c5 schema: ensure committed_by_group0 is set for all non-system tables on boot
Tables created before the GROUP0_SCHEMA_VERSIONING feature was enabled
have committed_by_group0 = null in system_schema.scylla_tables. This
causes maybe_delete_schema_version() to delete their version cell,
forcing the legacy hash-based schema version computation path.

Add ensure_committed_by_group0() which runs on boot and fixes up any
non-system tables where committed_by_group0 is not true (null or false):

1. Queries system_schema.scylla_tables for rows where committed_by_group0
   is null or false, skipping system keyspaces (system, system_schema).
2. Takes a group0 guard.
3. Re-checks after the raft barrier in case another node already fixed it.
4. For each table needing fixup, creates a mutation writing the version
   cell (from the in-memory schema). The committed_by_group0 = true flag
   is stamped by add_committed_by_group0_flag() inside announce().
5. Announces via raft group0.
6. Retries with a small random delay on group0_concurrent_modification.

On other nodes, schema_applier will detect these as "altered" tables
(scylla_tables mutation changed), but since the actual table definition
is unchanged, update_column_family is effectively a no-op.

This is a prerequisite for eventually removing the legacy hash-based
schema versioning code path.

Closes scylladb/scylladb#29911
2026-05-21 10:22:07 +02:00

53 lines
2.1 KiB
Python

#
# Copyright (C) 2026-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
#
"""
Test that ensure_committed_by_group0() fixes tables missing the flag on boot.
"""
import pytest
import logging
from test.pylib.manager_client import ManagerClient
logger = logging.getLogger(__name__)
async def _read_committed_by_group0(cql, table_name: str):
    """Return the committed_by_group0 cell stored for table ``ks.<table_name>``.

    Queries system_schema.scylla_tables and fails with a descriptive
    assertion (rather than a bare IndexError) when the row is missing.
    The returned value is whatever the driver yields for the cell; the
    caller below expects True, False, or None (cell absent).
    """
    rows = await cql.run_async(
        f"SELECT committed_by_group0 FROM system_schema.scylla_tables "
        f"WHERE keyspace_name = 'ks' AND table_name = '{table_name}'")
    assert rows, f"no system_schema.scylla_tables row for ks.{table_name}"
    return rows[0].committed_by_group0


@pytest.mark.asyncio
async def test_ensure_committed_by_group0(manager: ManagerClient):
    """Tables with committed_by_group0 = null or false get fixed on restart."""
    tables = ('tbl_null', 'tbl_false')

    servers = await manager.servers_add(1)
    (cql, _) = await manager.get_ready_cql(servers)

    await cql.run_async("CREATE KEYSPACE ks WITH replication = "
                        "{'class': 'NetworkTopologyStrategy', 'replication_factor': 1}")
    await cql.run_async("CREATE TABLE ks.tbl_null (pk int PRIMARY KEY)")
    await cql.run_async("CREATE TABLE ks.tbl_false (pk int PRIMARY KEY)")

    # Verify both have committed_by_group0 = true initially
    for tbl in tables:
        assert await _read_committed_by_group0(cql, tbl) is True, \
            f"committed_by_group0 not initially true for {tbl}"

    # Simulate pre-group0 table (null) and recovery-mode table (false)
    await cql.run_async(
        "DELETE committed_by_group0 FROM system_schema.scylla_tables "
        "WHERE keyspace_name = 'ks' AND table_name = 'tbl_null'")
    await cql.run_async(
        "UPDATE system_schema.scylla_tables SET committed_by_group0 = false "
        "WHERE keyspace_name = 'ks' AND table_name = 'tbl_false'")

    # Confirm the simulated state really took effect; otherwise the final
    # check after the restart would pass without exercising the fixup.
    assert await _read_committed_by_group0(cql, 'tbl_null') is None, \
        "failed to clear committed_by_group0 for tbl_null"
    assert await _read_committed_by_group0(cql, 'tbl_false') is False, \
        "failed to set committed_by_group0 = false for tbl_false"

    # Restart - ensure_committed_by_group0() should fix both on boot
    await manager.server_restart(servers[0].server_id)
    # Re-acquire the CQL session after the restart, as the original test does.
    (cql, _) = await manager.get_ready_cql(servers)

    # Verify fixup happened for both tables
    for tbl in tables:
        assert await _read_committed_by_group0(cql, tbl) is True, \
            f"committed_by_group0 not fixed for {tbl}"