mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-22 07:42:16 +00:00
Merge '[Backport 2026.1] load_balancer: fix tablet allocator dropped table' from Scylladb[bot]
- Handle dropped tables gracefully in the tablet load balancer's `get_schema_and_rs()` instead of aborting with `on_internal_error`
- The load balancer operates on a token metadata snapshot but accesses the live schema for table lookups. A DROP TABLE applied by another fiber between coroutine yield points can remove a table from the live schema while it still exists in the snapshot, causing an abort.
`get_schema_and_rs()` now returns `std::optional` and logs a warning in debug log level instead of aborting when a table is missing. All callers skip dropped tables:
- `make_sizing_plan`: skips to next table
- `make_resize_plan`: skips to next table (merge suppression is moot)
- `check_constraints`: returns `skip_info{}` with empty viable targets
- `get_rs`: returns `nullptr`, checked by `check_constraints`
The call chain is: `make_plan` → `make_internode_plan` → `check_constraints` → `get_rs` → `get_schema_and_rs`. The `make_internode_plan` coroutine has multiple `co_await` yield points (`maybe_yield`, `pick_candidate`) between building the candidate tablet list and checking replication constraints. A DROP TABLE schema mutation applied during any of these yields removes the table from `_db.get_tables_metadata()` while the candidate list still references it.
Added `test_load_balancing_with_dropped_table` which simulates the race by capturing a token metadata snapshot, dropping the table, then calling `balance_tablets` with the stale snapshot.
Fixes: SCYLLADB-1906
This fix needs to be backported to versions: 2025.4, 2026.1
- (cherry picked from commit 4375b502ea)
- (cherry picked from commit be56bf031f)
Parent PR: #29585
Closes scylladb/scylladb#29883
* github.com:scylladb/scylladb:
test: verify load balancer handles dropped tables gracefully
tablet_allocator: handle dropped tables gracefully in get_schema_and_rs
This commit is contained in:
@@ -1655,10 +1655,14 @@ public:
|
||||
co_return std::move(plan);
|
||||
}
|
||||
|
||||
// Returns the schema and tablet-aware replication strategy for a given table.
|
||||
// Returns {nullptr, nullptr} if the table has been dropped concurrently (race between
|
||||
// the token metadata snapshot and the live schema).
|
||||
std::tuple<schema_ptr, const tablet_aware_replication_strategy*> get_schema_and_rs(table_id table) {
|
||||
auto t = _db.get_tables_metadata().get_table_if_exists(table);
|
||||
if (!t) {
|
||||
on_internal_error(lblogger, format("Table {} does not exist", table));
|
||||
lblogger.debug("Table {} no longer exists, skipping", table);
|
||||
return {nullptr, nullptr};
|
||||
}
|
||||
|
||||
auto s = t->schema();
|
||||
@@ -1673,6 +1677,8 @@ public:
|
||||
return {s, rs};
|
||||
}
|
||||
|
||||
// Returns the tablet-aware replication strategy for a given table, or nullptr
|
||||
// if the table has been dropped concurrently.
|
||||
const tablet_aware_replication_strategy* get_rs(table_id id) {
|
||||
auto [s, rs] = get_schema_and_rs(id);
|
||||
return rs;
|
||||
@@ -1870,7 +1876,9 @@ public:
|
||||
for (const auto& [table, tables] : _tm->tablets().all_table_groups()) {
|
||||
const auto& tmap = _tm->tablets().get_tablet_map(table);
|
||||
auto [s, rs] = get_schema_and_rs(table);
|
||||
|
||||
if (s == nullptr || rs == nullptr) {
|
||||
continue;
|
||||
}
|
||||
auto tablet_options = combine_tablet_options(
|
||||
tables | std::views::transform([&] (table_id table) { return _db.get_tables_metadata().get_table_if_exists(table); })
|
||||
| std::views::filter([] (auto t) { return t != nullptr; })
|
||||
@@ -2699,6 +2707,10 @@ public:
|
||||
std::unordered_map<sstring, int> rack_load;
|
||||
|
||||
auto rs = get_rs(tablet.table);
|
||||
if (rs == nullptr) {
|
||||
// Table was dropped concurrently. Skip this tablet.
|
||||
return skip_info{};
|
||||
}
|
||||
|
||||
auto get_viable_targets = [&] () {
|
||||
std::unordered_set<host_id> viable_targets;
|
||||
|
||||
@@ -6207,4 +6207,61 @@ SEASTAR_THREAD_TEST_CASE(test_get_secondary_replica) {
|
||||
topo.clear_gently().get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_dropped_table) {
|
||||
// Verifies that balance_tablets() gracefully handles a table that exists
|
||||
// in the token metadata snapshot but has been dropped from the live schema.
|
||||
// This simulates the race where a DROP TABLE is applied between yield
|
||||
// points during load balancer planning.
|
||||
do_with_cql_env_thread([] (auto& e) {
|
||||
topology_builder topo(e);
|
||||
|
||||
unsigned shard_count = 2;
|
||||
auto host1 = topo.add_node(node_state::normal, shard_count);
|
||||
auto host2 = topo.add_node(node_state::normal, shard_count);
|
||||
auto host3 = topo.add_node(node_state::normal, shard_count);
|
||||
|
||||
auto ks_name = add_keyspace(e, {{topo.dc(), 1}}, 4);
|
||||
auto table1 = add_table(e, ks_name).get();
|
||||
|
||||
mutate_tablets(e, [&] (tablet_metadata& tmeta) -> future<> {
|
||||
tablet_map tmap(4);
|
||||
auto tid = tmap.first_tablet();
|
||||
tmap.set_tablet(tid, tablet_info{tablet_replica_set{tablet_replica{host1, 0}}});
|
||||
tid = *tmap.next_tablet(tid);
|
||||
tmap.set_tablet(tid, tablet_info{tablet_replica_set{tablet_replica{host1, 1}}});
|
||||
tid = *tmap.next_tablet(tid);
|
||||
tmap.set_tablet(tid, tablet_info{tablet_replica_set{tablet_replica{host2, 0}}});
|
||||
tid = *tmap.next_tablet(tid);
|
||||
tmap.set_tablet(tid, tablet_info{tablet_replica_set{tablet_replica{host2, 1}}});
|
||||
tmeta.set_tablet_map(table1, std::move(tmap));
|
||||
co_return;
|
||||
});
|
||||
|
||||
auto& stm = e.shared_token_metadata().local();
|
||||
|
||||
shared_load_stats& load_stats = topo.get_shared_load_stats();
|
||||
load_stats.set_default_tablet_sizes(stm.get());
|
||||
|
||||
// Capture the token metadata snapshot while the table still exists.
|
||||
auto stale_tm = stm.get();
|
||||
|
||||
// Drop the table from the live schema. The stale snapshot still has
|
||||
// the table's tablet map, simulating the race condition.
|
||||
e.execute_cql(fmt::format("DROP TABLE \"{}\".\"{}\"", ks_name, table1.to_sstring())).get();
|
||||
|
||||
// balance_tablets should handle the stale table gracefully without
|
||||
// throwing or aborting.
|
||||
auto& talloc = e.get_tablet_allocator().local();
|
||||
auto& topology = e.get_topology_state_machine().local()._topology;
|
||||
auto& sys_ks = e.get_system_keyspace().local();
|
||||
auto plan = talloc.balance_tablets(stale_tm, &topology, &sys_ks,
|
||||
load_stats.get(), {}).get();
|
||||
|
||||
// No migrations should reference the dropped table.
|
||||
for (auto& mig : plan.migrations()) {
|
||||
BOOST_REQUIRE_NE(mig.tablet.table, table1);
|
||||
}
|
||||
}).get();
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
||||
Reference in New Issue
Block a user