mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-28 10:41:12 +00:00
tablets: deallocate storage state on end_migration
When a tablet is migrated and cleaned up, deallocate the tablet storage
group state on `end_migration` stage, instead of `cleanup` stage:
* When the stage is updated from `cleanup` to `end_migration`, the
storage group is removed on the leaving replica.
* When the table is initialized, if the tablet stage is `end_migration`
then we don't allocate a storage group for it. This happens for
example if the leaving replica is restarted during tablet migration.
If it's initialized in `cleanup` stage then we allocate a storage
group, and it will be deallocated when transitioning to
`end_migration`.
This guarantees that the storage group is always deallocated on the
leaving replica by `end_migration`, and that it is always allocated if
the tablet wasn't cleaned up fully yet.
It is a similar case also for the pending replica when the migration is
aborted. We deallocate the state on `revert_migration` which is the
stage following `cleanup_target`.
Previously the storage group would be allocated when the tablet is
initialized on any of the tablet replicas - also on the leaving replica,
and when the tablet stage is `cleanup` or `end_migration`, and
deallocated during `cleanup`.
This fixes the following issue:
1. A migrating tablet enters cleanup stage
2. the tablet is cleaned up successfuly
3. The leaving replica is restarted, and allocates storage group
4. tablet cleanup is not called because it was already cleaned up
4. the storage group remains allocated on the leaving replica after the
migration is completed - it's not cleaned up properly.
Fixes scylladb/scylladb#23481
(cherry picked from commit 34f15ca871)
This commit is contained in:
committed by
GitHub Action
parent
4a18a08284
commit
0b46cfb60d
@@ -136,6 +136,18 @@ std::optional<tablet_replica> get_leaving_replica(const tablet_info& tinfo, cons
|
||||
return *leaving.begin();
|
||||
}
|
||||
|
||||
bool is_post_cleanup(tablet_replica replica, const tablet_info& tinfo, const tablet_transition_info& trinfo) {
|
||||
if (replica == locator::get_leaving_replica(tinfo, trinfo)) {
|
||||
// we do tablet cleanup on the leaving replica in the `cleanup` stage, after which there is only the `end_migration` stage.
|
||||
return trinfo.stage == locator::tablet_transition_stage::end_migration;
|
||||
}
|
||||
if (replica == trinfo.pending_replica) {
|
||||
// we do tablet cleanup on the pending replica in the `cleanup_target` stage, after which there is only the `revert_migration` stage.
|
||||
return trinfo.stage == locator::tablet_transition_stage::revert_migration;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
tablet_replica_set get_new_replicas(const tablet_info& tinfo, const tablet_migration_info& mig) {
|
||||
return replace_replica(tinfo.replicas, mig.src, mig.dst);
|
||||
}
|
||||
|
||||
@@ -222,6 +222,10 @@ struct tablet_transition_info {
|
||||
// Returns the leaving replica for a given transition.
|
||||
std::optional<tablet_replica> get_leaving_replica(const tablet_info&, const tablet_transition_info&);
|
||||
|
||||
// True if the tablet is transitioning and it's in a stage that follows the stage
|
||||
// where we clean up the tablet on the given replica.
|
||||
bool is_post_cleanup(tablet_replica replica, const tablet_info& tinfo, const tablet_transition_info& trinfo);
|
||||
|
||||
/// Represents intention to move a single tablet replica from src to dst.
|
||||
struct tablet_migration_info {
|
||||
locator::tablet_transition_kind kind;
|
||||
|
||||
@@ -806,12 +806,19 @@ public:
|
||||
auto local_replica = locator::tablet_replica{_my_host_id, this_shard_id()};
|
||||
|
||||
for (auto tid : tmap.tablet_ids()) {
|
||||
auto range = tmap.get_token_range(tid);
|
||||
|
||||
if (tmap.has_replica(tid, local_replica)) {
|
||||
tlogger.debug("Tablet with id {} and range {} present for {}.{}", tid, range, schema()->ks_name(), schema()->cf_name());
|
||||
ret[tid.value()] = allocate_storage_group(tmap, tid, std::move(range));
|
||||
if (!tmap.has_replica(tid, local_replica)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// if the tablet was cleaned up already on this replica, don't allocate a storage group for it.
|
||||
auto trinfo = tmap.get_tablet_transition_info(tid);
|
||||
if (trinfo && locator::is_post_cleanup(local_replica, tmap.get_tablet_info(tid), *trinfo)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto range = tmap.get_token_range(tid);
|
||||
tlogger.debug("Tablet with id {} and range {} present for {}.{}", tid, range, schema()->ks_name(), schema()->cf_name());
|
||||
ret[tid.value()] = allocate_storage_group(tmap, tid, std::move(range));
|
||||
}
|
||||
_storage_groups = std::move(ret);
|
||||
}
|
||||
@@ -2433,7 +2440,7 @@ future<> tablet_storage_group_manager::update_effective_replication_map(const lo
|
||||
co_return;
|
||||
}
|
||||
|
||||
// Allocate storage group if tablet is migrating in.
|
||||
// Allocate storage group if tablet is migrating in, or deallocate if it's migrating out.
|
||||
auto this_replica = locator::tablet_replica{
|
||||
.host = erm.get_token_metadata().get_my_id(),
|
||||
.shard = this_shard_id()
|
||||
@@ -2449,6 +2456,8 @@ future<> tablet_storage_group_manager::update_effective_replication_map(const lo
|
||||
auto range = new_tablet_map->get_token_range(tid);
|
||||
_storage_groups[tid.value()] = allocate_storage_group(*new_tablet_map, tid, std::move(range));
|
||||
tablet_migrating_in = true;
|
||||
} else if (_storage_groups.contains(tid.value()) && locator::is_post_cleanup(this_replica, new_tablet_map->get_tablet_info(tid), transition_info)) {
|
||||
remove_storage_group(tid.value());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3803,7 +3812,6 @@ future<> table::cleanup_tablet(database& db, db::system_keyspace& sys_ks, locato
|
||||
co_await stop_compaction_groups(sg);
|
||||
co_await utils::get_local_injector().inject("delay_tablet_compaction_groups_cleanup", std::chrono::seconds(5));
|
||||
co_await cleanup_compaction_groups(db, sys_ks, tid, sg);
|
||||
_sg_manager->remove_storage_group(tid.value());
|
||||
}
|
||||
|
||||
future<> table::cleanup_tablet_without_deallocation(database& db, db::system_keyspace& sys_ks, locator::tablet_id tid) {
|
||||
|
||||
Reference in New Issue
Block a user