From 1d64ddbdd37fabfc5be31975c2f354b823eb4ba5 Mon Sep 17 00:00:00 2001
From: Ferenc Szili
Date: Mon, 23 Feb 2026 13:38:04 +0100
Subject: [PATCH] hint_sender: use per-tablet is_leaving() to avoid losing hints on RF reduction

hint_sender decides whether to send a hint directly to its destination
or to re-mutate from scratch based on token_metadata::is_leaving(),
which only checks whether the *host* is leaving the cluster. When a
tablet is dropped from a host due to RF reduction (RF--), the host is
still alive and is_leaving() returns false, so hint_sender sends
directly to a replica that will no longer own the data -- effectively
losing the hint.

Switch to the new ermp->is_leaving(host, token) which is tablet-aware.
When the destination's tablet is being migrated away *and* there are
pending endpoints, send directly (the pending endpoints will receive
the data via streaming); otherwise fall through to the re-mutate path
so all current replicas receive the mutation.
---
 db/hints/internal/hint_sender.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/db/hints/internal/hint_sender.cc b/db/hints/internal/hint_sender.cc
index 2dc0a3edfa..fc4ae1e109 100644
--- a/db/hints/internal/hint_sender.cc
+++ b/db/hints/internal/hint_sender.cc
@@ -252,20 +252,20 @@ future<> hint_sender::send_one_mutation(frozen_mutation_and_schema m) {
     host_id_vector_replica_set natural_endpoints = ermp->get_natural_replicas(token);
     host_id_vector_topology_change pending_endpoints = ermp->get_pending_replicas(token);
 
-    return futurize_invoke([this, m = std::move(m), ermp = std::move(ermp), &natural_endpoints, &pending_endpoints] () mutable -> future<> {
+    return futurize_invoke([this, m = std::move(m), ermp = std::move(ermp), &natural_endpoints, &pending_endpoints, &token] () mutable -> future<> {
         // The fact that we send with CL::ALL in both cases below ensures that new hints are not going
         // to be generated as a result of hints sending.
-        const auto& tm = ermp->get_token_metadata();
         const auto dst = end_point_key();
 
-        if (std::ranges::contains(natural_endpoints, dst) && !tm.is_leaving(dst)) {
+        const bool is_leaving = ermp->is_leaving(dst, token);
+        if (std::ranges::contains(natural_endpoints, dst) && (!is_leaving || !pending_endpoints.empty())) {
             manager_logger.trace("hint_sender[{}]:send_one_mutation: Sending directly", dst);
             // dst is not duplicated in pending_endpoints because it's in natural_endpoints
             return _proxy.send_hint_to_endpoint(std::move(m), std::move(ermp), dst, std::move(pending_endpoints));
         } else {
             if (manager_logger.is_enabled(log_level::trace)) {
-                if (tm.is_leaving(end_point_key())) {
-                    manager_logger.trace("hint_sender[{}]:send_one_mutation: Original target is leaving. Mutating from scratch", dst);
+                if (is_leaving) {
+                    manager_logger.trace("hint_sender[{}]:send_one_mutation: Original target host or tablet replica is leaving. Mutating from scratch", dst);
                 } else {
                     manager_logger.trace("hint_sender[{}]:send_one_mutation: Endpoint set has changed and original target is no longer a replica. Mutating from scratch", dst);
                 }