storage_service: Don't retry split when table is dropped

The split monitor wasn't handling the scenario where the table being
split is dropped. The monitor would be unable to find the tablet map
of such a table, and the error would be treated as a retryable one,
causing the monitor to fall into an endless retry loop, with sleeps
in between. And that would block further splits, since the monitor
would be busy with the retries. The fix is to detect that the table
was dropped and skip to the next candidate, if any.

Fixes #21859.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>

Closes scylladb/scylladb#22933

(cherry picked from commit 4d8a333a7f)

Closes scylladb/scylladb#23480
This commit is contained in:
Raphael S. Carvalho
2025-02-19 10:38:40 -03:00
committed by Botond Dénes
parent 636ec802c3
commit 7007dabdf9
3 changed files with 19 additions and 3 deletions

View File

@@ -235,18 +235,23 @@ tablet_transition_info migration_to_transition_info(const tablet_info& ti, const
};
}
// Builds the exception with a message that names the table whose tablet map
// could not be found.
no_such_tablet_map::no_such_tablet_map(const table_id& id)
    : std::runtime_error(fmt::format("Tablet map not found for table {}", id))
{}
// Returns the tablet map stored in `_tablets` for table `id`.
//
// Throws no_such_tablet_map (via throw_with_backtrace) when `_tablets` has no
// entry for `id`. The dedicated exception type lets callers distinguish a
// missing map from other runtime errors.
const tablet_map& tablet_metadata::get_tablet_map(table_id id) const {
    try {
        return *_tablets.at(id);
    } catch (const std::out_of_range&) {
        // Translate the container's lookup failure into the typed error.
        throw_with_backtrace<no_such_tablet_map>(id);
    }
}
void tablet_metadata::mutate_tablet_map(table_id id, noncopyable_function<void(tablet_map&)> func) {
auto it = _tablets.find(id);
if (it == _tablets.end()) {
throw std::runtime_error(format("Tablet map not found for table {}", id));
throw no_such_tablet_map(id);
}
auto tablet_map_copy = make_lw_shared<tablet_map>(*it->second);
func(*tablet_map_copy);
@@ -256,7 +261,7 @@ void tablet_metadata::mutate_tablet_map(table_id id, noncopyable_function<void(t
future<> tablet_metadata::mutate_tablet_map_async(table_id id, noncopyable_function<future<>(tablet_map&)> func) {
auto it = _tablets.find(id);
if (it == _tablets.end()) {
throw std::runtime_error(format("Tablet map not found for table {}", id));
throw no_such_tablet_map(id);
}
auto tablet_map_copy = make_lw_shared<tablet_map>(*it->second);
co_await func(*tablet_map_copy);

View File

@@ -393,6 +393,11 @@ struct tablet_desc {
const tablet_transition_info* transition; // null if there's no transition.
};
/// Thrown when a tablet map is requested for a table that has none registered.
class no_such_tablet_map : public std::runtime_error {
public:
    /// \param id the table whose tablet map was looked up.
    /// Marked explicit so a table_id never converts to an exception implicitly.
    explicit no_such_tablet_map(const table_id& id);
};
/// Stores information about tablets of a single table.
///
/// The map contains a constant number of tablets, tablet_count().

View File

@@ -5454,6 +5454,12 @@ future<> storage_service::process_tablet_split_candidate(table_id table) noexcep
release_guard(std::move(guard));
co_await split_all_compaction_groups();
}
} catch (const locator::no_such_tablet_map& ex) {
slogger.warn("Failed to complete splitting of table {} due to {}", table, ex);
break;
} catch (const replica::no_such_column_family& ex) {
slogger.warn("Failed to complete splitting of table {} due to {}", table, ex);
break;
} catch (const seastar::abort_requested_exception& ex) {
slogger.warn("Failed to complete splitting of table {} due to {}", table, ex);
break;