diff --git a/configure.py b/configure.py index b02502876d..8fb5d33935 100755 --- a/configure.py +++ b/configure.py @@ -1126,6 +1126,7 @@ scylla_core = (['message/messaging_service.cc', 'utils/lister.cc', 'repair/repair.cc', 'repair/row_level.cc', + 'repair/table_check.cc', 'exceptions/exceptions.cc', 'auth/allow_all_authenticator.cc', 'auth/allow_all_authorizer.cc', diff --git a/repair/table_check.cc b/repair/table_check.cc new file mode 100644 index 0000000000..7095749b89 --- /dev/null +++ b/repair/table_check.cc @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2024-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#include "replica/database.hh" +#include "repair/table_check.hh" +#include "service/migration_manager.hh" + +namespace repair { + +future table_sync_and_check(replica::database& db, service::migration_manager& mm, const table_id& uuid) { + if (mm.use_raft()) { + abort_on_expiry aoe(lowres_clock::now() + std::chrono::seconds{10}); + auto& as = aoe.abort_source(); + auto sub = mm.get_abort_source().subscribe([&as] () noexcept { + if (!as.abort_requested()) { + as.request_abort(); + } + }); + + // Trigger read barrier to synchronize schema. + co_await mm.get_group0_barrier().trigger(as); + } + + co_return !db.column_family_exists(uuid); +} + +future with_table_drop_silenced(replica::database& db, service::migration_manager& mm, const table_id& uuid, + std::function(const table_id&)> f) { + std::exception_ptr ex = nullptr; + try { + co_await f(uuid); + co_return table_dropped::no; + } catch (replica::no_such_column_family&) { + // No need to synchronize while we know the table was dropped. + } catch (...) { + // This node may still see a table while it is dropped on the remote node + // and so the remote node returns an error. In that case we want to skip + // that table and continue with the operation. + // + // But since RPC does not enable returning the exception type, the cause + // of the failure cannot be determined. Synchronize schema to see the latest + // changes and determine whether the table was dropped. + ex = std::current_exception(); + } + + if (ex) { + auto dropped = co_await table_sync_and_check(db, mm, uuid); + if (!dropped) { + co_await coroutine::return_exception_ptr(std::move(ex)); + } + } + co_return table_dropped::yes; +} + +} diff --git a/repair/table_check.hh b/repair/table_check.hh new file mode 100644 index 0000000000..587ba87b5a --- /dev/null +++ b/repair/table_check.hh @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2024-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once + +#include +#include + +#include "schema/schema_fwd.hh" + +using table_dropped = bool_class; + +namespace raft { +class server; +} + +namespace replica { +class database; +} + +namespace service { +class migration_manager; +} + +namespace repair { + +class database; + +future table_sync_and_check(replica::database& db, service::migration_manager& mm, const table_id& uuid); + +// Runs function f on given table. If f throws and the table is dropped, the exception is swallowed. +// Function is aimed to handle no_such_column_family on remote node or different shard, as it synchronizes +// schema before checking the table. Prefer standard error handling whenever possible. +future with_table_drop_silenced(replica::database& db, service::migration_manager& mm, const table_id& uuid, + std::function(const table_id&)> f); + +} diff --git a/service/migration_manager.hh b/service/migration_manager.hh index 71e9d645fc..f90d5756af 100644 --- a/service/migration_manager.hh +++ b/service/migration_manager.hh @@ -94,6 +94,11 @@ public: const migration_notifier& get_notifier() const { return _notifier; } service::storage_proxy& get_storage_proxy() { return _storage_proxy; } const service::storage_proxy& get_storage_proxy() const { return _storage_proxy; } + abort_source& get_abort_source() noexcept { return _as; } + const abort_source& get_abort_source() const noexcept { return _as; } + serialized_action& get_group0_barrier() noexcept { return _group0_barrier; } + const serialized_action& get_group0_barrier() const noexcept { return _group0_barrier; } + bool use_raft() const noexcept { return !_enable_schema_pulls; } // Disable schema pulls when Raft group 0 is fully responsible for managing schema. future<> disable_schema_pulls();